From 9736cd0b67615586b643c1184da0f585ae5f1144 Mon Sep 17 00:00:00 2001
From: Ben Ashbaugh <ben.ashbaugh@intel.com>
Date: Mon, 17 Nov 2025 08:21:08 -0800
Subject: [PATCH 01/54] switch the return type for is_extension_available to
 bool (#2581)

from a review comment:
https://github.com/KhronosGroup/OpenCL-CTS/pull/2579#pullrequestreview-3463737641

We should use `bool` as the return type for `is_extension_available`,
not `int`.
---
 test_common/harness/deviceInfo.cpp | 2 +-
 test_common/harness/deviceInfo.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/test_common/harness/deviceInfo.cpp b/test_common/harness/deviceInfo.cpp
index 5b1e7e03..e06d32aa 100644
--- a/test_common/harness/deviceInfo.cpp
+++ b/test_common/harness/deviceInfo.cpp
@@ -50,7 +50,7 @@ std::string get_device_info_string(cl_device_id device,
 }
 
 /* Determines if an extension is supported by a device. */
-int is_extension_available(cl_device_id device, const char *extensionName)
+bool is_extension_available(cl_device_id device, const char *extensionName)
 {
     std::string extString = get_device_extensions_string(device);
     std::istringstream ss(extString);
diff --git a/test_common/harness/deviceInfo.h b/test_common/harness/deviceInfo.h
index c3c8a512..30711b18 100644
--- a/test_common/harness/deviceInfo.h
+++ b/test_common/harness/deviceInfo.h
@@ -26,7 +26,7 @@ std::string get_device_info_string(cl_device_id device,
                                    cl_device_info param_name);
 
 /* Determines if an extension is supported by a device. */
-int is_extension_available(cl_device_id device, const char *extensionName);
+bool is_extension_available(cl_device_id device, const char *extensionName);
 
 /* Returns the version of the extension the device supports or throws an
  * exception if the extension is not supported by the device. */

From 7e26e3b5557ed5927781c912898c56f50d32db97 Mon Sep 17 00:00:00 2001
From: paulfradgley <39525348+paulfradgley@users.noreply.github.com>
Date: Tue, 4 Nov 2025 16:52:52 +0000
Subject: [PATCH 02/54] Replace error code for invalid size negative test

---
 .../images/kernel_read_write/test_cl_ext_image_from_buffer.cpp  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
index 8b82b9f9..e1fa5059 100644
--- a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
@@ -629,7 +629,7 @@ int image_from_small_buffer_negative(cl_device_id device, cl_context context,
 
                 clCreateImage(context, flag, &format, &image_desc, nullptr,
                               &err);
-                test_failure_error(err, CL_INVALID_MEM_OBJECT,
+                test_failure_error(err, CL_INVALID_IMAGE_SIZE,
                                    "Unexpected clCreateImage return");
 
                 err = clReleaseMemObject(buffer);

From 3d038fb7ea91455ac0a78f81f93680ae0e82a513 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 18 Nov 2025 17:51:41 +0100
Subject: [PATCH 03/54] Added a test for semaphore payloads that are ignored
 (#2554)

Fixes #2384, as described in the issue.
---
 .../cl_khr_semaphore/CMakeLists.txt           |  1 +
 .../cl_khr_semaphore/test_semaphores.cpp      |  6 --
 .../test_semaphores_payload.cpp               | 86 +++++++++++++++++++
 3 files changed, 87 insertions(+), 6 deletions(-)
 create mode 100644 test_conformance/extensions/cl_khr_semaphore/test_semaphores_payload.cpp

diff --git a/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt b/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt
index 682ada5f..6f4ac812 100644
--- a/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt
@@ -9,6 +9,7 @@ set(${MODULE_NAME}_SOURCES
          test_semaphores_negative_create.cpp
          test_semaphores_cross_queue.cpp
          test_semaphores_queries.cpp
+         test_semaphores_payload.cpp
          semaphore_base.h
 )
 
diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
index ce146b41..f4eaf5d1 100644
--- a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
+++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
@@ -14,14 +14,8 @@
 // limitations under the License.
 //
 
-#include <thread>
-
 #include "semaphore_base.h"
 
-#include "semaphore_base.h"
-
-#define FLUSH_DELAY_S 5
-
 namespace {
 
 const char* source = "__kernel void empty() {}";
diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores_payload.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores_payload.cpp
new file mode 100644
index 00000000..94fb82e0
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores_payload.cpp
@@ -0,0 +1,86 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "semaphore_base.h"
+
+namespace {
+
+struct PayloadSemaphore : public SemaphoreTestBase
+{
+    clSemaphoreWrapper sema_sec;
+
+    PayloadSemaphore(cl_device_id device, cl_context context,
+                     cl_command_queue queue, cl_int nelems)
+        : SemaphoreTestBase(device, context, queue, nelems), sema_sec(this)
+    {}
+
+    cl_int Run() override
+    {
+        cl_int err = CL_SUCCESS;
+
+        // Create semaphore
+        cl_semaphore_properties_khr sema_props[] = {
+            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_TYPE_BINARY_KHR),
+            0
+        };
+        semaphore =
+            clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+        test_error(err, "Could not create semaphore");
+
+        sema_sec =
+            clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+        test_error(err, "Could not create semaphore");
+
+        {
+            cl_semaphore_payload_khr payload_list[] = { 1, 2 };
+            cl_semaphore_khr semaphores[2] = { semaphore, sema_sec };
+
+            // Signal semaphore
+            err = clEnqueueSignalSemaphoresKHR(
+                queue, 2, semaphores, payload_list, 0, nullptr, nullptr);
+            test_error(err, "Could not signal semaphore");
+        }
+
+        {
+            cl_semaphore_payload_khr payload_list[] = { 3, 4 };
+            cl_semaphore_khr semaphores[2] = { semaphore, sema_sec };
+
+            // Wait semaphore
+            err = clEnqueueWaitSemaphoresKHR(queue, 2, semaphores, payload_list,
+                                             0, nullptr, nullptr);
+            test_error(err, "Could not wait semaphore");
+        }
+
+        // Finish
+        err = clFinish(queue);
+        test_error(err, "Could not finish queue");
+
+        return CL_SUCCESS;
+    }
+};
+
+
+} // anonymous namespace
+
+// Confirm that a valid semaphore payload values list will be ignored if no
+// semaphores in the list of sema_objects require a payload
+REGISTER_TEST_VERSION(semaphores_payload, Version(1, 2))
+{
+    return MakeAndRunTest<PayloadSemaphore>(device, context, queue,
+                                            num_elements);
+}

From 6b59cae6e974a1974dca76d77680fe6aaeddd65b Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 18 Nov 2025 17:52:03 +0100
Subject: [PATCH 04/54] Added test to verify process of building a kernel with
 long name (#2559)

Closes #2422, as described in the issue.
---
 .../compiler/test_build_helpers.cpp           | 111 ++++++++++++++++++
 1 file changed, 111 insertions(+)

diff --git a/test_conformance/compiler/test_build_helpers.cpp b/test_conformance/compiler/test_build_helpers.cpp
index e8ef3ee8..46a54efd 100644
--- a/test_conformance/compiler/test_build_helpers.cpp
+++ b/test_conformance/compiler/test_build_helpers.cpp
@@ -16,6 +16,7 @@
 #include "testBase.h"
 #include "harness/testHarness.h"
 #include "harness/parseParameters.h"
+#include "harness/stringHelpers.h"
 
 #include <array>
 #include <memory>
@@ -1009,3 +1010,113 @@ REGISTER_TEST(get_program_build_info)
 
     return 0;
 }
+
+cl_int test_kernel_name_len(cl_context context, cl_device_id device,
+                            const cl_uint length)
+{
+    cl_int error = CL_SUCCESS;
+
+    std::string buf = { "abcdefghijklmnopqrstuvwxyz" };
+    std::string name;
+    name.reserve(length);
+
+    for (cl_uint i = 0; i < length; ++i) name += buf[i % buf.size()];
+
+    const char *sample_name_size_test_kernel = R"(
+        __kernel void %s(int src, __global int *dst)
+        {
+            dst[0]=src;
+        }
+    )";
+    std::string program_source =
+        str_sprintf(std::string(sample_name_size_test_kernel), name.c_str());
+    const char *ptr = program_source.c_str();
+
+    {
+        clProgramWrapper program;
+        clKernelWrapper kernel;
+
+        error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
+                                            name.c_str());
+        if (error != CL_SUCCESS)
+        {
+            log_error("ERROR: Unable to create program with length of "
+                      "kernel name "
+                      "%d : %s! (%s from %s:%d)\n",
+                      length, name.c_str(), IGetErrorString(error), __FILE__,
+                      __LINE__);
+            return TEST_FAIL;
+        }
+
+        // query kernel name
+        size_t kernel_name_size = 0;
+        error = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, 0, nullptr,
+                                &kernel_name_size);
+        test_error(error, "clGetKernelInfo (size) failed");
+
+        std::vector<char> kernel_name(kernel_name_size);
+        error = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME,
+                                kernel_name_size, kernel_name.data(), nullptr);
+        test_error(error, "clGetKernelInfo (name) failed");
+
+        if (name != std::string(kernel_name.data()))
+        {
+            log_error("Kernel name mismatch! expected=%s got=%s\n",
+                      name.c_str(), kernel_name.data());
+            return TEST_FAIL;
+        }
+    }
+
+    if (gCompilationMode == kOnline)
+    {
+        clProgramWrapper programObj =
+            clCreateProgramWithSource(context, 1, &ptr, nullptr, &error);
+        test_error(error, "clCreateProgramWithSource failed (compile)");
+
+        error = clCompileProgram(programObj, 0, nullptr, nullptr, 0, nullptr,
+                                 nullptr, nullptr, nullptr);
+        if (error != CL_SUCCESS)
+        {
+            log_error("ERROR: Unable to compile program with length of "
+                      "kernel name "
+                      "%d : %s! (%s from %s:%d)\n",
+                      length, name.c_str(), IGetErrorString(error), __FILE__,
+                      __LINE__);
+            return TEST_FAIL;
+        }
+
+        clProgramWrapper linkedProgram =
+            clLinkProgram(context, 0, nullptr, nullptr, 1, &programObj, nullptr,
+                          nullptr, &error);
+        if (error != CL_SUCCESS)
+        {
+            log_error("ERROR: Unable to link program with length of "
+                      "kernel name "
+                      "%d : %s! (%s from %s:%d)\n",
+                      length, name.c_str(), IGetErrorString(error), __FILE__,
+                      __LINE__);
+            return TEST_FAIL;
+        }
+
+        clKernelWrapper kernel =
+            clCreateKernel(linkedProgram, name.c_str(), &error);
+        test_error(error, "clCreateKernel after link failed");
+    }
+
+    return TEST_PASS;
+}
+
+REGISTER_TEST(kernel_name_size)
+{
+    for (cl_uint len = 32; len <= 2048; len *= 2)
+    {
+        cl_int status = test_kernel_name_len(context, device, len);
+        if (status == TEST_FAIL)
+        {
+            log_error("ERROR: test_kernel_name_len failed with length %d\n",
+                      len);
+            return TEST_FAIL;
+        }
+    }
+    return TEST_PASS;
+}

From ec546b80c5995c981e1d92bf88e29ade01014cd5 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 18 Nov 2025 17:53:42 +0100
Subject: [PATCH 05/54] Added an in-order queue variant for the
 semaphores_import_export_fd test (#2542)

Fixes #2213 according to mentioned discussion
---
 .../test_external_semaphore_sync_fd.cpp       | 91 +++++++++++--------
 1 file changed, 51 insertions(+), 40 deletions(-)

diff --git a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore_sync_fd.cpp b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore_sync_fd.cpp
index 2fcf4f3f..f4636c9a 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore_sync_fd.cpp
+++ b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore_sync_fd.cpp
@@ -18,39 +18,10 @@
 #include "harness/extensionHelpers.h"
 #include "harness/errorHelpers.h"
 
-// Test it is possible to export a semaphore to a sync fd and import the same
-// sync fd to a new semaphore
-REGISTER_TEST_VERSION(external_semaphores_import_export_fd, Version(1, 2))
+cl_int doTest(cl_device_id device, cl_context context, cl_command_queue queue)
 {
     cl_int err = CL_SUCCESS;
 
-    if (!is_extension_available(device, "cl_khr_external_semaphore"))
-    {
-        log_info(
-            "cl_khr_external_semaphore is not supported on this platoform. "
-            "Skipping test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    if (!is_extension_available(device, "cl_khr_external_semaphore_sync_fd"))
-    {
-        log_info("cl_khr_external_semaphore_sync_fd is not supported on this "
-                 "platoform. Skipping test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    cl_command_queue_properties device_props = 0;
-    err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
-                          sizeof(device_props), &device_props, NULL);
-    test_error(err, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
-
-    if ((device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0)
-    {
-        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE not "
-                 "supported. Skipping test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-
     // Obtain pointers to semaphore's API
     GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
     GET_PFN(device, clEnqueueSignalSemaphoresKHR);
@@ -58,11 +29,6 @@ REGISTER_TEST_VERSION(external_semaphores_import_export_fd, Version(1, 2))
     GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
     GET_PFN(device, clReleaseSemaphoreKHR);
 
-    // Create ooo queue
-    clCommandQueueWrapper test_queue = clCreateCommandQueue(
-        context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
-    test_error(err, "Could not create command queue");
-
     // Create semaphore
     cl_semaphore_properties_khr sema_1_props[] = {
         static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
@@ -81,8 +47,8 @@ REGISTER_TEST_VERSION(external_semaphores_import_export_fd, Version(1, 2))
 
     // Signal semaphore
     clEventWrapper signal_event;
-    err = clEnqueueSignalSemaphoresKHR(test_queue, 1, &sema_1, nullptr, 0,
-                                       nullptr, &signal_event);
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_1, nullptr, 0, nullptr,
+                                       &signal_event);
     test_error(err, "Could not signal semaphore");
 
     // Extract sync fd
@@ -109,12 +75,12 @@ REGISTER_TEST_VERSION(external_semaphores_import_export_fd, Version(1, 2))
 
     // Wait semaphore
     clEventWrapper wait_event;
-    err = clEnqueueWaitSemaphoresKHR(test_queue, 1, &sema_2, nullptr, 0,
-                                     nullptr, &wait_event);
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_2, nullptr, 0, nullptr,
+                                     &wait_event);
     test_error(err, "Could not wait semaphore");
 
     // Finish
-    err = clFinish(test_queue);
+    err = clFinish(queue);
     test_error(err, "Could not finish queue");
 
     // Check all events are completed
@@ -129,3 +95,48 @@ REGISTER_TEST_VERSION(external_semaphores_import_export_fd, Version(1, 2))
     test_error(err, "Could not release semaphore");
     return TEST_PASS;
 }
+
+// Test it is possible to export a semaphore to a sync fd and import the same
+// sync fd to a new semaphore
+REGISTER_TEST_VERSION(external_semaphores_import_export_fd, Version(1, 2))
+{
+    REQUIRE_EXTENSION("cl_khr_external_semaphore");
+    REQUIRE_EXTENSION("cl_khr_external_semaphore_sync_fd");
+
+    cl_int err = CL_SUCCESS;
+    cl_int total_status = TEST_PASS;
+
+    // test external semaphore sync fd with out-of-order queue
+    {
+        cl_command_queue_properties device_props = 0;
+        err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
+                              sizeof(device_props), &device_props, NULL);
+        test_error(err,
+                   "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
+
+        if ((device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0)
+        {
+            // Create ooo queue
+            clCommandQueueWrapper test_queue = clCreateCommandQueue(
+                context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+            test_error(err, "Could not create command queue");
+
+            cl_int status = doTest(device, context, test_queue);
+            if (status != TEST_PASS && status != TEST_SKIPPED_ITSELF)
+            {
+                total_status = TEST_FAIL;
+            }
+        }
+    }
+
+    // test external semaphore sync fd with in-order harness queue
+    {
+        cl_int status = doTest(device, context, queue);
+        if (status != TEST_PASS && status != TEST_SKIPPED_ITSELF)
+        {
+            total_status = TEST_FAIL;
+        }
+    }
+
+    return total_status;
+}

From 02e99f4554fb7f353562a09d1e922f19d2ff7117 Mon Sep 17 00:00:00 2001
From: Ben Ashbaugh <ben.ashbaugh@intel.com>
Date: Tue, 18 Nov 2025 15:06:57 -0800
Subject: [PATCH 06/54] fixes a warning and a typo in the mutable dispatch test
 (#2579)

Fixes a warning in the mutable dispatch test with some compilers:

```
3>C:\git\OpenCL-CTS\test_conformance\extensions\cl_khr_command_buffer\cl_khr_command_buffer_mutable_dispatch\mutable_command_basic.h(82,16): warning C4805: '==': unsafe mix of type 'int' and type 'bool' in operation
```

Also fixes a misspelled variable name while we're at it.
---
 .../mutable_command_basic.h                          | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
index 59f07dd7..47b0c97a 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
@@ -76,12 +76,10 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
 
     bool Skip() override
     {
-        bool extension_avaliable =
-            is_extension_available(device,
-                                   "cl_khr_command_buffer_mutable_dispatch")
-            == true;
+        bool extension_available = is_extension_available(
+            device, "cl_khr_command_buffer_mutable_dispatch");
 
-        if (extension_avaliable)
+        if (extension_available)
         {
             Version device_version = get_device_cl_version(device);
             if ((device_version >= Version(3, 0))
@@ -96,7 +94,7 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
                     log_info("cl_khr_command_buffer_mutable_dispatch version "
                              "0.9.4 is "
                              "required to run the test, skipping.\n ");
-                    extension_avaliable = false;
+                    extension_available = false;
                 }
             }
         }
@@ -109,7 +107,7 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
                 sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
             && mutable_capabilities != 0;
 
-        return !mutable_support || !extension_avaliable
+        return !mutable_support || !extension_available
             || BasicCommandBufferTest::Skip();
     }
 

From afc7e64c57937d392c06cd3a2408ffe50510c35a Mon Sep 17 00:00:00 2001
From: Ben Ashbaugh <ben.ashbaugh@intel.com>
Date: Wed, 26 Nov 2025 16:10:47 -0800
Subject: [PATCH 07/54] fix the null required work group size test (#2576)

A kernel with a required work-group size can always be enqueued with a
3D ND-range, but it can only be enqueued with a 1D or 2D ND-range when
the required work-group size in each unused dimension is equal to one.

fixes #2575
---
 .../api/test_kernel_attributes.cpp            | 55 ++++++++++---------
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/test_conformance/api/test_kernel_attributes.cpp b/test_conformance/api/test_kernel_attributes.cpp
index 86b3595c..dd50e0f9 100644
--- a/test_conformance/api/test_kernel_attributes.cpp
+++ b/test_conformance/api/test_kernel_attributes.cpp
@@ -385,14 +385,14 @@ REGISTER_TEST(null_required_work_group_size)
 
     struct KernelAttribInfo
     {
-        std::string str;
-        cl_uint max_dim;
+        cl_int wgs[3];
+        cl_uint min_dim;
     };
 
     std::vector<KernelAttribInfo> attribs;
-    attribs.push_back({ "__attribute__((reqd_work_group_size(2,1,1)))", 1 });
-    attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,1)))", 2 });
-    attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,4)))", 3 });
+    attribs.push_back({ { 2, 1, 1 }, 1 });
+    attribs.push_back({ { 2, 3, 1 }, 2 });
+    attribs.push_back({ { 2, 3, 4 }, 3 });
 
     const std::string body_str = R"(
         __kernel void wg_size(__global int* dst)
@@ -409,7 +409,11 @@ REGISTER_TEST(null_required_work_group_size)
 
     for (auto& attrib : attribs)
     {
-        const std::string source_str = attrib.str + body_str;
+        const std::string attrib_str = "__attribute__((reqd_work_group_size("
+            + std::to_string(attrib.wgs[0]) + ","
+            + std::to_string(attrib.wgs[1]) + ","
+            + std::to_string(attrib.wgs[2]) + ")))";
+        const std::string source_str = attrib_str + body_str;
         const char* source = source_str.c_str();
 
         clProgramWrapper program;
@@ -421,21 +425,19 @@ REGISTER_TEST(null_required_work_group_size)
         error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst);
         test_error(error, "clSetKernelArg failed");
 
-        for (cl_uint work_dim = 1; work_dim <= attrib.max_dim; work_dim++)
+        for (cl_uint work_dim = attrib.min_dim; work_dim <= 3; work_dim++)
         {
-            const cl_int expected[3] = { 2, work_dim >= 2 ? 3 : 1,
-                                         work_dim >= 3 ? 4 : 1 };
             const size_t test_work_group_size =
-                expected[0] * expected[1] * expected[2];
-            if ((size_t)expected[0] > device_max_work_item_sizes[0]
-                || (size_t)expected[1] > device_max_work_item_sizes[1]
-                || (size_t)expected[2] > device_max_work_item_sizes[2]
+                attrib.wgs[0] * attrib.wgs[1] * attrib.wgs[2];
+            if ((size_t)attrib.wgs[0] > device_max_work_item_sizes[0]
+                || (size_t)attrib.wgs[1] > device_max_work_item_sizes[1]
+                || (size_t)attrib.wgs[2] > device_max_work_item_sizes[2]
                 || test_work_group_size > device_max_work_group_size)
             {
                 log_info("Skipping test for work_dim = %u: required work group "
                          "size (%i, %i, %i) (total %zu) exceeds device max "
                          "work group size (%zu, %zu, %zu) (total %zu)\n",
-                         work_dim, expected[0], expected[1], expected[2],
+                         work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
                          test_work_group_size, device_max_work_item_sizes[0],
                          device_max_work_item_sizes[1],
                          device_max_work_item_sizes[2],
@@ -444,8 +446,9 @@ REGISTER_TEST(null_required_work_group_size)
             }
 
             const cl_int zero = 0;
-            error = clEnqueueFillBuffer(queue, dst, &zero, sizeof(zero), 0,
-                                        sizeof(expected), 0, nullptr, nullptr);
+            error =
+                clEnqueueFillBuffer(queue, dst, &zero, sizeof(zero), 0,
+                                    sizeof(attrib.wgs), 0, nullptr, nullptr);
 
             const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 };
             error = clEnqueueNDRangeKernel(queue, kernel, work_dim, nullptr,
@@ -458,12 +461,12 @@ REGISTER_TEST(null_required_work_group_size)
                                         results, 0, nullptr, nullptr);
             test_error(error, "clEnqueueReadBuffer failed");
 
-            if (results[0] != expected[0] || results[1] != expected[1]
-                || results[2] != expected[2])
+            if (results[0] != attrib.wgs[0] || results[1] != attrib.wgs[1]
+                || results[2] != attrib.wgs[2])
             {
                 log_error("Executed local size mismatch with work_dim = %u: "
                           "Expected (%d,%d,%d) got (%d,%d,%d)\n",
-                          work_dim, expected[0], expected[1], expected[2],
+                          work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
                           results[0], results[1], results[2]);
                 return TEST_FAIL;
             }
@@ -477,15 +480,15 @@ REGISTER_TEST(null_required_work_group_size)
                 test_error(error,
                            "clGetKernelSuggestedLocalWorkSizeKHR failed");
 
-                if ((cl_int)suggested[0] != expected[0]
-                    || (cl_int)suggested[1] != expected[1]
-                    || (cl_int)suggested[2] != expected[2])
+                if (suggested[0] != (size_t)attrib.wgs[0]
+                    || suggested[1] != (size_t)attrib.wgs[1]
+                    || suggested[2] != (size_t)attrib.wgs[2])
                 {
                     log_error("Suggested local size mismatch with work_dim = "
-                              "%u: Expected (%d,%d,%d) got (%d,%d,%d)\n",
-                              work_dim, expected[0], expected[1], expected[2],
-                              (cl_int)suggested[0], (cl_int)suggested[1],
-                              (cl_int)suggested[2]);
+                              "%u: Expected (%d,%d,%d) got (%zu,%zu,%zu)\n",
+                              work_dim, attrib.wgs[0], attrib.wgs[1],
+                              attrib.wgs[2], suggested[0], suggested[1],
+                              suggested[2]);
                     return TEST_FAIL;
                 }
             }

From 0da389cedb81a87f175931b28ed911ca43fbc394 Mon Sep 17 00:00:00 2001
From: Ben Ashbaugh <ben.ashbaugh@intel.com>
Date: Wed, 26 Nov 2025 16:11:20 -0800
Subject: [PATCH 08/54] temporarily disable the negative_invalid_arg_size_local
 test (#2573)

Temporarily disable the negative_invalid_arg_size_local test, pending
working group discussion.
---
 test_conformance/api/test_kernels.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/test_conformance/api/test_kernels.cpp b/test_conformance/api/test_kernels.cpp
index 47100b7a..d0dc116f 100644
--- a/test_conformance/api/test_kernels.cpp
+++ b/test_conformance/api/test_kernels.cpp
@@ -982,6 +982,12 @@ REGISTER_TEST(negative_invalid_arg_index)
 
 REGISTER_TEST(negative_invalid_arg_size_local)
 {
+    if (true)
+    {
+        log_info("Disabling this test temporarily, see internal issue 374.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
     cl_int error = CL_SUCCESS;
     clProgramWrapper program;
     clKernelWrapper local_arg_kernel;

From b7808f2b2d7596f97153e24f18da7bee6f00322e Mon Sep 17 00:00:00 2001
From: Ole Strohm <ole.strohm@arm.com>
Date: Thu, 27 Nov 2025 00:19:03 +0000
Subject: [PATCH 09/54] Limit work group size by the max item size in
 mutable_command_full_dispatch (#2578)

The maximum value for the workgroup size in a specific dimension can be
lower than the overall maximum workgroup size. This patch queries for
the maximum work item size in the first dimension and limits the
group_size by that value as well.

Signed-off-by: Ole Strohm <ole.strohm@arm.com>
---
 .../mutable_command_full_dispatch.cpp             | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp
index b5c8ecd9..347f6407 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp
@@ -126,7 +126,20 @@ struct MutableCommandFullDispatch : InfoMutableCommandBufferTest
             &workgroupinfo_size, NULL);
         test_error(error, "clGetKernelWorkGroupInfo failed");
 
-        group_size = std::min(num_elements, workgroupinfo_size);
+        cl_uint max_work_dimension = 0;
+        error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
+                                sizeof(max_work_dimension), &max_work_dimension,
+                                NULL);
+        test_error(error, "clGetDeviceInfo failed");
+
+        std::vector<size_t> max_work_item_sizes(max_work_dimension, 0);
+        error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+                                sizeof(size_t) * max_work_item_sizes.size(),
+                                max_work_item_sizes.data(), NULL);
+        test_error(error, "clGetDeviceInfo failed");
+
+        group_size = std::min(
+            { num_elements, workgroupinfo_size, max_work_item_sizes[0] });
         const size_t size_to_allocate_src = group_size * sizeof(cl_int);
 
         // create and initialize source buffer

From df46a38e314999b47a5417a04103b461f9c99f5b Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjodin@google.com>
Date: Thu, 27 Nov 2025 01:19:33 +0100
Subject: [PATCH 10/54] Better support for -cl-uniform-work-group-size (#2564)

Add support for build options that are only available starting with a
certain OpenCL version:
- -cl-uniform-work-group-size: OpenCL 2.0 and newer
- -cl-no-subgroup-ifp: OpenCL 2.1 and newer

Add a specific test for -cl-uniform-work-group-size:
- Check that the test can be executed when the work-group size is
uniform.
- Check that the test returns the proper error code when the work-group
size is not uniform.

Ref #2563
---
 .../compiler/test_build_options.cpp           | 122 +++++++++++++-----
 1 file changed, 89 insertions(+), 33 deletions(-)

diff --git a/test_conformance/compiler/test_build_options.cpp b/test_conformance/compiler/test_build_options.cpp
index daba1b0e..52cb8eaa 100644
--- a/test_conformance/compiler/test_build_options.cpp
+++ b/test_conformance/compiler/test_build_options.cpp
@@ -15,6 +15,9 @@
 //
 #include "testBase.h"
 #include "harness/os_helpers.h"
+#include "harness/testHarness.h"
+
+#include <array>
 
 const char *preprocessor_test_kernel[] = {
 "__kernel void sample_test(__global int *dst)\n"
@@ -42,26 +45,22 @@ const char *include_test_kernel[] = {
 "\n"
 "}\n" };
 
-const char *options_test_kernel[] = {
-    "__kernel void sample_test(__global float *src, __global int *dst)\n"
-    "{\n"
-    "    size_t tid = get_global_id(0);\n"
-    "    dst[tid] = (int)src[tid];\n"
-    "}\n"
-};
+const char *options_test_kernel[] = { "__kernel void sample_test() {}\n" };
 
-const char *optimization_options[] = {
-    "-cl-single-precision-constant",
-    "-cl-denorms-are-zero",
-    "-cl-opt-disable",
-    "-cl-mad-enable",
-    "-cl-no-signed-zeros",
-    "-cl-unsafe-math-optimizations",
-    "-cl-finite-math-only",
-    "-cl-fast-relaxed-math",
-    "-w",
-    "-Werror",
-    };
+std::array optimization_options{
+    std::pair{ "-cl-single-precision-constant", Version(1, 0) },
+    std::pair{ "-cl-denorms-are-zero", Version(1, 0) },
+    std::pair{ "-cl-opt-disable", Version(1, 0) },
+    std::pair{ "-cl-mad-enable", Version(1, 0) },
+    std::pair{ "-cl-no-signed-zeros", Version(1, 0) },
+    std::pair{ "-cl-unsafe-math-optimizations", Version(1, 0) },
+    std::pair{ "-cl-finite-math-only", Version(1, 0) },
+    std::pair{ "-cl-fast-relaxed-math", Version(1, 0) },
+    std::pair{ "-w", Version(1, 0) },
+    std::pair{ "-Werror", Version(1, 0) },
+    std::pair{ "-cl-uniform-work-group-size", Version(2, 0) },
+    std::pair{ "-cl-no-subgroup-ifp", Version(2, 1) },
+};
 
 cl_int get_result_from_program( cl_context context, cl_command_queue queue, cl_program program, cl_int *outValue )
 {
@@ -93,31 +92,41 @@ REGISTER_TEST(options_build_optimizations)
     int error;
     cl_build_status status;
 
-    for(size_t i = 0; i < sizeof(optimization_options) / (sizeof(char*)); i++) {
+    Version version = get_device_cl_version(device);
 
-        clProgramWrapper program;
-        error = create_single_kernel_helper_create_program(context, &program, 1, options_test_kernel, optimization_options[i]);
-        if( program == NULL || error != CL_SUCCESS )
+    for (const auto &optimization_option : optimization_options)
+    {
+        if (version < optimization_option.second)
         {
-            log_error( "ERROR: Unable to create reference program!\n" );
+            continue;
+        }
+
+        const char *option = optimization_option.first;
+        clProgramWrapper program;
+        error = create_single_kernel_helper_create_program(
+            context, &program, 1, options_test_kernel, option);
+        if (program == NULL || error != CL_SUCCESS)
+        {
+            log_error("ERROR: Unable to create reference program!\n");
             return -1;
         }
 
         /* Build with the macro defined */
-        log_info("Testing optimization option '%s'\n", optimization_options[i]);
-        error = clBuildProgram(program, 1, &device, optimization_options[i],
-                               NULL, NULL);
-        test_error( error, "Test program did not properly build" );
+        log_info("Testing optimization option '%s'\n", option);
+        error = clBuildProgram(program, 1, &device, option, NULL, NULL);
+        test_error(error, "Test program did not properly build");
 
         error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS,
                                       sizeof(status), &status, NULL);
-        test_error( error, "Unable to get program build status" );
+        test_error(error, "Unable to get program build status");
 
-        if( (int)status != CL_BUILD_SUCCESS )
+        if ((int)status != CL_BUILD_SUCCESS)
         {
-            log_info("Building with optimization option '%s' failed to compile!\n", optimization_options[i]);
-            print_error( error, "Failed to build with optimization defined")
-            return -1;
+            log_info(
+                "Building with optimization option '%s' failed to compile!\n",
+                option);
+            print_error(error,
+                        "Failed to build with optimization defined") return -1;
         }
     }
     return 0;
@@ -415,3 +424,50 @@ REGISTER_TEST(options_denorm_cache)
 
     return 0;
 }
+
+REGISTER_TEST(options_uniform_work_group_size)
+{
+    if (get_device_cl_version(device) < Version(2, 0))
+    {
+        return TEST_SKIPPED_ITSELF;
+    }
+    const char *options = "-cl-uniform-work-group-size";
+    clProgramWrapper program;
+    int error = create_single_kernel_helper_create_program(
+        context, &program, 1, options_test_kernel, options);
+    if (program == NULL || error != CL_SUCCESS)
+    {
+        log_error("Error: Unable to create reference program!\n");
+        return TEST_FAIL;
+    }
+    error = clBuildProgram(program, 1, &device, options, NULL, NULL);
+    test_error(error, "Test program did not properly build");
+
+    clKernelWrapper kernel = clCreateKernel(program, "sample_test", &error);
+    test_error(error, "Unable to create kernel");
+
+    size_t global_work_size = 4;
+    size_t uniform_local_work_size = 2;
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size,
+                                   &uniform_local_work_size, 0, NULL, NULL);
+    test_error(error,
+               "Unable to enqueue NDRange kernel with uniform work group size");
+    error = clFinish(queue);
+    test_error(error, "Unable to finish");
+
+    size_t non_uniform_local_work_size = 3;
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size,
+                                   &non_uniform_local_work_size, 0, NULL, NULL);
+
+    if (error != CL_INVALID_WORK_GROUP_SIZE)
+    {
+        log_error(
+            "Error: expected error 'CL_INVALID_WORK_GROUP_SIZE' (got '%s') "
+            "trying to enqueue kernel compiled with '%s' with non-uniform work "
+            "group size\n",
+            IGetErrorString(error), options);
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}

From 34e9baecdae439414c06c589bf8189d025ad7027 Mon Sep 17 00:00:00 2001
From: Ewan Crawford <ewan.cr@gmail.com>
Date: Thu, 27 Nov 2025 00:19:59 +0000
Subject: [PATCH 11/54] Test mutable command-buffer deferred kernel arguments
 (#2538)

Tests the scenario outlined in
https://github.com/KhronosGroup/OpenCL-CTS/issues/2520#issuecomment-3280930492
to verify the functionality added in OpenCL-Docs PR
https://github.com/KhronosGroup/OpenCL-Docs/pull/1382.

Closes #2520

---------

Co-authored-by: Ben Ashbaugh <ben.ashbaugh@intel.com>
---
 .../CMakeLists.txt                            |   1 +
 .../mutable_command_basic.h                   |   4 +-
 .../mutable_command_defer_arguments.cpp       | 219 ++++++++++++++++++
 3 files changed, 222 insertions(+), 2 deletions(-)
 create mode 100644 test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_defer_arguments.cpp

diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt
index aed183ff..bb085035 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt
@@ -16,6 +16,7 @@ set(${MODULE_NAME}_SOURCES
     mutable_command_work_groups.cpp
     mutable_command_work_dim.cpp
     mutable_command_update_state.cpp
+    mutable_command_defer_arguments.cpp
     ../basic_command_buffer.cpp
 )
 
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
index 47b0c97a..917183c8 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
@@ -89,10 +89,10 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
                 cl_version extension_version = get_extension_version(
                     device, "cl_khr_command_buffer_mutable_dispatch");
 
-                if (extension_version != CL_MAKE_VERSION(0, 9, 4))
+                if (extension_version != CL_MAKE_VERSION(0, 9, 5))
                 {
                     log_info("cl_khr_command_buffer_mutable_dispatch version "
-                             "0.9.4 is "
+                             "0.9.5 is "
                              "required to run the test, skipping.\n ");
                     extension_available = false;
                 }
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_defer_arguments.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_defer_arguments.cpp
new file mode 100644
index 00000000..2e10cbf8
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_defer_arguments.cpp
@@ -0,0 +1,219 @@
+//
+// Copyright (c) 2025 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "testHarness.h"
+#include "mutable_command_basic.h"
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+// Mutable dispatch test which handles the case where all the arguments of a
+// kernel aren't set when a kernel is initially added to a mutable
+// command-buffer, but deferred until an update is made to the command to set
+// them before command-buffer enqueue.
+struct MutableDispatchDeferArguments : public BasicMutableCommandBufferTest
+{
+    MutableDispatchDeferArguments(cl_device_id device, cl_context context,
+                                  cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    bool Skip() override
+    {
+        if (BasicMutableCommandBufferTest::Skip()) return true;
+        cl_mutable_dispatch_fields_khr mutable_capabilities;
+        bool mutable_support =
+            !clGetDeviceInfo(
+                device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
+                sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
+            && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
+
+        // require mutable arguments capability
+        return !mutable_support;
+    }
+
+    cl_int SetUpKernel() override
+    {
+        // Create kernel
+        const char *defer_args_kernel =
+            R"(
+            __kernel void defer_args_test(__constant int *src, __global int *dst)
+            {
+                size_t tid = get_global_id(0);
+                dst[tid] = src[tid];
+            })";
+
+        cl_int error =
+            create_single_kernel_helper(context, &program, &kernel, 1,
+                                        &defer_args_kernel, "defer_args_test");
+        test_error(error, "Creating kernel failed");
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUpKernelArgs() override
+    {
+        // Create and initialize buffers
+        MTdataHolder d(gRandomSeed);
+
+        src_data.resize(num_elements);
+        for (size_t i = 0; i < num_elements; i++)
+            src_data[i] = (cl_int)genrand_int32(d);
+
+        cl_int error = CL_SUCCESS;
+        in_mem = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+                                num_elements * sizeof(cl_int), src_data.data(),
+                                &error);
+        test_error(error, "Creating src buffer");
+
+        out_mem = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                 num_elements * sizeof(cl_int), NULL, &error);
+        test_error(error, "Creating initial dst buffer failed");
+
+        // Only set a single kernel argument, leaving argument at index 1 unset
+        error = clSetKernelArg(kernel, 0, sizeof(in_mem), &in_mem);
+        test_error(error, "Unable to set src kernel arguments");
+
+        return CL_SUCCESS;
+    }
+
+    bool verify_state(cl_command_buffer_state_khr expected)
+    {
+        cl_command_buffer_state_khr state = ~cl_command_buffer_state_khr(0);
+        cl_int error = clGetCommandBufferInfoKHR(
+            command_buffer, CL_COMMAND_BUFFER_STATE_KHR, sizeof(state), &state,
+            nullptr);
+        if (error != CL_SUCCESS)
+        {
+            log_error("clGetCommandBufferInfoKHR failed: %d\n", error);
+            return false;
+        }
+        // The query succeeded; compare the reported state with `expected`.
+        if (state != expected)
+        {
+            log_error("Unexpected result of CL_COMMAND_BUFFER_STATE_KHR query. "
+                      "Expected %u, but was %u\n",
+                      expected, state);
+            return false;
+        }
+        return true;
+    }
+
+    bool verify_result(const cl_mem &buffer)
+    {
+        std::vector<cl_int> data(num_elements);
+        cl_int error =
+            clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size(),
+                                data.data(), 0, nullptr, nullptr);
+        // Not test_error: it would return the non-zero cl_int, i.e. `true`.
+        test_error_ret(error, "clEnqueueReadBuffer failed", false);
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            if (data[i] != src_data[i])
+            {
+                log_error("Modified verification failed at index %zu: Got %d, "
+                          "wanted %d\n",
+                          i, data[i], src_data[i]);
+                return false;
+            }
+        }
+        return true;
+    }
+
+    cl_int Run() override
+    {
+        // Create command while the kernel still has the second argument unset.
+        // Passing 'CL_MUTABLE_DISPATCH_ARGUMENTS_KHR' as a property means this
+        // shouldn't be an error.
+        cl_command_properties_khr props[] = {
+            CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
+            CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
+        };
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        // Finalizing the command buffer shouldn't be an error, but result in
+        // the command-buffer entering the CL_COMMAND_BUFFER_STATE_FINALIZED
+        // state.
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+        if (!verify_state(CL_COMMAND_BUFFER_STATE_FINALIZED_KHR))
+        {
+            return TEST_FAIL;
+        }
+
+        // Check that trying to enqueue the command-buffer in this state is an
+        // error, as it needs to be in the executable state.
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_failure_error_ret(error, CL_INVALID_OPERATION,
+                               "clEnqueueCommandBufferKHR should return "
+                               "CL_INVALID_OPERATION",
+                               TEST_FAIL);
+
+        // Update the kernel command to set the missing argument.
+        cl_mutable_dispatch_arg_khr arg{ 1, sizeof(out_mem), &out_mem };
+        cl_mutable_dispatch_config_khr dispatch_config{
+            command,
+            1 /* num_args */,
+            0 /* num_svm_arg */,
+            0 /* num_exec_infos */,
+            0 /* work_dim - 0 means no change to dimensions */,
+            &arg /* arg_list */,
+            nullptr /* arg_svm_list - nullptr means no change*/,
+            nullptr /* exec_info_list */,
+            nullptr /* global_work_offset */,
+            nullptr /* global_work_size */,
+            nullptr /* local_work_size */
+        };
+
+        cl_uint num_configs = 1;
+        cl_command_buffer_update_type_khr config_types[1] = {
+            CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
+        };
+        const void *configs[1] = { &dispatch_config };
+        error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
+                                           config_types, configs);
+        test_error(error, "clUpdateMutableCommandsKHR failed");
+
+        // Now that all the arguments have been set, verify the
+        // command-buffer has entered the executable state.
+        if (!verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR))
+        {
+            return TEST_FAIL;
+        }
+
+        // Execute command-buffer and verify results are expected
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+        if (!verify_result(out_mem)) return TEST_FAIL;
+
+        return TEST_PASS;
+    }
+
+    cl_mutable_command_khr command;
+    std::vector<cl_int> src_data;
+};
+
+} // anonymous namespace
+
+REGISTER_TEST(mutable_dispatch_defer_arguments)
+{
+    return MakeAndRunTest<MutableDispatchDeferArguments>(device, context, queue,
+                                                         num_elements);
+}

From c0e6447961ba7369d2cf8da246cb65dd2cd4c7f3 Mon Sep 17 00:00:00 2001
From: Michael Rizkalla <michael.rizkalla@arm.com>
Date: Thu, 27 Nov 2025 00:20:46 +0000
Subject: [PATCH 12/54] Implement negative tests for
 clEnqueueMarkerWithWaitList (#2502)

This change adds negative tests for `clEnqueueMarkerWithWaitList`

Signed-off-by: Michael Rizkalla <michael.rizkalla@arm.com>
---
 test_conformance/api/CMakeLists.txt           |  1 +
 .../api/negative_enqueue_marker.cpp           | 97 +++++++++++++++++++
 2 files changed, 98 insertions(+)
 create mode 100644 test_conformance/api/negative_enqueue_marker.cpp

diff --git a/test_conformance/api/CMakeLists.txt b/test_conformance/api/CMakeLists.txt
index b781e49b..c0ab77b5 100644
--- a/test_conformance/api/CMakeLists.txt
+++ b/test_conformance/api/CMakeLists.txt
@@ -6,6 +6,7 @@ set(${MODULE_NAME}_SOURCES
          main.cpp
          negative_platform.cpp
          negative_queue.cpp
+         negative_enqueue_marker.cpp
          negative_enqueue_map_image.cpp
          test_api_consistency.cpp
          test_bool.cpp
diff --git a/test_conformance/api/negative_enqueue_marker.cpp b/test_conformance/api/negative_enqueue_marker.cpp
new file mode 100644
index 00000000..182b3dbf
--- /dev/null
+++ b/test_conformance/api/negative_enqueue_marker.cpp
@@ -0,0 +1,97 @@
+//
+// Copyright (c) 2025 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "testBase.h"
+#include "harness/typeWrappers.h"
+
+REGISTER_TEST(negative_enqueue_marker_with_wait_list)
+{
+    cl_platform_id platform = getPlatformFromDevice(device);
+    cl_context_properties props[3] = {
+        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(platform),
+        0
+    };
+
+    cl_int err = CL_SUCCESS;
+    clContextWrapper ctx =
+        clCreateContext(props, 1, &device, nullptr, nullptr, &err);
+    test_error(err, "clCreateContext failed");
+
+    cl_event ret_event = nullptr;
+
+    err = clEnqueueMarkerWithWaitList(nullptr, 0, nullptr, &ret_event);
+    test_failure_error_ret(err, CL_INVALID_COMMAND_QUEUE,
+                           "clEnqueueMarkerWithWaitList should return "
+                           "CL_INVALID_COMMAND_QUEUE when: \"command_queue is "
+                           "not a valid host command-queue\" using a nullptr",
+                           TEST_FAIL);
+    test_assert_error(ret_event == nullptr,
+                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
+                      "should be created");
+
+    clEventWrapper different_ctx_event = clCreateUserEvent(ctx, &err);
+    test_error(err, "clCreateUserEvent failed");
+
+    err =
+        clEnqueueMarkerWithWaitList(queue, 1, &different_ctx_event, &ret_event);
+    test_failure_error_ret(
+        err, CL_INVALID_CONTEXT,
+        "clEnqueueMarkerWithWaitList should return CL_INVALID_CONTEXT when: "
+        "\"The context of both the command queue and the events in ret_event "
+        "wait list are not the same\"",
+        TEST_FAIL);
+    test_assert_error(ret_event == nullptr,
+                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
+                      "should be created");
+
+    err = clEnqueueMarkerWithWaitList(queue, 1, nullptr, &ret_event);
+    test_failure_error_ret(
+        err, CL_INVALID_EVENT_WAIT_LIST,
+        "clEnqueueMarkerWithWaitList should return CL_INVALID_EVENT_WAIT_LIST "
+        "when: \"num_events_in_wait_list > 0 but event_wait_list is NULL\"",
+        TEST_FAIL);
+    test_assert_error(ret_event == nullptr,
+                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
+                      "should be created");
+
+
+    clEventWrapper event = clCreateUserEvent(context, &err);
+    test_error(err, "clCreateUserEvent failed");
+
+    err = clEnqueueMarkerWithWaitList(queue, 0, &event, &ret_event);
+    test_failure_error_ret(
+        err, CL_INVALID_EVENT_WAIT_LIST,
+        "clEnqueueMarkerWithWaitList should return CL_INVALID_EVENT_WAIT_LIST "
+        "when: \"num_events_in_wait_list is 0 but event_wait_list is not "
+        "NULL\"",
+        TEST_FAIL);
+    test_assert_error(ret_event == nullptr,
+                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
+                      "should be created");
+
+    cl_event invalid_event_wait_list[] = { nullptr };
+    err = clEnqueueMarkerWithWaitList(queue, 1, invalid_event_wait_list,
+                                      &ret_event);
+    test_failure_error_ret(
+        err, CL_INVALID_EVENT_WAIT_LIST,
+        "clEnqueueMarkerWithWaitList should return CL_INVALID_EVENT_WAIT_LIST "
+        "when: \"event objects in event_wait_list are not valid events\"",
+        TEST_FAIL);
+    test_assert_error(ret_event == nullptr,
+                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
+                      "should be created");
+
+    return TEST_PASS;
+}

From 65fd490bc946b12fded871be464c7b8087be06ae Mon Sep 17 00:00:00 2001
From: Nadezhda  Samartseva <88769266+funch0za@users.noreply.github.com>
Date: Thu, 27 Nov 2025 03:23:34 +0300
Subject: [PATCH 13/54] Registration for the profiling_timebase test has been
 changed: version 2.1 is now required. (#2584)

This fix is necessary to set the minimum required OpenCL version for the
profiling_timebase test. This test uses clGetDeviceAndHostTimer, which is
only available since version 2.1.
fix #2582
---
 test_conformance/profiling/profiling_timebase.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_conformance/profiling/profiling_timebase.cpp b/test_conformance/profiling/profiling_timebase.cpp
index f26a9d89..23afcbb8 100644
--- a/test_conformance/profiling/profiling_timebase.cpp
+++ b/test_conformance/profiling/profiling_timebase.cpp
@@ -18,7 +18,7 @@
 
 const char *kernelCode = "__kernel void kernel_empty(){}";
 
-REGISTER_TEST(profiling_timebase)
+REGISTER_TEST_VERSION(profiling_timebase, Version(2, 1))
 {
     Version version = get_device_cl_version(device);
     cl_platform_id platform = getPlatformFromDevice(device);

From 5b2c1acd785787181d407bf6b3bc2e260fd2a456 Mon Sep 17 00:00:00 2001
From: Jose Lopez <joselope@qti.qualcomm.com>
Date: Thu, 27 Nov 2025 00:24:03 +0000
Subject: [PATCH 14/54] Add clFinish to compiler multiple_build_program test
 before building again (#2574)

[clBuildProgram](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#clBuildProgram)
has the following error condition:

>
[CL_INVALID_OPERATION](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#CL_INVALID_OPERATION)
if there are kernel objects attached to program.

The test needs to wait for the NDRangeKernel to finish in order to build
the program again. If not, kernel0 might still have references and
therefore still be attached to program.
---
 test_conformance/compiler/test_compile.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/test_conformance/compiler/test_compile.cpp b/test_conformance/compiler/test_compile.cpp
index b5a134d1..672291ff 100644
--- a/test_conformance/compiler/test_compile.cpp
+++ b/test_conformance/compiler/test_compile.cpp
@@ -3983,6 +3983,9 @@ REGISTER_TEST(multiple_build_program)
         error = clEnqueueNDRangeKernel(queue, kernel0, 1, NULL, &num_threads,
                                        NULL, 0, NULL, NULL);
         test_error(error, "clEnqueueNDRangeKernel failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
     }
 
     {
@@ -4001,10 +4004,10 @@ REGISTER_TEST(multiple_build_program)
         error = clEnqueueNDRangeKernel(queue, kernel1, 1, NULL, &num_threads,
                                        NULL, 0, NULL, NULL);
         test_error(error, "clEnqueueNDRangeKernel failed");
-    }
 
-    error = clFinish(queue);
-    test_error(error, "clFinish failed");
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+    }
 
     std::vector<cl_int> test_values(num_threads, 0);
     error = clEnqueueReadBuffer(queue, out_stream_0, true, 0,

From 2174715160394451e6f6064daf2526ee92926f02 Mon Sep 17 00:00:00 2001
From: vangthao95 <vang.thao@amd.com>
Date: Wed, 26 Nov 2025 16:25:02 -0800
Subject: [PATCH 15/54] Fix incompatible pointer type warning for
 device_execution test (#2558)

There are multiple incompatible pointer type warnings when compiling
the device_execution test with clang. An upstream LLVM patch will turn
these warnings into errors:
https://github.com/llvm/llvm-project/pull/157364.

To not encounter this issue in the future, fix these warnings by
changing the parameter types.

```
warning: incompatible pointer types passing '__global ulong (*)[512]' (aka '__global unsigned long (*)[512]') to parameter of type 'const __generic ulong *' (aka 'const __generic unsigned long *') [-Wincompatible-pointer-types]
   37 |         void (^checkBlock) (void) = ^{ check_res(tid, &value, res); };
      |                                                       ^~~~~~
note: passing argument to parameter 'value' here
--
warning: incompatible pointer types passing '__global int *const __private' to parameter of type '__global atomic_uint *' (aka '__global _Atomic(unsigned int) *') [-Wincompatible-pointer-types]
   10 |   void (^kernelBlock)(void) = ^{ block_fn(len, val); };
      |                                                ^~~
note: passing argument to parameter 'val' here
--
warning: incompatible pointer types passing '__global int *const __private' to parameter of type '__global atomic_uint *' (aka '__global _Atomic(unsigned int) *') [-Wincompatible-pointer-types]
   10 |   void (^kernelBlock)(void) = ^{ block_fn(len, val); };
      |                                                ^~~
note: passing argument to parameter 'val' here
--
warning: incompatible pointer types passing '__global int *const __private' to parameter of type '__global atomic_uint *' (aka '__global _Atomic(unsigned int) *') [-Wincompatible-pointer-types]
   10 |   void (^kernelBlock)(void) = ^{ block_fn(len, val); };
      |                                                ^~~
note: passing argument to parameter 'val' here
--
warning: incompatible pointer types passing '__global int *const __private' to parameter of type '__global atomic_uint *' (aka '__global _Atomic(unsigned int) *') [-Wincompatible-pointer-types]
   10 |   void (^kernelBlock)(void) = ^{ block_fn(len, val); };
      |                                                ^~~
note: passing argument to parameter 'val' here
--
warning: incompatible pointer types passing '__global int *const __private' to parameter of type '__global atomic_uint *' (aka '__global _Atomic(unsigned int) *') [-Wincompatible-pointer-types]
   10 |   void (^kernelBlock)(void) = ^{ block_fn(len, val); };
      |                                                ^~~
note: passing argument to parameter 'val' here
--
warning: incompatible pointer types passing '__global int *const __private' to parameter of type '__global atomic_uint *' (aka '__global _Atomic(unsigned int) *') [-Wincompatible-pointer-types]
   10 |   void (^kernelBlock)(void) = ^{ block_fn(len, val); };
      |                                                ^~~
note: passing argument to parameter 'val' here
```
---
 .../device_execution/enqueue_block.cpp        |  2 +-
 .../device_execution/enqueue_ndrange.cpp      | 24 +++++++++----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/test_conformance/device_execution/enqueue_block.cpp b/test_conformance/device_execution/enqueue_block.cpp
index 10d7b86d..eb0ea40f 100644
--- a/test_conformance/device_execution/enqueue_block.cpp
+++ b/test_conformance/device_execution/enqueue_block.cpp
@@ -340,7 +340,7 @@ static const char* enqueue_block_capture_event_profiling_info_before_execution[]
 
         set_user_event_status(user_evt, CL_COMPLETE);
 
-        void (^checkBlock) (void)  = ^{ check_res(tid, &value, res);      };
+        void (^checkBlock) (void)  = ^{ check_res(tid, value, res);      };
 
         enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);
         if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }
diff --git a/test_conformance/device_execution/enqueue_ndrange.cpp b/test_conformance/device_execution/enqueue_ndrange.cpp
index 8f71ac4e..4307058a 100644
--- a/test_conformance/device_execution/enqueue_ndrange.cpp
+++ b/test_conformance/device_execution/enqueue_ndrange.cpp
@@ -129,8 +129,8 @@ static const char *helper_ndrange_2d_glo[] = {
     "}" NL,
     "" NL,
     "kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
+    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
+    "atomic_uint* val,  __global uint* ofs_arr)" NL,
     "{" NL,
     "  size_t tid = get_global_id(0);" NL,
     "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -156,8 +156,8 @@ static const char *helper_ndrange_2d_loc[] = {
     "}" NL,
     "" NL,
     "kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
+    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
+    "atomic_uint* val,  __global uint* ofs_arr)" NL,
     "{" NL,
     "  size_t tid = get_global_id(0);" NL,
     "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -193,8 +193,8 @@ static const char *helper_ndrange_2d_ofs[] = {
     "}" NL,
     "" NL,
     "kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
+    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
+    "atomic_uint* val,  __global uint* ofs_arr)" NL,
     "{" NL,
     "  size_t tid = get_global_id(0);" NL,
     "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -233,8 +233,8 @@ static const char *helper_ndrange_3d_glo[] = {
     "}" NL,
     "" NL,
     "kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
+    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
+    "atomic_uint* val,  __global uint* ofs_arr)" NL,
     "{" NL,
     "  size_t tid = get_global_id(0);" NL,
     "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -266,8 +266,8 @@ static const char *helper_ndrange_3d_loc[] = {
     "}" NL,
     "" NL,
     "kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
+    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
+    "atomic_uint* val,  __global uint* ofs_arr)" NL,
     "{" NL,
     "  size_t tid = get_global_id(0);" NL,
     "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -306,8 +306,8 @@ static const char *helper_ndrange_3d_ofs[] = {
     "}" NL,
     "" NL,
     "kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
+    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
+    "atomic_uint* val,  __global uint* ofs_arr)" NL,
     "{" NL,
     "  size_t tid = get_global_id(0);" NL,
     "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,

From d1b75bd200b66069f02ffe25bb4afcb0e515d9f9 Mon Sep 17 00:00:00 2001
From: alan-baker <alanbaker@google.com>
Date: Wed, 26 Nov 2025 19:25:46 -0500
Subject: [PATCH 16/54] c11_atomics: Filter invalid memory orders for 2 order
 instructions (#2506)

* OpenCL C (and SPIR-V) require that the failure memory order is not
stronger than the success memory order.

Also see Khronos internal memory model issue #181

CC @bashbaug
---
 test_conformance/c11_atomics/common.h | 36 +++++++++++++++++++--------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h
index aee5173d..09535a88 100644
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h
@@ -702,6 +702,28 @@ public:
                                            cl_context context,
                                            cl_command_queue queue)
     {
+        // Comparator for orders and scopes.
+        const auto checkValidity = [](TExplicitMemoryOrderType success,
+                                      TExplicitMemoryOrderType failure,
+                                      TExplicitMemoryScopeType scope) {
+            // Both memory order arguments must be set (or neither).
+            if ((success == MEMORY_ORDER_EMPTY || failure == MEMORY_ORDER_EMPTY)
+                && success != failure)
+                return false;
+
+            // Memory scope without memory order is disallowed.
+            if (success == MEMORY_ORDER_EMPTY && scope != MEMORY_SCOPE_EMPTY)
+                return false;
+
+            // Failure must not be release or acq_rel.
+            if (failure == MEMORY_ORDER_RELEASE
+                || failure == MEMORY_ORDER_ACQ_REL)
+                return false;
+
+            // Failure must not be stronger than success.
+            return failure <= success;
+        };
+
         // repeat test for each reasonable memory order/scope combination
         std::vector<TExplicitMemoryOrderType> memoryOrder;
         std::vector<TExplicitMemoryScopeType> memoryScope;
@@ -719,16 +741,10 @@ public:
             {
                 for (unsigned si = 0; si < memoryScope.size(); si++)
                 {
-                    if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY
-                         || memoryOrder[o2i] == MEMORY_ORDER_EMPTY)
-                        && memoryOrder[oi] != memoryOrder[o2i])
-                        continue; // both memory order arguments must be set (or
-                                  // none)
-                    if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY
-                         || memoryOrder[o2i] == MEMORY_ORDER_EMPTY)
-                        && memoryScope[si] != MEMORY_SCOPE_EMPTY)
-                        continue; // memory scope without memory order is not
-                                  // allowed
+                    if (!checkValidity(memoryOrder[oi], memoryOrder[o2i],
+                                       memoryScope[si]))
+                        continue;
+
                     MemoryOrder(memoryOrder[oi]);
                     MemoryOrder2(memoryOrder[o2i]);
                     MemoryScope(memoryScope[si]);

From 5846e9bc593deec944917eb1e0b7067ab6fcaff4 Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjodin@google.com>
Date: Thu, 27 Nov 2025 08:42:35 +0100
Subject: [PATCH 17/54] include algorithm for std::min(list) (#2587)

The initializer-list overload std::min(list) is declared in <algorithm>.
Some setups happen to compile without that include, but others require
it to find the proper definition.

Otherwise it can lead to a compilation error:
```
OpenCL-CTS/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp:141:22: error: no matching function for call to 'min'
  141 |         group_size = std::min(
      |                      ^~~~~~~~
/usr/include/c++/14/bits/stl_algobase.h:233:5: note: candidate function template not viable: requires 2 arguments, but 1 was provided
  233 |     min(const _Tp& __a, const _Tp& __b)
      |     ^   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/usr/include/c++/14/bits/stl_algobase.h:281:5: note: candidate function template not viable: requires 3 arguments, but 1 was provided
  281 |     min(const _Tp& __a, const _Tp& __b, _Compare __comp)
      |     ^   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1 error generated.
```
---
 .../mutable_command_full_dispatch.cpp                            | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp
index 347f6407..8919d5c2 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp
@@ -20,6 +20,7 @@
 #include <CL/cl.h>
 #include <CL/cl_ext.h>
 
+#include <algorithm>
 #include <vector>
 
 namespace {

From 2bb364bb1dcfe14fa81d47632316ce2ec2ade07b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 1 Dec 2025 07:10:53 -0800
Subject: [PATCH 18/54] Bump actions/checkout from 5 to 6 in the github-actions
 group (#2589)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps the github-actions group with 1 update:
[actions/checkout](https://github.com/actions/checkout).

Updates `actions/checkout` from 5 to 6
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/actions/checkout/releases">actions/checkout's
releases</a>.</em></p>
<blockquote>
<h2>v6.0.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Update README to include Node.js 24 support details and requirements
by <a href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a>
in <a
href="https://redirect.github.com/actions/checkout/pull/2248">actions/checkout#2248</a></li>
<li>Persist creds to a separate file by <a
href="https://github.com/ericsciple"><code>@​ericsciple</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2286">actions/checkout#2286</a></li>
<li>v6-beta by <a
href="https://github.com/ericsciple"><code>@​ericsciple</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2298">actions/checkout#2298</a></li>
<li>update readme/changelog for v6 by <a
href="https://github.com/ericsciple"><code>@​ericsciple</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2311">actions/checkout#2311</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/checkout/compare/v5.0.0...v6.0.0">https://github.com/actions/checkout/compare/v5.0.0...v6.0.0</a></p>
<h2>v6-beta</h2>
<h2>What's Changed</h2>
<p>Updated persist-credentials to store the credentials under
<code>$RUNNER_TEMP</code> instead of directly in the local git
config.</p>
<p>This requires a minimum Actions Runner version of <a
href="https://github.com/actions/runner/releases/tag/v2.329.0">v2.329.0</a>
to access the persisted credentials for <a
href="https://docs.github.com/en/actions/tutorials/use-containerized-services/create-a-docker-container-action">Docker
container action</a> scenarios.</p>
<h2>v5.0.1</h2>
<h2>What's Changed</h2>
<ul>
<li>Port v6 cleanup to v5 by <a
href="https://github.com/ericsciple"><code>@​ericsciple</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2301">actions/checkout#2301</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/checkout/compare/v5...v5.0.1">https://github.com/actions/checkout/compare/v5...v5.0.1</a></p>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/actions/checkout/blob/main/CHANGELOG.md">actions/checkout's
changelog</a>.</em></p>
<blockquote>
<h1>Changelog</h1>
<h2>V6.0.0</h2>
<ul>
<li>Persist creds to a separate file by <a
href="https://github.com/ericsciple"><code>@​ericsciple</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2286">actions/checkout#2286</a></li>
<li>Update README to include Node.js 24 support details and requirements
by <a href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a>
in <a
href="https://redirect.github.com/actions/checkout/pull/2248">actions/checkout#2248</a></li>
</ul>
<h2>V5.0.1</h2>
<ul>
<li>Port v6 cleanup to v5 by <a
href="https://github.com/ericsciple"><code>@​ericsciple</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2301">actions/checkout#2301</a></li>
</ul>
<h2>V5.0.0</h2>
<ul>
<li>Update actions checkout to use node 24 by <a
href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2226">actions/checkout#2226</a></li>
</ul>
<h2>V4.3.1</h2>
<ul>
<li>Port v6 cleanup to v4 by <a
href="https://github.com/ericsciple"><code>@​ericsciple</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2305">actions/checkout#2305</a></li>
</ul>
<h2>V4.3.0</h2>
<ul>
<li>docs: update README.md by <a
href="https://github.com/motss"><code>@​motss</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1971">actions/checkout#1971</a></li>
<li>Add internal repos for checking out multiple repositories by <a
href="https://github.com/mouismail"><code>@​mouismail</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1977">actions/checkout#1977</a></li>
<li>Documentation update - add recommended permissions to Readme by <a
href="https://github.com/benwells"><code>@​benwells</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2043">actions/checkout#2043</a></li>
<li>Adjust positioning of user email note and permissions heading by <a
href="https://github.com/joshmgross"><code>@​joshmgross</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2044">actions/checkout#2044</a></li>
<li>Update README.md by <a
href="https://github.com/nebuk89"><code>@​nebuk89</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2194">actions/checkout#2194</a></li>
<li>Update CODEOWNERS for actions by <a
href="https://github.com/TingluoHuang"><code>@​TingluoHuang</code></a>
in <a
href="https://redirect.github.com/actions/checkout/pull/2224">actions/checkout#2224</a></li>
<li>Update package dependencies by <a
href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2236">actions/checkout#2236</a></li>
</ul>
<h2>v4.2.2</h2>
<ul>
<li><code>url-helper.ts</code> now leverages well-known environment
variables by <a href="https://github.com/jww3"><code>@​jww3</code></a>
in <a
href="https://redirect.github.com/actions/checkout/pull/1941">actions/checkout#1941</a></li>
<li>Expand unit test coverage for <code>isGhes</code> by <a
href="https://github.com/jww3"><code>@​jww3</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1946">actions/checkout#1946</a></li>
</ul>
<h2>v4.2.1</h2>
<ul>
<li>Check out other refs/* by commit if provided, fall back to ref by <a
href="https://github.com/orhantoy"><code>@​orhantoy</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1924">actions/checkout#1924</a></li>
</ul>
<h2>v4.2.0</h2>
<ul>
<li>Add Ref and Commit outputs by <a
href="https://github.com/lucacome"><code>@​lucacome</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1180">actions/checkout#1180</a></li>
<li>Dependency updates by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a>- <a
href="https://redirect.github.com/actions/checkout/pull/1777">actions/checkout#1777</a>,
<a
href="https://redirect.github.com/actions/checkout/pull/1872">actions/checkout#1872</a></li>
</ul>
<h2>v4.1.7</h2>
<ul>
<li>Bump the minor-npm-dependencies group across 1 directory with 4
updates by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1739">actions/checkout#1739</a></li>
<li>Bump actions/checkout from 3 to 4 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1697">actions/checkout#1697</a></li>
<li>Check out other refs/* by commit by <a
href="https://github.com/orhantoy"><code>@​orhantoy</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1774">actions/checkout#1774</a></li>
<li>Pin actions/checkout's own workflows to a known, good, stable
version. by <a href="https://github.com/jww3"><code>@​jww3</code></a> in
<a
href="https://redirect.github.com/actions/checkout/pull/1776">actions/checkout#1776</a></li>
</ul>
<h2>v4.1.6</h2>
<ul>
<li>Check platform to set archive extension appropriately by <a
href="https://github.com/cory-miller"><code>@​cory-miller</code></a> in
<a
href="https://redirect.github.com/actions/checkout/pull/1732">actions/checkout#1732</a></li>
</ul>
<h2>v4.1.5</h2>
<ul>
<li>Update NPM dependencies by <a
href="https://github.com/cory-miller"><code>@​cory-miller</code></a> in
<a
href="https://redirect.github.com/actions/checkout/pull/1703">actions/checkout#1703</a></li>
<li>Bump github/codeql-action from 2 to 3 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1694">actions/checkout#1694</a></li>
<li>Bump actions/setup-node from 1 to 4 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1696">actions/checkout#1696</a></li>
<li>Bump actions/upload-artifact from 2 to 4 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1695">actions/checkout#1695</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/actions/checkout/commit/1af3b93b6815bc44a9784bd300feb67ff0d1eeb3"><code>1af3b93</code></a>
update readme/changelog for v6 (<a
href="https://redirect.github.com/actions/checkout/issues/2311">#2311</a>)</li>
<li><a
href="https://github.com/actions/checkout/commit/71cf2267d89c5cb81562390fa70a37fa40b1305e"><code>71cf226</code></a>
v6-beta (<a
href="https://redirect.github.com/actions/checkout/issues/2298">#2298</a>)</li>
<li><a
href="https://github.com/actions/checkout/commit/069c6959146423d11cd0184e6accf28f9d45f06e"><code>069c695</code></a>
Persist creds to a separate file (<a
href="https://redirect.github.com/actions/checkout/issues/2286">#2286</a>)</li>
<li><a
href="https://github.com/actions/checkout/commit/ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493"><code>ff7abcd</code></a>
Update README to include Node.js 24 support details and requirements (<a
href="https://redirect.github.com/actions/checkout/issues/2248">#2248</a>)</li>
<li>See full diff in <a
href="https://github.com/actions/checkout/compare/v5...v6">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/checkout&package-manager=github_actions&previous-version=5&new-version=6)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore <dependency name> major version` will close this
group update PR and stop Dependabot creating any more for the specific
dependency's major version (unless you unignore this specific
dependency's major version or upgrade to it yourself)
- `@dependabot ignore <dependency name> minor version` will close this
group update PR and stop Dependabot creating any more for the specific
dependency's minor version (unless you unignore this specific
dependency's minor version or upgrade to it yourself)
- `@dependabot ignore <dependency name>` will close this group update PR
and stop Dependabot creating any more for the specific dependency
(unless you unignore this specific dependency or upgrade to it yourself)
- `@dependabot unignore <dependency name>` will remove all of the ignore
conditions of the specified dependency
- `@dependabot unignore <dependency name> <ignore condition>` will
remove the ignore condition of the specified dependency and ignore
conditions


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/presubmit.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml
index 3e05d0c8..e22692f3 100644
--- a/.github/workflows/presubmit.yml
+++ b/.github/workflows/presubmit.yml
@@ -28,7 +28,7 @@ jobs:
             arch: android-aarch64
             android_arch_abi: arm64-v8a
     steps:
-      - uses: actions/checkout@v5
+      - uses: actions/checkout@v6
       - name: Setup Ninja
         uses: seanmiddleditch/gha-setup-ninja@master
       - name: Install Arm and AArch64 compilers
@@ -184,7 +184,7 @@ jobs:
     steps:
       - name: Install packages
         run: sudo apt install -y clang-format clang-format-14
-      - uses: actions/checkout@v5
+      - uses: actions/checkout@v6
         with:
           fetch-depth: 0
       - name: Check code format

From 0cca0ee8693af503c3d385645e91923a2b55e107 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 2 Dec 2025 17:39:16 +0100
Subject: [PATCH 19/54] Added support for cl_ext_float_atomics in
 CBasicTestFetchSub with atomic_double (#2368)

Related to #2142, according to the work plan, extending
CBasicTestFetchSub with support for atomic_double.
---
 test_conformance/c11_atomics/host_atomics.h   |   5 +-
 test_conformance/c11_atomics/main.cpp         |   1 -
 test_conformance/c11_atomics/test_atomics.cpp | 121 ++++++++++++++----
 3 files changed, 102 insertions(+), 25 deletions(-)

diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h
index 9a33f26c..fac21642 100644
--- a/test_conformance/c11_atomics/host_atomics.h
+++ b/test_conformance/c11_atomics/host_atomics.h
@@ -135,7 +135,10 @@ template <typename AtomicType, typename CorrespondingType>
 CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
                                         TExplicitMemoryOrderType order)
 {
-    if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
+    if constexpr (
+        std::is_same_v<
+            AtomicType,
+            HOST_ATOMIC_DOUBLE> || std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
     {
         static std::mutex mx;
         std::lock_guard<std::mutex> lock(mx);
diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp
index e2f1888f..78291f06 100644
--- a/test_conformance/c11_atomics/main.cpp
+++ b/test_conformance/c11_atomics/main.cpp
@@ -134,7 +134,6 @@ test_status InitCL(cl_device_id device) {
     if (is_extension_available(device, "cl_ext_float_atomics"))
     {
         gFloatAtomicsSupported = true;
-
         if (is_extension_available(device, "cl_khr_fp64"))
         {
             cl_int error = clGetDeviceInfo(
diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index f46520ca..56f350ad 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -1647,6 +1647,23 @@ REGISTER_TEST(svm_atomic_fetch_add)
                                          true);
 }
 
+template <typename T> double kahan_sub(const std::vector<T> &nums)
+{
+    return 0.0; // placeholder for element types other than double
+}
+template <> double kahan_sub<double>(const std::vector<double> &nums)
+{
+    double sum = 0.0;
+    double compensation = 0.0; // amount over-subtracted by rounding so far
+    for (double num : nums)
+    {
+        double y = num - compensation;
+        double t = sum - y;
+        compensation = (sum - t) - y; // actually subtracted minus intended
+        sum = t;
+    }
+    return sum;
+}
 template <typename HostAtomicType, typename HostDataType>
 class CBasicTestFetchSub
     : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
@@ -1671,7 +1688,7 @@ public:
         if constexpr (
             std::is_same_v<
                 HostDataType,
-                HOST_FLOAT> || std::is_same_v<HostDataType, HOST_HALF>)
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_HALF>)
         {
             StartValue(0);
             CBasicTestMemOrderScope<HostAtomicType,
@@ -1700,13 +1717,18 @@ public:
     bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                       MTdata d) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             if (threadCount > ref_vals.size())
             {
                 ref_vals.resize(threadCount);
+
                 for (cl_uint i = 0; i < threadCount; i++)
-                    ref_vals[i] = get_random_float(min_range, max_range, d);
+                    ref_vals[i] = (HostDataType)get_random_double(min_range,
+                                                                  max_range, d);
 
                 memcpy(startRefValues, ref_vals.data(),
                        sizeof(HostDataType) * ref_vals.size());
@@ -1714,22 +1736,52 @@ public:
                 // Estimate highest possible subtraction error for given set.
                 std::vector<HostDataType> sums;
                 std::sort(ref_vals.begin(), ref_vals.end());
-                sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
-                sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
 
-                std::sort(
-                    ref_vals.begin(), ref_vals.end(),
-                    [](float a, float b) { return std::abs(a) < std::abs(b); });
+                if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
+                {
+                    sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
 
-                double precise = 0.0;
-                for (auto elem : ref_vals) precise += double(elem);
-                sums.push_back(precise);
-                sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
-                sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
+                    sums.push_back(
+                        subtract(ref_vals.rbegin(), ref_vals.rend()));
 
-                std::sort(sums.begin(), sums.end());
-                max_error =
-                    std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
+                    std::sort(ref_vals.begin(), ref_vals.end(),
+                              [](double a, double b) {
+                                  return std::abs(a) < std::abs(b);
+                              });
+
+                    double precise = kahan_sub(ref_vals);
+                    sums.push_back(precise);
+
+                    sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
+
+                    sums.push_back(
+                        subtract(ref_vals.rbegin(), ref_vals.rend()));
+
+                    std::sort(sums.begin(), sums.end());
+                    max_error = std::abs((double)sums.front() - sums.back());
+                }
+                else
+                {
+                    sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
+                    sums.push_back(
+                        subtract(ref_vals.rbegin(), ref_vals.rend()));
+
+                    std::sort(ref_vals.begin(), ref_vals.end(),
+                              [](float a, float b) {
+                                  return std::abs(a) < std::abs(b);
+                              });
+
+                    double precise = 0.0;
+                    for (auto elem : ref_vals) precise += double(elem);
+                    sums.push_back(precise);
+                    sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
+                    sums.push_back(
+                        subtract(ref_vals.rbegin(), ref_vals.rend()));
+
+                    std::sort(sums.begin(), sums.end());
+                    max_error =
+                        std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
+                }
 
                 // restore unsorted order
                 memcpy(ref_vals.data(), startRefValues,
@@ -1742,7 +1794,7 @@ public:
             }
             return true;
         }
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
         {
             if (threadCount > ref_vals.size())
             {
@@ -1804,7 +1856,7 @@ public:
         if constexpr (
             std::is_same_v<
                 HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             return "  atomic_fetch_sub" + postfix + "(&destMemory[0], ("
                 + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1829,7 +1881,7 @@ public:
         if constexpr (
             std::is_same_v<
                 HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
                                   MemoryOrder());
@@ -1851,7 +1903,11 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             if (whichDestValue == 0)
                 for (cl_uint i = 0; i < threadCount; i++)
@@ -1882,7 +1938,10 @@ public:
                              const std::vector<HostAtomicType> &testValues,
                              cl_uint whichDestValue) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             if (whichDestValue == 0)
                 return std::abs((HOST_ATOMIC_FLOAT)expected
@@ -1927,7 +1986,18 @@ public:
     int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                           cl_command_queue queue) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
+        {
+            if (LocalMemory()
+                && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0; // skip test - not applicable
+
+            if (!LocalMemory()
+                && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
+                    == 0)
+                return 0;
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             if (LocalMemory()
                 && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
@@ -1956,7 +2026,7 @@ public:
         if constexpr (
             std::is_same_v<
                 HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             return threadCount;
         }
@@ -1991,6 +2061,11 @@ static int test_atomic_fetch_sub_generic(cl_device_id deviceID,
 
     if (gFloatAtomicsSupported)
     {
+        CBasicTestFetchSub<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
+            TYPE_ATOMIC_DOUBLE, useSVM);
+        EXECUTE_TEST(
+            error, test_double.Execute(deviceID, context, queue, num_elements));
+
         CBasicTestFetchSub<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
             TYPE_ATOMIC_FLOAT, useSVM);
         EXECUTE_TEST(

From 0c064ac0177db313e31fa4a83a284bb2abb60e51 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 2 Dec 2025 17:41:36 +0100
Subject: [PATCH 20/54] Added a test case for printing an empty string (#2590)

Fixes #2364 according to issue description
---
 test_conformance/printf/util_printf.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp
index 83a21fbb..0768adb5 100644
--- a/test_conformance/printf/util_printf.cpp
+++ b/test_conformance/printf/util_printf.cpp
@@ -1032,6 +1032,9 @@ testCase testCaseChar = {
 
 std::vector<printDataGenParameters> printStringGenParameters = {
 
+    // empty format, no data representation
+    { {""} },
+
     // empty format
     { {""}, "\"foo\"" },
 
@@ -1094,6 +1097,8 @@ std::vector<std::string> correctBufferString = {
 
     "",
 
+    "",
+
     " foo",
 
     "f",

From 011caecb575a31aff1f8afd69d7fe7becd698a26 Mon Sep 17 00:00:00 2001
From: Michael Rizkalla <michael.rizkalla@arm.com>
Date: Tue, 2 Dec 2025 17:34:45 +0000
Subject: [PATCH 21/54] Implement negative tests for cl_device_id API functions
 (#2495)

Signed-off-by: Michael Rizkalla <michael.rizkalla@arm.com>
Co-authored-by: Chetankumar Mistry <chetan.mistry@arm.com>
---
 test_common/harness/parseParameters.cpp    |  34 ++
 test_common/harness/testHarness.cpp        |   1 +
 test_common/harness/testHarness.h          |  32 ++
 test_conformance/api/CMakeLists.txt        |   1 +
 test_conformance/api/negative_device.cpp   | 526 +++++++++++++++++++++
 test_conformance/api/negative_platform.cpp |  15 +-
 6 files changed, 607 insertions(+), 2 deletions(-)
 create mode 100644 test_conformance/api/negative_device.cpp

diff --git a/test_common/harness/parseParameters.cpp b/test_common/harness/parseParameters.cpp
index 65167116..97aedb7c 100644
--- a/test_common/harness/parseParameters.cpp
+++ b/test_common/harness/parseParameters.cpp
@@ -58,6 +58,13 @@ void helpInfo()
         with a very small subset of the tests. This option should not be used
         for conformance submission (default: disabled).
 
+    --invalid-object-scenarios=<option_1>,<option_2>....
+        Specify different scenarios to use when
+        testing for object validity. Options can be:
+           nullptr                 To use a nullptr (default)
+           valid_object_wrong_type To use a valid_object which is not the correct type
+        NOTE: valid_object_wrong_type option is not required for OpenCL conformance.
+
 For offline compilation (binary and spir-v modes) only:
     --compilation-cache-mode <cache-mode>
         Specify a compilation caching mode:
@@ -104,6 +111,7 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore)
         }
 
         delArg = 0;
+        size_t i_object_length = strlen("--invalid-object-scenarios=");
 
         if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0)
         {
@@ -264,6 +272,32 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore)
                 return -1;
             }
         }
+        else if (!strncmp(argv[i],
+                          "--invalid-object-scenarios=", i_object_length))
+        {
+            if (strlen(argv[i]) > i_object_length)
+            {
+                delArg++;
+                gInvalidObject = 0;
+                std::string invalid_objects(argv[i]);
+
+                if (invalid_objects.find("nullptr") != std::string::npos)
+                {
+                    gInvalidObject |= InvalidObject::Nullptr;
+                }
+                if (invalid_objects.find("valid_object_wrong_type")
+                    != std::string::npos)
+                {
+                    gInvalidObject |= InvalidObject::ValidObjectWrongType;
+                }
+            }
+            else
+            {
+                log_error("Program argument for --invalid-object-scenarios was "
+                          "not specified.\n");
+                return -1;
+            }
+        }
 
         // cleaning parameters from argv tab
         for (int j = i; j < argc - delArg; j++) argv[j] = argv[j + delArg];
diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp
index c745a639..301b86d0 100644
--- a/test_common/harness/testHarness.cpp
+++ b/test_common/harness/testHarness.cpp
@@ -60,6 +60,7 @@ int gInfNanSupport = 1;
 int gIsEmbedded = 0;
 int gHasLong = 1;
 bool gCoreILProgram = true;
+int gInvalidObject = InvalidObject::Nullptr;
 
 #define DEFAULT_NUM_ELEMENTS 0x4000
 
diff --git a/test_common/harness/testHarness.h b/test_common/harness/testHarness.h
index cc9d8212..76fda76f 100644
--- a/test_common/harness/testHarness.h
+++ b/test_common/harness/testHarness.h
@@ -22,6 +22,7 @@
 
 #include <string>
 #include <vector>
+#include <type_traits>
 
 class Version {
 public:
@@ -257,6 +258,37 @@ extern std::string get_platform_info_string(cl_platform_id platform,
                                             cl_platform_info param_name);
 extern bool is_platform_extension_available(cl_platform_id platform,
                                             const char *extensionName);
+enum InvalidObject
+{
+    Nullptr = 1 << 0,
+    ValidObjectWrongType = 1 << 1,
+};
+
+extern int gInvalidObject;
+
+
+template <typename T> std::vector<T> get_invalid_objects(cl_device_id device)
+{
+    // Collects the invalid handles of type T selected by gInvalidObject.
+    const bool is_platform = std::is_same<T, cl_platform_id>::value;
+    const bool is_device = std::is_same<T, cl_device_id>::value;
+    std::vector<T> invalid_handles;
+    // The null-handle scenario is never generated for platform handles.
+    if (!is_platform && (gInvalidObject & InvalidObject::Nullptr) != 0)
+    {
+        invalid_handles.push_back(nullptr);
+    }
+
+    if ((gInvalidObject & InvalidObject::ValidObjectWrongType) != 0)
+    {
+        // Hand out a live handle of another type: the device's platform when
+        // a device is expected, otherwise the device itself.
+        invalid_handles.push_back(
+            is_device ? reinterpret_cast<T>(getPlatformFromDevice(device))
+                      : reinterpret_cast<T>(device));
+    }
+    return invalid_handles;
+}
 
 #if !defined(__APPLE__)
 void memset_pattern4(void *, const void *, size_t);
diff --git a/test_conformance/api/CMakeLists.txt b/test_conformance/api/CMakeLists.txt
index c0ab77b5..942fef89 100644
--- a/test_conformance/api/CMakeLists.txt
+++ b/test_conformance/api/CMakeLists.txt
@@ -8,6 +8,7 @@ set(${MODULE_NAME}_SOURCES
          negative_queue.cpp
          negative_enqueue_marker.cpp
          negative_enqueue_map_image.cpp
+         negative_device.cpp
          test_api_consistency.cpp
          test_bool.cpp
          test_retain.cpp
diff --git a/test_conformance/api/negative_device.cpp b/test_conformance/api/negative_device.cpp
new file mode 100644
index 00000000..d51c685c
--- /dev/null
+++ b/test_conformance/api/negative_device.cpp
@@ -0,0 +1,526 @@
+//
+// Copyright (c) 2021 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "testBase.h"
+#include "harness/testHarness.h"
+#include <vector>
+
+/* Negative Tests for clGetDeviceInfo */
+REGISTER_TEST(negative_get_device_info)
+{
+    cl_device_type dev_type = 0;
+    cl_int status = CL_SUCCESS;
+    // Every generated invalid handle must be rejected as an invalid device.
+    for (cl_device_id bad_device : get_invalid_objects<cl_device_id>(device))
+    {
+        status = clGetDeviceInfo(bad_device, CL_DEVICE_TYPE,
+                                 sizeof(dev_type), &dev_type, nullptr);
+        test_failure_error_ret(
+            status, CL_INVALID_DEVICE,
+            "clGetDeviceInfo should return CL_INVALID_DEVICE when: \"device "
+            "is not a valid device\"",
+            TEST_FAIL);
+    }
+    // Query with a param_name of 0 on the valid device.
+    constexpr cl_device_info unknown_param = 0;
+    status = clGetDeviceInfo(device, unknown_param, 0, nullptr, nullptr);
+    test_failure_error_ret(
+        status, CL_INVALID_VALUE,
+        "clGetDeviceInfo should return CL_INVALID_VALUE when: \"param_name is "
+        "not one of the supported values\"",
+        TEST_FAIL);
+    // Query with a zero-sized destination and a non-NULL pointer.
+    status = clGetDeviceInfo(device, CL_DEVICE_TYPE, 0, &dev_type, nullptr);
+    test_failure_error_ret(
+        status, CL_INVALID_VALUE,
+        "clGetDeviceInfo should return CL_INVALID_VALUE when: \"size in bytes "
+        "specified by param_value_size is < size of return type and "
+        "param_value is not a NULL value\"",
+        TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clGetDeviceIDs */
+REGISTER_TEST(negative_get_device_ids)
+{
+    cl_platform_id platform = getPlatformFromDevice(device);
+
+    cl_device_id devices = nullptr;
+    cl_int err(CL_SUCCESS);
+    for (auto invalid_platform : get_invalid_objects<cl_platform_id>(device))
+    {
+        err = clGetDeviceIDs(invalid_platform, CL_DEVICE_TYPE_DEFAULT, 1,
+                             &devices, nullptr);
+        test_failure_error_ret(err, CL_INVALID_PLATFORM,
+                               "clGetDeviceIDs should return "
+                               "CL_INVALID_PLATFORM when: \"platform is "
+                               "not a valid platform\"",
+                               TEST_FAIL);
+    }
+
+    cl_device_type INVALID_DEVICE_TYPE = 0;
+    err = clGetDeviceIDs(platform, INVALID_DEVICE_TYPE, 1, &devices, nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_DEVICE_TYPE,
+        "clGetDeviceIDs should return CL_INVALID_DEVICE_TYPE when: "
+        "\"device_type is not a valid value\"",
+        TEST_FAIL);
+
+    err =
+        clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 0, &devices, nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_VALUE,
+        "clGetDeviceIDs should return CL_INVALID_VALUE when: \"num_entries "
+        "is equal to zero and devices is not NULL\"",
+        TEST_FAIL);
+
+    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, nullptr, nullptr);
+    test_failure_error_ret(err, CL_INVALID_VALUE,
+                           "clGetDeviceIDs should return CL_INVALID_VALUE "
+                           "when: \"both num_devices and devices are NULL\"",
+                           TEST_FAIL);
+
+    devices = nullptr;
+    std::vector<cl_device_type> device_types{ CL_DEVICE_TYPE_CPU,
+                                              CL_DEVICE_TYPE_GPU,
+                                              CL_DEVICE_TYPE_ACCELERATOR };
+    if (get_device_cl_version(device) >= Version(1, 2))
+    {
+        device_types.push_back(CL_DEVICE_TYPE_CUSTOM);
+    }
+    // Probe each device type and stop at the first query that fails.
+    bool platform_supports_all_device_types = true;
+    for (auto device_type : device_types)
+    {
+        err = clGetDeviceIDs(platform, device_type, 1, &devices, nullptr);
+        if (err == CL_SUCCESS)
+        {
+            continue;
+        }
+        platform_supports_all_device_types = false;
+        break;
+    }
+    if (platform_supports_all_device_types)
+    {
+        log_info("Platform has every Device Type... Skipping Test\n");
+    }
+    else
+    {
+        test_failure_error_ret(
+            err, CL_DEVICE_NOT_FOUND,
+            "clGetDeviceIDs should return CL_DEVICE_NOT_FOUND when: \"no "
+            "OpenCL devices that matched device_type were found\"",
+            TEST_FAIL);
+    }
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clGetDeviceAndHostTimer */
+REGISTER_TEST_VERSION(negative_get_device_and_host_timer, Version(2, 1))
+{
+    // Real storage, so that only the argument under test is ever invalid.
+    cl_ulong device_time = 0, host_time = 0;
+    cl_int err = CL_SUCCESS;
+    for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
+    {
+        err = clGetDeviceAndHostTimer(invalid_device, &device_time, &host_time);
+        test_failure_error_ret(
+            err, CL_INVALID_DEVICE,
+            "clGetDeviceAndHostTimer should return CL_INVALID_DEVICE when: "
+            "\"device is not a valid device\"",
+            TEST_FAIL);
+    }
+
+    cl_platform_id platform = getPlatformFromDevice(device);
+
+    // Initialise timer_resolution to a Non-0 value as CL2.1/2 devices must
+    // support timer synchronisation
+    cl_ulong timer_resolution = 1;
+    auto device_version = get_device_cl_version(device);
+    err =
+        clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION,
+                          sizeof(timer_resolution), &timer_resolution, nullptr);
+    test_error(err, "clGetPlatformInfo failed");
+    if (timer_resolution == 0
+        && (device_version == Version(2, 1) || device_version == Version(2, 2)))
+    {
+        log_error("Support for device and host timer synchronization is "
+                  "required for platforms supporting OpenCL 2.1 or 2.2.\n");
+        return TEST_FAIL;
+    }
+
+    if (timer_resolution != 0)
+    {
+        log_info("Platform Supports Timers\n");
+        log_info("Skipping CL_INVALID_OPERATION tests\n");
+
+        err = clGetDeviceAndHostTimer(device, nullptr, &host_time);
+        test_failure_error_ret(
+            err, CL_INVALID_VALUE,
+            "clGetDeviceAndHostTimer should return CL_INVALID_VALUE when: "
+            "\"host_timestamp or device_timestamp is NULL\" using nullptr for "
+            "device_timestamp ",
+            TEST_FAIL);
+
+        err = clGetDeviceAndHostTimer(device, &device_time, nullptr);
+        test_failure_error_ret(
+            err, CL_INVALID_VALUE,
+            "clGetDeviceAndHostTimer should return CL_INVALID_VALUE when: "
+            "\"host_timestamp or device_timestamp is NULL\" using nullptr for "
+            "host_timestamp ",
+            TEST_FAIL);
+    }
+    else
+    {
+        log_info("Platform does not Support Timers\n");
+        log_info("Skipping CL_INVALID_VALUE tests\n");
+
+        err = clGetDeviceAndHostTimer(device, &device_time, &host_time);
+        test_failure_error_ret(
+            err, CL_INVALID_OPERATION,
+            "clGetDeviceAndHostTimer should return CL_INVALID_OPERATION when: "
+            "\"the platform associated with device does not support device and "
+            "host timer synchronization\"",
+            TEST_FAIL);
+    }
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clGetHostTimer */
+REGISTER_TEST_VERSION(negative_get_host_timer, Version(2, 1))
+{
+    cl_ulong host_timestamp = 0;
+    cl_int err = CL_SUCCESS;
+    for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
+    {
+        err = clGetHostTimer(invalid_device, &host_timestamp);
+        test_failure_error_ret(err, CL_INVALID_DEVICE,
+                               "clGetHostTimer should return CL_INVALID_DEVICE "
+                               "when: \"device is not "
+                               "a valid device\"",
+                               TEST_FAIL);
+    }
+
+    cl_platform_id platform = getPlatformFromDevice(device);
+    // Initialise timer_resolution to a Non-0 value as CL2.1/2 devices must
+    // support timer synchronisation
+    cl_ulong timer_resolution = 1;
+    auto device_version = get_device_cl_version(device);
+    err =
+        clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION,
+                          sizeof(timer_resolution), &timer_resolution, nullptr);
+    test_error(err, "clGetPlatformInfo failed");
+    if (timer_resolution == 0
+        && (device_version == Version(2, 1) || device_version == Version(2, 2)))
+    {
+        log_error("Support for device and host timer synchronization is "
+                  "required for platforms supporting OpenCL 2.1 or 2.2.\n");
+        return TEST_FAIL;
+    }
+
+    if (timer_resolution != 0)
+    {
+        log_info("Platform Supports Timers\n");
+        log_info("Skipping CL_INVALID_OPERATION tests\n");
+
+        err = clGetHostTimer(device, nullptr);
+        test_failure_error_ret(err, CL_INVALID_VALUE,
+                               "clGetHostTimer should return CL_INVALID_VALUE "
+                               "when: \"host_timestamp is NULL\"",
+                               TEST_FAIL);
+    }
+    else
+    {
+        log_info("Platform does not Support Timers\n");
+        log_info("Skipping CL_INVALID_VALUE tests\n");
+
+        err = clGetHostTimer(device, &host_timestamp);
+        test_failure_error_ret(
+            err, CL_INVALID_OPERATION,
+            "clGetHostTimer should return CL_INVALID_OPERATION when: \"the "
+            "platform associated with device does not support device and host "
+            "timer synchronization\"",
+            TEST_FAIL);
+    }
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clCreateSubDevices */
+enum SupportedPartitionSchemes
+{
+    None = 0,
+    Equally = 1 << 0,
+    Counts = 1 << 1,
+    Affinity = 1 << 2,
+    All_Schemes = Affinity | Counts | Equally,
+};
+
+static int get_supported_properties(cl_device_id device)
+{
+    size_t list_bytes = 0;
+    int err = clGetDeviceInfo(device, CL_DEVICE_PARTITION_PROPERTIES, 0,
+                              nullptr, &list_bytes);
+    test_error(err, "clGetDeviceInfo");
+    std::vector<cl_device_partition_property> supported_properties(
+        list_bytes / sizeof(cl_device_partition_property));
+    // data() is well-defined on an empty vector; &front() is not.
+    err = clGetDeviceInfo(device, CL_DEVICE_PARTITION_PROPERTIES, list_bytes,
+                          supported_properties.data(), nullptr);
+    test_error(err, "clGetDeviceInfo");
+    int ret = SupportedPartitionSchemes::None;
+    for (auto property : supported_properties)
+    {
+        switch (property)
+        {
+            case CL_DEVICE_PARTITION_EQUALLY:
+                ret |= SupportedPartitionSchemes::Equally;
+                break;
+            case CL_DEVICE_PARTITION_BY_COUNTS:
+                ret |= SupportedPartitionSchemes::Counts;
+                break;
+            case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN:
+                ret |= SupportedPartitionSchemes::Affinity;
+                break;
+            default: break;
+        }
+    }
+    return ret;
+}
+
+static std::vector<cl_device_partition_property>
+get_invalid_properties(int unsupported_properties)
+{
+    // Pick the first scheme flagged in the mask, checked in the order
+    // Equally, Counts, Affinity, and return a property list naming it.
+    if ((unsupported_properties & SupportedPartitionSchemes::Equally) != 0)
+    {
+        return { CL_DEVICE_PARTITION_EQUALLY, 1, 0 };
+    }
+    if ((unsupported_properties & SupportedPartitionSchemes::Counts) != 0)
+    {
+        return { CL_DEVICE_PARTITION_BY_COUNTS, 1,
+                 CL_DEVICE_PARTITION_BY_COUNTS_LIST_END };
+    }
+    if ((unsupported_properties & SupportedPartitionSchemes::Affinity) != 0)
+    {
+        return { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN,
+                 CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE, 0 };
+    }
+    // No scheme is flagged: there is no list to offer.
+    return {};
+}
+
+static cl_uint get_uint_device_info(const cl_device_id device,
+                                    const cl_device_info param_name)
+{
+    // Queries a single cl_uint-sized value for param_name.
+    cl_uint out = 0;
+    cl_int rc = clGetDeviceInfo(device, param_name, sizeof(out), &out, nullptr);
+    test_error(rc, "clGetDeviceInfo");
+    return out;
+}
+
+REGISTER_TEST_VERSION(negative_create_sub_devices, Version(1, 2))
+{
+    int supported_properties = get_supported_properties(device);
+    if (supported_properties == SupportedPartitionSchemes::None)
+    {
+        log_info("Device does not support creating subdevices... Skipping\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    cl_device_partition_property properties[4] = {};
+    cl_uint max_compute_units =
+        get_uint_device_info(device, CL_DEVICE_MAX_COMPUTE_UNITS);
+    cl_uint max_sub_devices =
+        get_uint_device_info(device, CL_DEVICE_PARTITION_MAX_SUB_DEVICES);
+    std::vector<cl_device_id> out_devices;
+    cl_uint max_for_partition = 0;
+    if (supported_properties & SupportedPartitionSchemes::Equally)
+    {
+        properties[0] = CL_DEVICE_PARTITION_EQUALLY;
+        properties[1] = 1;
+        properties[2] = 0;
+        out_devices.resize(static_cast<size_t>(max_compute_units));
+        max_for_partition = max_compute_units;
+    }
+    else if (supported_properties & SupportedPartitionSchemes::Counts)
+    {
+        properties[0] = CL_DEVICE_PARTITION_BY_COUNTS;
+        properties[1] = 1;
+        properties[2] = CL_DEVICE_PARTITION_BY_COUNTS_LIST_END;
+        out_devices.resize(static_cast<size_t>(max_sub_devices));
+        max_for_partition = max_sub_devices;
+    }
+    else
+    {
+        properties[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
+        properties[1] = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE;
+        out_devices.resize(static_cast<size_t>(max_sub_devices));
+    }
+    // Entries not assigned above stay 0 from the value-initialisation.
+    properties[3] = 0;
+
+    cl_int err(CL_SUCCESS);
+    for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
+    {
+        err = clCreateSubDevices(invalid_device, properties, out_devices.size(),
+                                 out_devices.data(), nullptr);
+        test_failure_error_ret(err, CL_INVALID_DEVICE,
+                               "clCreateSubDevices should return "
+                               "CL_INVALID_DEVICE when: \"in_device "
+                               "is not a valid device\"",
+                               TEST_FAIL);
+    }
+
+    err = clCreateSubDevices(device, nullptr, out_devices.size(),
+                             out_devices.data(), nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_VALUE,
+        "clCreateSubDevices should return CL_INVALID_VALUE when: \"values "
+        "specified in properties are not valid\" using a nullptr",
+        TEST_FAIL);
+
+    err =
+        clCreateSubDevices(device, properties, 0, out_devices.data(), nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_VALUE,
+        "clCreateSubDevices should return CL_INVALID_VALUE when: \"out_devices "
+        "is not NULL and num_devices is less than the number of sub-devices "
+        "created by the partition scheme\"",
+        TEST_FAIL);
+
+    if (supported_properties != SupportedPartitionSchemes::All_Schemes)
+    {
+        std::vector<cl_device_partition_property> invalid_properties =
+            get_invalid_properties(supported_properties
+                                   ^ SupportedPartitionSchemes::All_Schemes);
+        err =
+            clCreateSubDevices(device, invalid_properties.data(),
+                               out_devices.size(), out_devices.data(), nullptr);
+        test_failure_error_ret(
+            err, CL_INVALID_VALUE,
+            "clCreateSubDevices should return CL_INVALID_VALUE when: \"values "
+            "specified in properties are valid but not supported by the "
+            "device\"",
+            TEST_FAIL);
+    }
+
+    if (supported_properties & SupportedPartitionSchemes::Equally)
+    {
+        properties[1] = max_compute_units;
+        err = clCreateSubDevices(device, properties, max_for_partition,
+                                 out_devices.data(), nullptr);
+        test_failure_error_ret(
+            err, CL_DEVICE_PARTITION_FAILED,
+            "clCreateSubDevices should return CL_DEVICE_PARTITION_FAILED when: "
+            "\"the partition name is supported by the implementation but "
+            "in_device could not be further partitioned\"",
+            TEST_FAIL);
+    }
+
+    constexpr cl_device_partition_property INVALID_PARTITION_PROPERTY =
+        -1; // Arbitrary invalid number
+    properties[0] = INVALID_PARTITION_PROPERTY;
+    err = clCreateSubDevices(device, properties, out_devices.size(),
+                             out_devices.data(), nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_VALUE,
+        "clCreateSubDevices should return CL_INVALID_VALUE when: \"values "
+        "specified in properties are not valid\" using an invalid property",
+        TEST_FAIL);
+
+    if (supported_properties & SupportedPartitionSchemes::Counts)
+    {
+        properties[0] = CL_DEVICE_PARTITION_BY_COUNTS;
+        properties[1] = max_sub_devices + 1;
+        err = clCreateSubDevices(device, properties, max_sub_devices + 1,
+                                 out_devices.data(), nullptr);
+        test_failure_error_ret(
+            err, CL_INVALID_DEVICE_PARTITION_COUNT,
+            "clCreateSubDevices should return "
+            "CL_INVALID_DEVICE_PARTITION_COUNT when: \"the partition name "
+            "specified in properties is CL_DEVICE_PARTITION_BY_COUNTS and the "
+            "number of sub-devices requested exceeds "
+            "CL_DEVICE_PARTITION_MAX_SUB_DEVICES\"",
+            TEST_FAIL);
+
+        properties[1] = -1;
+        err = clCreateSubDevices(device, properties, out_devices.size(),
+                                 out_devices.data(), nullptr);
+        test_failure_error_ret(
+            err, CL_INVALID_DEVICE_PARTITION_COUNT,
+            "clCreateSubDevices should return "
+            "CL_INVALID_DEVICE_PARTITION_COUNT when: \"the number of compute "
+            "units requested for one or more sub-devices is less than zero\"",
+            TEST_FAIL);
+    }
+
+    if (supported_properties & SupportedPartitionSchemes::Equally)
+    {
+        properties[0] = CL_DEVICE_PARTITION_EQUALLY;
+        properties[1] = max_compute_units + 1;
+        err = clCreateSubDevices(device, properties, max_compute_units + 1,
+                                 out_devices.data(), nullptr);
+        test_failure_error_ret(
+            err, CL_INVALID_DEVICE_PARTITION_COUNT,
+            "clCreateSubDevices should return "
+            "CL_INVALID_DEVICE_PARTITION_COUNT when: \"the total number of "
+            "compute units requested exceeds CL_DEVICE_MAX_COMPUTE_UNITS for "
+            "in_device\"",
+            TEST_FAIL);
+    }
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clRetainDevice */
+REGISTER_TEST_VERSION(negative_retain_device, Version(1, 2))
+{
+    // Each generated invalid handle must be rejected by clRetainDevice.
+    for (cl_device_id bad_device : get_invalid_objects<cl_device_id>(device))
+    {
+        cl_int status = clRetainDevice(bad_device);
+        test_failure_error_ret(
+            status, CL_INVALID_DEVICE,
+            "clRetainDevice should return CL_INVALID_DEVICE when: \"device "
+            "is not a valid device\"",
+            TEST_FAIL);
+    }
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clReleaseDevice */
+REGISTER_TEST_VERSION(negative_release_device, Version(1, 2))
+{
+    // Each generated invalid handle must be rejected by clReleaseDevice.
+    for (cl_device_id bad_device : get_invalid_objects<cl_device_id>(device))
+    {
+        cl_int status = clReleaseDevice(bad_device);
+        test_failure_error_ret(
+            status, CL_INVALID_DEVICE,
+            "clReleaseDevice should return CL_INVALID_DEVICE when: \"device "
+            "is not a valid device\"",
+            TEST_FAIL);
+    }
+
+    return TEST_PASS;
+}
diff --git a/test_conformance/api/negative_platform.cpp b/test_conformance/api/negative_platform.cpp
index f98ec13a..6c5922c5 100644
--- a/test_conformance/api/negative_platform.cpp
+++ b/test_conformance/api/negative_platform.cpp
@@ -40,9 +40,20 @@ REGISTER_TEST(negative_get_platform_info)
 {
     cl_platform_id platform = getPlatformFromDevice(device);
 
+    cl_int err(CL_SUCCESS);
+    for (auto invalid_platform : get_invalid_objects<cl_platform_id>(device))
+    {
+        err = clGetPlatformInfo(invalid_platform, CL_PLATFORM_VERSION,
+                                sizeof(char*), nullptr, nullptr);
+        test_failure_error_ret(err, CL_INVALID_PLATFORM,
+                               "clGetPlatformInfo should return "
+                               "CL_INVALID_PLATFORM  when: \"platform "
+                               "is not a valid platform\"",
+                               TEST_FAIL);
+    }
+
     constexpr cl_platform_info INVALID_PARAM_VALUE = 0;
-    cl_int err =
-        clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
+    err = clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
     test_failure_error_ret(
         err, CL_INVALID_VALUE,
         "clGetPlatformInfo should return CL_INVALID_VALUE when: \"param_name "

From b0876629f8b6ac0aa8e37f77d221271ce4012558 Mon Sep 17 00:00:00 2001
From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com>
Date: Tue, 2 Dec 2025 17:36:36 +0000
Subject: [PATCH 22/54] Add AHB lifetime test (#2569)

Add lifetime test for AHardwareBuffer in which for both CL Buffers and
CL Images the following steps are taken
 - Create AHB
 - Create mem object from the AHB
 - Release the AHB
 - Read and write from and to the mem object
 - Verify the reads and writes have happened successfully

The CL implementation should maintain a reference count to the AHB since
the AHB must not be deallocated for the test to pass.

Signed-off-by: Alex Davicenko <alex.davicenko@arm.com>
Signed-off-by: Ahmed Hesham <ahmed.hesham@arm.com>
Co-authored-by: Alex Davicenko <alex.davicenko@arm.com>
---
 .../cl_khr_external_memory_ahb/test_ahb.cpp   | 303 ++++++++++++++++++
 1 file changed, 303 insertions(+)

diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
index 5151a668..dc2b5a3e 100644
--- a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
+++ b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
@@ -96,6 +96,23 @@ static const char *diff_images_kernel_source = {
         })"
 };
 
+static const char *lifetime_kernel_source = {
+    R"(
+            __kernel void increment_buffer(global uchar* buffer)
+            {
+                int tid = get_global_id(0);
+                buffer[tid] ++;
+            }
+
+            __kernel void set_image_color(write_only image2d_t ahb_image, float4 set_color)
+            {
+                int tidX = get_global_id(0);
+                int tidY = get_global_id(1);
+
+                write_imagef(ahb_image, (int2)( tidX, tidY ), set_color);
+            })"
+};
+
 // Checks that the inferred image format is correct
 REGISTER_TEST(images)
 {
@@ -1857,3 +1874,289 @@ REGISTER_TEST(blob)
 
     return TEST_PASS;
 }
+
+/*
+ * Lifetime tests, run for both a CL buffer and a CL image:
+ *  Create an AHB
+ *  Create a mem object from the AHB
+ *  Release the test's own reference to the AHB
+ *  Read and write using the mem object
+ *  Verify reads and writes
+ */
+REGISTER_TEST(lifetime_buffer)
+{
+    REQUIRE_EXTENSION("cl_khr_external_memory_android_hardware_buffer");
+
+    cl_int err;
+    constexpr cl_uint buffer_size = 4096;
+    std::vector<uint8_t> host_buffer(buffer_size, 1);
+    clMemWrapper imported_buffer;
+
+    {
+        // Skip the test if this blob AHB description is not supported
+        AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
+        aHardwareBufferDesc.width = buffer_size;
+        aHardwareBufferDesc.height = 1;
+        aHardwareBufferDesc.layers = 1;
+        aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
+        aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
+            | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN;
+
+        if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
+        {
+            log_unsupported_ahb_format(aHardwareBufferDesc);
+            return TEST_SKIPPED_ITSELF;
+        }
+
+        log_info("Testing buffer lifetime\n");
+
+        AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
+
+        const cl_mem_properties props[] = {
+            CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
+            aHardwareBuffer.get_props(),
+            0,
+        };
+
+        imported_buffer = clCreateBufferWithProperties(
+            context, props, CL_MEM_READ_WRITE, 0, nullptr, &err);
+        test_error(err, "Failed to create CL buffer from AHardwareBuffer");
+
+        // Fill the AHB from the host through a CPU write lock
+        void *data_ptr = nullptr;
+        int ahb_result = AHardwareBuffer_lock(
+            aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, nullptr,
+            &data_ptr);
+        if (ahb_result != 0)
+        {
+            log_error("AHardwareBuffer_lock failed with code %d\n", ahb_result);
+            return TEST_FAIL;
+        }
+
+        memcpy(data_ptr, host_buffer.data(), buffer_size);
+
+        ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr);
+        if (ahb_result != 0)
+        {
+            log_error("AHardwareBuffer_unlock failed with code %d\n",
+                      ahb_result);
+            return TEST_FAIL;
+        }
+    } // Release test scope reference to AHB
+
+
+    // Verify buffer read by comparing to host buffer
+    std::vector<uint8_t> read_buffer(buffer_size);
+    err = clEnqueueReadBuffer(queue, imported_buffer, true, 0, buffer_size,
+                              read_buffer.data(), 0, nullptr, nullptr);
+    test_error(err, "failed clEnqueueReadBuffer");
+
+    for (size_t i = 0; i < buffer_size; i++)
+    {
+        if (read_buffer[i] != host_buffer[i])
+        {
+            log_error("At position %zu expected value: %u but got value: %u\n",
+                      i, host_buffer[i], read_buffer[i]);
+            return TEST_FAIL;
+        }
+    }
+
+    // Attempt buffer write
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      &lifetime_kernel_source,
+                                      "increment_buffer");
+    test_error(err, "kernel creation failed");
+
+    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &imported_buffer);
+    test_error(err, "clSetKernelArg failed");
+
+    size_t gws[1] = { buffer_size };
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, gws, nullptr, 0,
+                                 nullptr, nullptr);
+    test_error(err, "Failed clEnqueueNDRangeKernel");
+
+    // Verify write
+    err = clEnqueueReadBuffer(queue, imported_buffer, true, 0, buffer_size,
+                              read_buffer.data(), 0, nullptr, nullptr);
+    test_error(err, "failed clEnqueueReadBuffer");
+
+    for (size_t i = 0; i < buffer_size; i++)
+    {
+        if (read_buffer[i]
+            != host_buffer[i] + 1) // Kernel incremented each index by 1
+        {
+            log_error("At position %zu expected value: %u but got value: %u\n",
+                      i, host_buffer[i] + 1u, read_buffer[i]);
+            return TEST_FAIL;
+        }
+    }
+
+    return TEST_PASS;
+}
+
+
+REGISTER_TEST(lifetime_image)
+{
+    // Image variant of the lifetime test; the CL image must keep the AHB alive
+    REQUIRE_EXTENSION("cl_khr_external_memory_android_hardware_buffer");
+
+    cl_int err;
+    const AHardwareBuffer_Format aHardwareBufferFormat =
+        AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
+    const cl_image_format clImageFormat = { CL_RGBA, CL_UNORM_INT8 };
+    const size_t pixel_size = get_pixel_size(&clImageFormat);
+
+    for (auto resolution : test_sizes)
+    {
+        const size_t row_bytes = resolution.width * pixel_size;
+        const size_t image_size = row_bytes * resolution.height;
+
+        std::vector<uint8_t> host_image_data(image_size, 1);
+        clMemWrapper imported_image;
+        {
+            // Skip resolutions whose AHB description is not supported
+            AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
+            aHardwareBufferDesc.width = resolution.width;
+            aHardwareBufferDesc.height = resolution.height;
+            aHardwareBufferDesc.layers = 1;
+            aHardwareBufferDesc.format = aHardwareBufferFormat;
+            aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
+                | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN;
+
+            if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
+            {
+                log_unsupported_ahb_format(aHardwareBufferDesc);
+                continue;
+            }
+
+            log_info("Testing image lifetime\n");
+
+            AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
+
+            const cl_mem_properties props_image[] = {
+                CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
+                aHardwareBuffer.get_props(),
+                0,
+            };
+
+            imported_image = clCreateImageWithProperties(
+                context, props_image, CL_MEM_READ_WRITE, nullptr, nullptr,
+                nullptr, &err);
+            test_error(err, "Failed to create CL image from AHardwareBuffer");
+
+            void *data_ptr = nullptr;
+            int ahb_result = AHardwareBuffer_lock(
+                aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
+                nullptr, &data_ptr);
+            if (ahb_result != 0)
+            {
+                log_error("AHardwareBuffer_lock failed with code %d\n",
+                          ahb_result);
+                return TEST_FAIL;
+            }
+
+            // NOTE(review): assumes tightly packed AHB rows (stride == width)
+            memcpy(data_ptr, host_image_data.data(), image_size);
+
+            ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr);
+            if (ahb_result != 0)
+            {
+                log_error("AHardwareBuffer_unlock failed with code %d\n",
+                          ahb_result);
+                return TEST_FAIL;
+            }
+        } // Release test scope reference to AHB
+
+        // Verify image read using host data (every byte of every row)
+        size_t origin[3] = { 0, 0, 0 };
+        size_t region[3] = { resolution.width, resolution.height, 1 };
+        size_t row_pitch;
+        uint8_t *mapped_image_ptr = static_cast<uint8_t *>(clEnqueueMapImage(
+            queue, imported_image, true, CL_MAP_READ, origin, region,
+            &row_pitch, nullptr, 0, nullptr, nullptr, &err));
+        test_error(err, "clEnqueueMapImage failed");
+
+        for (size_t row = 0; row < resolution.height; ++row)
+        {
+            for (size_t offset = 0; offset < row_bytes; ++offset)
+            {
+                size_t mapped_image_idx = row * row_pitch + offset;
+                size_t host_image_idx = row * row_bytes + offset;
+                if (mapped_image_ptr[mapped_image_idx]
+                    != host_image_data[host_image_idx])
+                {
+                    log_error(
+                        "At position (%zu, %zu) expected value: %u but got "
+                        "value: %u\n",
+                        row, offset, host_image_data[host_image_idx],
+                        mapped_image_ptr[mapped_image_idx]);
+                    return TEST_FAIL;
+                }
+            }
+        }
+
+        err = clEnqueueUnmapMemObject(queue, imported_image, mapped_image_ptr,
+                                      0, nullptr, nullptr);
+        test_error(err, "clEnqueueUnmapMemObject failed");
+
+        err = clFinish(queue);
+        test_error(err, "clFinish failed");
+
+        // Attempt image write
+        clProgramWrapper program;
+        clKernelWrapper kernel;
+        err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                          &lifetime_kernel_source,
+                                          "set_image_color");
+        test_error(err, "kernel creation failed");
+
+        err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &imported_image);
+        test_error(err, "clSetKernelArg failed");
+
+        cl_float4 color = { { 0.5f, 0.5f, 0.5f, 0.5f } };
+        err = clSetKernelArg(kernel, 1, sizeof(cl_float4), &color);
+        test_error(err, "clSetKernelArg failed");
+
+        std::vector<size_t> gws = { resolution.width, resolution.height };
+        err = clEnqueueNDRangeKernel(queue, kernel, 2, nullptr, gws.data(),
+                                     nullptr, 0, nullptr, nullptr);
+        test_error(err, "Failed clEnqueueNDRangeKernel");
+
+        err = clFinish(queue);
+        test_error(err, "clFinish failed");
+
+
+        // Verify image write: 0.5f in every channel is stored as 128
+        mapped_image_ptr = static_cast<uint8_t *>(clEnqueueMapImage(
+            queue, imported_image, true, CL_MAP_READ, origin, region,
+            &row_pitch, nullptr, 0, nullptr, nullptr, &err));
+        test_error(err, "clEnqueueMapImage failed");
+
+        for (size_t row = 0; row < resolution.height; ++row)
+        {
+            for (size_t offset = 0; offset < row_bytes; ++offset)
+            {
+                size_t mapped_image_idx = row * row_pitch + offset;
+                if (128 != mapped_image_ptr[mapped_image_idx])
+                {
+                    log_error(
+                        "At position (%zu, %zu) expected value: %u but got "
+                        "value: %u\n",
+                        row, offset, 128, mapped_image_ptr[mapped_image_idx]);
+                    return TEST_FAIL;
+                }
+            }
+        }
+
+        err = clEnqueueUnmapMemObject(queue, imported_image, mapped_image_ptr,
+                                      0, nullptr, nullptr);
+        test_error(err, "clEnqueueUnmapMemObject failed");
+
+        err = clFinish(queue);
+        test_error(err, "clFinish failed");
+    }
+    return TEST_PASS;
+}

From bd167754d9690a23d0210d04a9913de386ba31b8 Mon Sep 17 00:00:00 2001
From: gtrebuchet-arm <guillaume.trebuchet@arm.com>
Date: Tue, 2 Dec 2025 17:40:16 +0000
Subject: [PATCH 23/54] Add multi device and negative tests for
 cl_ext_buffer_device_address (#2561)

The tests check that invalid parameters for
clSetKernelArgDevicePointerEXT are reported correctly and ensure that
a kernel can access a buffer through its respective device address on
each device in a multi-device context.

Signed-off-by: Guillaume Trebuchet <guillaume.trebuchet@arm.com>
---
 .../buffer_device_address.cpp                 | 182 ++++++++++++++++++
 1 file changed, 182 insertions(+)

diff --git a/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp b/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp
index 97872613..50eb5011 100644
--- a/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp
+++ b/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp
@@ -14,7 +14,10 @@
 //
 
 #include "harness/typeWrappers.h"
+#include "harness/extensionHelpers.h"
 #include <cinttypes>
+#include <vector>
+#include <string>
 
 #define BUF_SIZE 1024
 #define BUF_SIZE_STR "1024"
@@ -421,6 +424,8 @@ private:
 
 REGISTER_TEST(private_address)
 {
+    REQUIRE_EXTENSION("cl_ext_buffer_device_address");
+
     BufferDeviceAddressTest test_fixture = BufferDeviceAddressTest(
         device, context, queue, CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT);
 
@@ -435,3 +440,180 @@ REGISTER_TEST(private_address)
 
     return TEST_PASS;
 }
+
+REGISTER_TEST(private_address_multi_device)
+{
+    // Writes through the buffer's device address on every device of a context
+    REQUIRE_EXTENSION("cl_ext_buffer_device_address");
+
+    cl_platform_id platform = nullptr;
+    cl_uint numDevices = 0;
+
+    cl_int error = clGetPlatformIDs(1, &platform, nullptr);
+    test_error_ret(error, "Unable to get platform\n", TEST_FAIL);
+
+    /* Count the platform's devices; the test needs at least two of them */
+    error =
+        clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevices);
+    test_error_ret(error, "Unable to get multiple devices\n", TEST_FAIL);
+
+    if (numDevices < 2)
+    {
+        log_info(
+            "WARNING: multi device test unable to get multiple devices via "
+            "CL_DEVICE_TYPE_ALL (got %u devices). Skipping test...\n",
+            numDevices);
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    std::vector<cl_device_id> devices(numDevices);
+    error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, numDevices,
+                           devices.data(), &numDevices);
+    test_error_ret(error, "Unable to get multiple devices\n", TEST_FAIL);
+
+    GET_PFN(devices[0], clSetKernelArgDevicePointerEXT);
+
+    cl_context_properties properties[] = { CL_CONTEXT_PLATFORM,
+                                           (cl_context_properties)platform, 0 };
+    clContextWrapper ctx = clCreateContext(
+        properties, numDevices, devices.data(), nullptr, nullptr, &error);
+    test_error_ret(error, "Unable to create context\n", TEST_FAIL);
+
+    /* Create the buffer and query one device address per context device */
+    cl_mem_properties props[] = { CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT, CL_TRUE,
+                                  0 };
+    clMemWrapper buffer = clCreateBufferWithProperties(
+        ctx, props, CL_MEM_READ_WRITE, 16, nullptr, &error);
+    test_error_ret(error, "Unable to create buffer\n", TEST_FAIL);
+    std::vector<cl_mem_device_address_ext> addresses(numDevices);
+    error =
+        clGetMemObjectInfo(buffer, CL_MEM_DEVICE_ADDRESS_EXT,
+                           sizeof(cl_mem_device_address_ext) * addresses.size(),
+                           addresses.data(), nullptr);
+    test_error_ret(error, "clGetMemObjectInfo failed\n", TEST_FAIL);
+
+    std::vector<clCommandQueueWrapper> queues(numDevices);
+    for (cl_uint i = 0; i < numDevices; ++i)
+    {
+        queues[i] = clCreateCommandQueue(ctx, devices[i], 0, &error);
+        test_error_ret(error, "Unable to create command queue\n", TEST_FAIL);
+    }
+    static std::string source = R"(
+        void kernel test_device_address(
+            global ulong* ptr,
+            ulong value)
+        {
+            *ptr = value;
+        })";
+
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    const char *source_ptr = source.data();
+    error = create_single_kernel_helper(ctx, &program, &kernel, 1, &source_ptr,
+                                        "test_device_address");
+    test_error(error, "Unable to create test kernel");
+    for (cl_uint i = 0; i < numDevices; ++i)
+    {
+        cl_command_queue queue = queues[i];
+
+        error = clSetKernelArgDevicePointerEXT(kernel, 0, 0);
+        test_error_fail(error,
+                        "clSetKernelArgDevicePointerEXT failed with NULL "
+                        "pointer argument\n");
+
+        error = clSetKernelArgDevicePointerEXT(kernel, 0, addresses[i] + 8);
+        test_error_ret(error, "Unable to set kernel arg\n", TEST_FAIL);
+
+        const cl_ulong pattern = 0xAABBCCDDEEFF0011 + i;
+        error = clSetKernelArg(kernel, 1, sizeof(pattern), &pattern);
+        test_error_ret(error, "Unable to set kernel arg\n", TEST_FAIL);
+
+        size_t gws = 1;
+        size_t lws = 1;
+        error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &gws, &lws, 0,
+                                       nullptr, nullptr);
+        test_error_ret(error, "Unable to enqueue kernel\n", TEST_FAIL);
+
+        error = clFinish(queue);
+        test_error_ret(error, "clFinish failed\n", TEST_FAIL);
+
+        std::vector<cl_ulong> results(2, 0);
+        error = clEnqueueReadBuffer(queue, buffer, CL_BLOCKING, 0,
+                                    results.size() * sizeof(cl_ulong),
+                                    results.data(), 0, nullptr, nullptr);
+        test_error_ret(error, "clEnqueueReadBuffer failed\n", TEST_FAIL);
+
+        if (results[1] != pattern)
+            test_fail("Test value doesn't match expected value\n");
+    }
+    return TEST_PASS;
+}
+
+REGISTER_TEST(negative_private_address)
+{
+    REQUIRE_EXTENSION("cl_ext_buffer_device_address");
+
+    cl_int error = CL_SUCCESS;
+
+    GET_PFN(device, clSetKernelArgDevicePointerEXT);
+
+    /* Create a plain buffer, i.e. without CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT */
+    clMemWrapper buffer = clCreateBufferWithProperties(
+        context, nullptr, CL_MEM_READ_WRITE, 16, nullptr, &error);
+    test_error(error, "clCreateBufferWithProperties failed");
+    cl_mem_device_address_ext address = 0;
+    error = clGetMemObjectInfo(buffer, CL_MEM_DEVICE_ADDRESS_EXT,
+                               sizeof(address), &address, nullptr);
+    test_failure_error_ret(
+        error, CL_INVALID_OPERATION,
+        "clGetMemObjectInfo should return CL_INVALID_OPERATION when: "
+        "\"the buffer was not created with CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT\"",
+        TEST_FAIL);
+
+    static std::string source = R"(
+        void kernel test_device_address(
+            global ulong* ptr,
+            local ulong* ptr2,
+            ulong value)
+        {
+            *ptr = value;
+        })";
+
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    const char *source_ptr = source.data();
+    error = create_single_kernel_helper(context, &program, &kernel, 1,
+                                        &source_ptr, "test_device_address");
+    test_error(error, "Unable to create test kernel");
+
+    error = clSetKernelArgDevicePointerEXT(nullptr, 0, 0);
+    test_failure_error_ret(
+        error, CL_INVALID_KERNEL,
+        "clSetKernelArgDevicePointerEXT should return CL_INVALID_KERNEL when: "
+        "\"kernel is not a valid kernel object\"",
+        TEST_FAIL);
+
+    error = clSetKernelArgDevicePointerEXT(kernel, 1, 0x15465);
+    test_failure_error_ret(
+        error, CL_INVALID_ARG_INDEX,
+        "clSetKernelArgDevicePointerEXT should return "
+        "CL_INVALID_ARG_INDEX when: "
+        "\"the expected kernel argument is not a pointer to global memory\"",
+        TEST_FAIL);
+
+    error = clSetKernelArgDevicePointerEXT(kernel, 2, 0x15465);
+    test_failure_error_ret(error, CL_INVALID_ARG_INDEX,
+                           "clSetKernelArgDevicePointerEXT should return "
+                           "CL_INVALID_ARG_INDEX when: "
+                           "\"the expected kernel argument is not a pointer\"",
+                           TEST_FAIL);
+
+    error = clSetKernelArgDevicePointerEXT(kernel, 3, 0x15465);
+    test_failure_error_ret(error, CL_INVALID_ARG_INDEX,
+                           "clSetKernelArgDevicePointerEXT should return "
+                           "CL_INVALID_ARG_INDEX when: "
+                           "\"arg_index is not a valid argument index\"",
+                           TEST_FAIL);
+
+    return TEST_PASS;
+}

From 5179e06ed92368e76e8601160e6560ccd7035113 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 9 Dec 2025 17:37:12 +0100
Subject: [PATCH 24/54] Added test to verify reqd_work_group_size attribute
 with cl_khr_command_buffer extension (#2583)

Fixes #2501 according to issue description

---------

Co-authored-by: Ben Ashbaugh <ben.ashbaugh@intel.com>
---
 .../api/test_kernel_attributes.cpp            |   2 +
 .../cl_khr_command_buffer/CMakeLists.txt      |   1 +
 .../command_buffer_kernel_attributes.cpp      | 235 ++++++++++++++++++
 3 files changed, 238 insertions(+)
 create mode 100644 test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp

diff --git a/test_conformance/api/test_kernel_attributes.cpp b/test_conformance/api/test_kernel_attributes.cpp
index dd50e0f9..fadae00a 100644
--- a/test_conformance/api/test_kernel_attributes.cpp
+++ b/test_conformance/api/test_kernel_attributes.cpp
@@ -382,6 +382,7 @@ REGISTER_TEST(null_required_work_group_size)
     clMemWrapper dst;
     dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int),
                          nullptr, &error);
+    test_error(error, "clCreateBuffer failed");
 
     struct KernelAttribInfo
     {
@@ -449,6 +450,7 @@ REGISTER_TEST(null_required_work_group_size)
             error =
                 clEnqueueFillBuffer(queue, dst, &zero, sizeof(zero), 0,
                                     sizeof(attrib.wgs), 0, nullptr, nullptr);
+            test_error(error, "clEnqueueFillBuffer failed");
 
             const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 };
             error = clEnqueueNDRangeKernel(queue, kernel, work_dim, nullptr,
diff --git a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
index 9e54fecc..1a50db93 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
@@ -18,6 +18,7 @@ set(${MODULE_NAME}_SOURCES
     command_buffer_test_event_info.cpp
     command_buffer_finalize.cpp
     command_buffer_pipelined_enqueue.cpp
+    command_buffer_kernel_attributes.cpp
     negative_command_buffer_finalize.cpp
     negative_command_buffer_svm_mem.cpp
     negative_command_buffer_copy_image.cpp
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp
new file mode 100644
index 00000000..4eddd1d8
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp
@@ -0,0 +1,235 @@
+//
+// Copyright (c) 2025 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "basic_command_buffer.h"
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+// Tests for cl_khr_command_buffer while enqueueing a kernel with a
+// reqd_work_group_size with a NULL local_work_size.
+
+struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest
+{
+    inline static const std::string body_str = R"(
+        __kernel void wg_size(__global int* dst)
+        {
+            if (get_global_id(0) == 0 &&
+                get_global_id(1) == 0 &&
+                get_global_id(2) == 0) {
+                dst[0] = get_local_size(0);
+                dst[1] = get_local_size(1);
+                dst[2] = get_local_size(2);
+            }
+        }
+    )";
+
+    KernelAttributesReqGroupSizeTest(cl_device_id device, cl_context context,
+                                     cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue), dst(nullptr),
+          clGetKernelSuggestedLocalWorkSizeKHR(nullptr),
+          device_max_work_group_size(0)
+    {}
+
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        if (is_extension_available(device, "cl_khr_suggested_local_work_size"))
+        {
+            cl_platform_id platform = nullptr;
+            error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
+                                    sizeof(platform), &platform, nullptr);
+            test_error(error, "clGetDeviceInfo for platform failed");
+
+            clGetKernelSuggestedLocalWorkSizeKHR =
+                (clGetKernelSuggestedLocalWorkSizeKHR_fn)
+                    clGetExtensionFunctionAddressForPlatform(
+                        platform, "clGetKernelSuggestedLocalWorkSizeKHR");
+            test_assert_error(clGetKernelSuggestedLocalWorkSizeKHR != nullptr,
+                              "Couldn't get function pointer for "
+                              "clGetKernelSuggestedLocalWorkSizeKHR");
+        }
+
+        dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int),
+                             nullptr, &error);
+        test_error(error, "clCreateBuffer failed");
+
+        cl_uint device_max_dim = 0;
+        error =
+            clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
+                            sizeof(device_max_dim), &device_max_dim, nullptr);
+        test_error(
+            error,
+            "clGetDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
+        test_assert_error(
+            device_max_dim >= 3,
+            "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS must be at least 3!");
+
+        device_max_work_item_sizes.resize(device_max_dim);
+        error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+                                sizeof(size_t) * device_max_dim,
+                                device_max_work_item_sizes.data(), nullptr);
+        test_error(error,
+                   "clGetDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed");
+
+        error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+                                sizeof(device_max_work_group_size),
+                                &device_max_work_group_size, nullptr);
+        test_error(error,
+                   "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_int Run() override
+    {
+        cl_int error = CL_SUCCESS;
+        struct KernelAttribInfo
+        {
+            cl_int wgs[3];
+            cl_uint min_dim;
+        };
+
+        std::vector<KernelAttribInfo> attribs = { { { 2, 1, 1 }, 1 },
+                                                  { { 2, 3, 1 }, 2 },
+                                                  { { 2, 3, 4 }, 3 } };
+
+        for (auto& attrib : attribs)
+        {
+            const std::string attrib_str =
+                "__attribute__((reqd_work_group_size("
+                + std::to_string(attrib.wgs[0]) + ","
+                + std::to_string(attrib.wgs[1]) + ","
+                + std::to_string(attrib.wgs[2]) + ")))";
+            const std::string source_str = attrib_str + body_str;
+            const char* source = source_str.c_str();
+
+            clProgramWrapper program;
+            clKernelWrapper kernel;
+            error = create_single_kernel_helper(context, &program, &kernel, 1,
+                                                &source, "wg_size");
+            test_error(error, "Unable to create test kernel");
+
+            error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst);
+            test_error(error, "clSetKernelArg failed");
+
+            for (cl_uint work_dim = attrib.min_dim; work_dim <= 3; work_dim++)
+            {
+                const size_t test_work_group_size =
+                    attrib.wgs[0] * attrib.wgs[1] * attrib.wgs[2];
+                if ((size_t)attrib.wgs[0] > device_max_work_item_sizes[0]
+                    || (size_t)attrib.wgs[1] > device_max_work_item_sizes[1]
+                    || (size_t)attrib.wgs[2] > device_max_work_item_sizes[2]
+                    || test_work_group_size > device_max_work_group_size)
+                {
+                    log_info(
+                        "Skipping test for work_dim = %u: required work group "
+                        "size (%i, %i, %i) (total %zu) exceeds device max "
+                        "work group size (%zu, %zu, %zu) (total %zu)\n",
+                        work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
+                        test_work_group_size, device_max_work_item_sizes[0],
+                        device_max_work_item_sizes[1],
+                        device_max_work_item_sizes[2],
+                        device_max_work_group_size);
+                    continue;
+                }
+
+                const cl_int zero = 0;
+                error = clCommandFillBufferKHR(
+                    command_buffer, nullptr, nullptr, dst, &zero, sizeof(zero),
+                    0, sizeof(attrib.wgs), 0, nullptr, nullptr, nullptr);
+                test_error(error, "clCommandFillBufferKHR failed");
+
+                const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 };
+                error = clCommandNDRangeKernelKHR(
+                    command_buffer, nullptr, nullptr, kernel, work_dim, nullptr,
+                    global_work_size, nullptr, 0, nullptr, nullptr, nullptr);
+                test_error(error, "clCommandNDRangeKernelKHR failed");
+
+                error = clFinalizeCommandBufferKHR(command_buffer);
+                test_error(error, "clFinalizeCommandBufferKHR failed");
+
+                error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                                  nullptr, nullptr);
+                test_error(error, "clEnqueueCommandBufferKHR failed");
+
+                cl_int results[3] = { -1, -1, -1 };
+                error =
+                    clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, sizeof(results),
+                                        results, 0, nullptr, nullptr);
+                test_error(error, "clEnqueueReadBuffer failed");
+
+                // The executed local size must equal the required size
+                if (results[0] != attrib.wgs[0] || results[1] != attrib.wgs[1]
+                    || results[2] != attrib.wgs[2])
+                {
+                    log_error(
+                        "Executed local size mismatch with work_dim = %u: "
+                        "Expected (%d,%d,%d) got (%d,%d,%d)\n",
+                        work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
+                        results[0], results[1], results[2]);
+                    return TEST_FAIL;
+                }
+
+                if (clGetKernelSuggestedLocalWorkSizeKHR != nullptr)
+                {
+                    size_t suggested[3] = { 1, 1, 1 };
+                    error = clGetKernelSuggestedLocalWorkSizeKHR(
+                        queue, kernel, work_dim, nullptr, global_work_size,
+                        suggested);
+                    test_error(error,
+                               "clGetKernelSuggestedLocalWorkSizeKHR failed");
+
+                    if (suggested[0] != (size_t)attrib.wgs[0]
+                        || suggested[1] != (size_t)attrib.wgs[1]
+                        || suggested[2] != (size_t)attrib.wgs[2])
+                    {
+                        log_error(
+                            "Suggested local size mismatch with work_dim = "
+                            "%u: Expected (%d,%d,%d) got (%zu,%zu,%zu)\n",
+                            work_dim, attrib.wgs[0], attrib.wgs[1],
+                            attrib.wgs[2], suggested[0], suggested[1],
+                            suggested[2]);
+                        return TEST_FAIL;
+                    }
+                }
+
+                // Start a fresh command buffer for the next iteration
+                command_buffer =
+                    clCreateCommandBufferKHR(1, &queue, nullptr, &error);
+                test_error(error, "clCreateCommandBufferKHR failed");
+            }
+        }
+
+        return CL_SUCCESS;
+    }
+
+    clMemWrapper dst; // three cl_int: local sizes written by the kernel
+    clGetKernelSuggestedLocalWorkSizeKHR_fn
+        clGetKernelSuggestedLocalWorkSizeKHR;
+    size_t device_max_work_group_size;
+    std::vector<size_t> device_max_work_item_sizes;
+};
+
+} // anonymous namespace
+
+REGISTER_TEST(command_null_required_work_group_size)
+{
+    using TestFixture = KernelAttributesReqGroupSizeTest;
+    return MakeAndRunTest<TestFixture>(device, context, queue, num_elements);
+}

From 68c3eec0516f6b06d4618a604e7708973375fb26 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 9 Dec 2025 17:40:39 +0100
Subject: [PATCH 25/54] Remove overwrite of CMAKE_CXX_FLAGS from
 cl_khr_external_semaphore test (#2596)

Fixes #2430
---
 .../extensions/cl_khr_external_semaphore/CMakeLists.txt          | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt b/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt
index 1161e519..19cef51d 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt
@@ -18,7 +18,6 @@ include_directories (${CLConform_INCLUDE_DIR})
 
 list(APPEND CLConform_LIBRARIES vulkan_wrapper)
 set(CMAKE_COMPILE_WARNING_AS_ERROR OFF)
-set(CMAKE_CXX_FLAGS "-fpermissive")
 
 include_directories("../../common/vulkan_wrapper")
 

From afb6f6519c6fb931c6675d404542ecd84048ca59 Mon Sep 17 00:00:00 2001
From: Xin Jin <xin.jin@arm.com>
Date: Tue, 9 Dec 2025 18:06:57 +0000
Subject: [PATCH 26/54] Tighten AHB buffer row pitch in
 test_cl_khr_external_memory_ahb test (#2594)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

What was wrong:
enqueue_copy_buffer_to_image (and the related write/fill tests) mis-set
imageInfo.rowPitch to width*height*pixelSize. Because get_image_size
multiplies row pitch by height, the computed buffer size came out a
factor of height larger than intended.

How it’s fixed: set rowPitch to the true per-line pitch
(width*pixelSize) for all buffer-backed image cases so the calculated
sizes match the actual data layout and stay within the expected memory
footprint.

Signed-off-by: Xin Jin <xin.jin@arm.com>
---
 .../cl_khr_external_memory_ahb/test_ahb.cpp          | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
index dc2b5a3e..303eace7 100644
--- a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
+++ b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
@@ -1239,8 +1239,8 @@ REGISTER_TEST(enqueue_copy_buffer_to_image)
                 imageInfo.type = format.clMemObjectType;
                 imageInfo.width = resolution.width;
                 imageInfo.height = resolution.height;
-                imageInfo.rowPitch = resolution.width * resolution.height
-                    * pixelSize; // data is tightly packed in buffer
+                // data is tightly packed in buffer
+                imageInfo.rowPitch = resolution.width * pixelSize;
                 test_assert_error(imageInfo.rowPitch
                                       >= pixelSize * imageInfo.width,
                                   "Row pitch is smaller than width");
@@ -1446,8 +1446,8 @@ REGISTER_TEST(enqueue_write_image)
                 imageInfo.type = format.clMemObjectType;
                 imageInfo.width = resolution.width;
                 imageInfo.height = resolution.height;
-                imageInfo.rowPitch = resolution.width * resolution.height
-                    * pixelSize; // Data is tightly packed
+                // Data is tightly packed
+                imageInfo.rowPitch = resolution.width * pixelSize;
                 test_assert_error(imageInfo.rowPitch
                                       >= pixelSize * imageInfo.width,
                                   "Row pitch is smaller than width");
@@ -1637,8 +1637,8 @@ REGISTER_TEST(enqueue_fill_image)
                 imageInfo.type = format.clMemObjectType;
                 imageInfo.width = resolution.width;
                 imageInfo.height = resolution.height;
-                imageInfo.rowPitch = resolution.width * resolution.height
-                    * pixelSize; // Data is tightly packed
+                imageInfo.rowPitch = resolution.width * pixelSize;
+                // Data is tightly packed
                 test_assert_error(imageInfo.rowPitch
                                       >= pixelSize * imageInfo.width,
                                   "Row pitch is smaller than width");

From 67fbbe4ee24a15eed6d8875b2540da31af515495 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 9 Dec 2025 19:10:56 +0100
Subject: [PATCH 27/54] Unified cl_khr_external_semaphore tests using the same
 export and import scheme (#2591)

This follows the discussion in #2542 and the work from the closed PR #2568.

This change refactors the external semaphore tests by unifying
`external_semaphores_import_export_fd` with
`external_semaphores_cross_context` tests, removing duplicated logic and
avoiding OS-specific conditions. The updated test now covers all
import/export handle types consistently across single- and multi-context
scenarios.
---
 .../cl_khr_external_semaphore/CMakeLists.txt  |   1 -
 .../test_external_semaphore.cpp               |  93 +++++++++---
 .../test_external_semaphore_sync_fd.cpp       | 142 ------------------
 3 files changed, 76 insertions(+), 160 deletions(-)
 delete mode 100644 test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore_sync_fd.cpp

diff --git a/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt b/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt
index 19cef51d..34d9034b 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt
@@ -3,7 +3,6 @@ set(MODULE_NAME CL_KHR_EXTERNAL_SEMAPHORE)
 set(${MODULE_NAME}_SOURCES
     main.cpp
     test_external_semaphore.cpp
-    test_external_semaphore_sync_fd.cpp
 )
 
 set (CLConform_VULKAN_LIBRARIES_DIR "${VULKAN_LIB_DIR}")
diff --git a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp
index 198bf046..b125864f 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp
+++ b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp
@@ -197,8 +197,11 @@ REGISTER_TEST_VERSION(external_semaphores_queries, Version(1, 2))
     return TEST_PASS;
 }
 
-REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
+cl_int doTestImportExport(cl_device_id device, cl_context contexts[2],
+                          cl_command_queue queues[2])
 {
+    cl_int err = CL_SUCCESS;
+
     REQUIRE_EXTENSION("cl_khr_external_semaphore");
 
     GET_PFN(device, clEnqueueSignalSemaphoresKHR);
@@ -210,7 +213,6 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
     std::vector<cl_external_semaphore_handle_type_khr> import_handle_types;
     std::vector<cl_external_semaphore_handle_type_khr> export_handle_types;
 
-    cl_int err = CL_SUCCESS;
     err = get_device_semaphore_handle_types(
         device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
         import_handle_types);
@@ -237,17 +239,9 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
         export_handle_types.begin(), export_handle_types.end(),
         std::back_inserter(import_export_handle_types));
 
-    cl_context context2 =
-        clCreateContext(NULL, 1, &device, notify_callback, NULL, &err);
-    test_error(err, "Failed to create context2");
-
-    clCommandQueueWrapper queue1 =
-        clCreateCommandQueue(context, device, 0, &err);
-    test_error(err, "Could not create command queue");
-
-    clCommandQueueWrapper queue2 =
-        clCreateCommandQueue(context2, device, 0, &err);
-    test_error(err, "Could not create command queue");
+    cl_context& context2 = contexts[1];
+    cl_command_queue& queue1 = queues[0];
+    cl_command_queue& queue2 = queues[1];
 
     if (import_export_handle_types.empty())
     {
@@ -270,7 +264,7 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
 
         // Signal semaphore on context1
         cl_semaphore_khr exportable_semaphore =
-            clCreateSemaphoreWithPropertiesKHR(context, export_props, &err);
+            clCreateSemaphoreWithPropertiesKHR(contexts[0], export_props, &err);
         test_error(err, "Failed to create exportable semaphore");
 
         err = clEnqueueSignalSemaphoresKHR(queue1, 1, &exportable_semaphore,
@@ -313,12 +307,77 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
         test_error(err, "Failed to release semaphore");
     }
 
-    err = clReleaseContext(context2);
-    test_error(err, "Failed to release context2");
-
     return TEST_PASS;
 }
 
+REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
+{
+    cl_int err = CL_SUCCESS;
+
+    clContextWrapper context_sec =
+        clCreateContext(NULL, 1, &device, notify_callback, NULL, &err);
+    test_error(err, "Failed to create context2");
+    cl_context contexts[2] = { context, context_sec };
+
+    clCommandQueueWrapper queue0 =
+        clCreateCommandQueue(context, device, 0, &err);
+    test_error(err, "Could not create command queue");
+
+    clCommandQueueWrapper queue1 =
+        clCreateCommandQueue(contexts[1], device, 0, &err);
+    test_error(err, "Could not create command queue");
+    cl_command_queue queues[2] = { queue0, queue1 };
+
+    return doTestImportExport(device, contexts, queues);
+}
+
+REGISTER_TEST_VERSION(external_semaphores_import_export, Version(1, 2))
+{
+    cl_int err = CL_SUCCESS;
+    cl_int total_status = TEST_PASS;
+
+    // test external semaphores with out-of-order queue
+    {
+        cl_command_queue_properties device_props = 0;
+        err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
+                              sizeof(device_props), &device_props, NULL);
+        test_error(err,
+                   "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
+
+        if ((device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0)
+        {
+            // Create ooo queue
+            clCommandQueueWrapper test_queue = clCreateCommandQueue(
+                context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+            test_error(err, "Could not create command queue");
+
+            cl_command_queue queues[2] = { test_queue, test_queue };
+            cl_context contexts[2] = { context, context };
+
+            cl_int status = doTestImportExport(device, contexts, queues);
+            if (status != TEST_PASS && status != TEST_SKIPPED_ITSELF)
+            {
+                total_status = TEST_FAIL;
+            }
+        }
+    }
+
+    // test external semaphores with in-order harness queue
+    {
+        cl_command_queue queues[2] = { queue, queue };
+        cl_context contexts[2] = { context, context };
+
+        cl_int status = doTestImportExport(device, contexts, queues);
+        if (status != TEST_PASS && status != TEST_SKIPPED_ITSELF)
+        {
+            total_status = TEST_FAIL;
+        }
+    }
+
+    return total_status;
+}
+
+
 // Confirm that a signal followed by a wait will complete successfully
 REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
 {
diff --git a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore_sync_fd.cpp b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore_sync_fd.cpp
deleted file mode 100644
index f4636c9a..00000000
--- a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore_sync_fd.cpp
+++ /dev/null
@@ -1,142 +0,0 @@
-//
-// Copyright (c) 2024 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "harness/typeWrappers.h"
-#include "harness/extensionHelpers.h"
-#include "harness/errorHelpers.h"
-
-cl_int doTest(cl_device_id device, cl_context context, cl_command_queue queue)
-{
-    cl_int err = CL_SUCCESS;
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-    GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-
-    // Create semaphore
-    cl_semaphore_properties_khr sema_1_props[] = {
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_HANDLE_SYNC_FD_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
-        0
-    };
-    cl_semaphore_khr sema_1 =
-        clCreateSemaphoreWithPropertiesKHR(context, sema_1_props, &err);
-    test_error(err, "Could not create semaphore");
-
-    // Signal semaphore
-    clEventWrapper signal_event;
-    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_1, nullptr, 0, nullptr,
-                                       &signal_event);
-    test_error(err, "Could not signal semaphore");
-
-    // Extract sync fd
-    int handle = -1;
-    size_t handle_size;
-    err = clGetSemaphoreHandleForTypeKHR(sema_1, device,
-                                         CL_SEMAPHORE_HANDLE_SYNC_FD_KHR,
-                                         sizeof(handle), &handle, &handle_size);
-    test_error(err, "Could not extract semaphore handle");
-    test_assert_error(sizeof(handle) == handle_size, "Invalid handle size");
-    test_assert_error(handle >= 0, "Invalid handle");
-
-    // Create semaphore from sync fd
-    cl_semaphore_properties_khr sema_2_props[] = {
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
-        CL_SEMAPHORE_HANDLE_SYNC_FD_KHR,
-        static_cast<cl_semaphore_properties_khr>(handle), 0
-    };
-
-    cl_semaphore_khr sema_2 =
-        clCreateSemaphoreWithPropertiesKHR(context, sema_2_props, &err);
-    test_error(err, "Could not create semaphore");
-
-    // Wait semaphore
-    clEventWrapper wait_event;
-    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_2, nullptr, 0, nullptr,
-                                     &wait_event);
-    test_error(err, "Could not wait semaphore");
-
-    // Finish
-    err = clFinish(queue);
-    test_error(err, "Could not finish queue");
-
-    // Check all events are completed
-    test_assert_event_complete(signal_event);
-    test_assert_event_complete(wait_event);
-
-    // Release semaphore
-    err = clReleaseSemaphoreKHR(sema_1);
-    test_error(err, "Could not release semaphore");
-
-    err = clReleaseSemaphoreKHR(sema_2);
-    test_error(err, "Could not release semaphore");
-    return TEST_PASS;
-}
-
-// Test it is possible to export a semaphore to a sync fd and import the same
-// sync fd to a new semaphore
-REGISTER_TEST_VERSION(external_semaphores_import_export_fd, Version(1, 2))
-{
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_sync_fd");
-
-    cl_int err = CL_SUCCESS;
-    cl_int total_status = TEST_PASS;
-
-    // test external semaphore sync fd with out-of-order queue
-    {
-        cl_command_queue_properties device_props = 0;
-        err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
-                              sizeof(device_props), &device_props, NULL);
-        test_error(err,
-                   "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
-
-        if ((device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0)
-        {
-            // Create ooo queue
-            clCommandQueueWrapper test_queue = clCreateCommandQueue(
-                context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
-            test_error(err, "Could not create command queue");
-
-            cl_int status = doTest(device, context, test_queue);
-            if (status != TEST_PASS && status != TEST_SKIPPED_ITSELF)
-            {
-                total_status = TEST_FAIL;
-            }
-        }
-    }
-
-    // test external semaphore sync fd with in-order harness queue
-    {
-        cl_int status = doTest(device, context, queue);
-        if (status != TEST_PASS && status != TEST_SKIPPED_ITSELF)
-        {
-            total_status = TEST_FAIL;
-        }
-    }
-
-    return total_status;
-}

From 119af24d54d1ba75b03abdf056aa5dc2d3cbefb7 Mon Sep 17 00:00:00 2001
From: Yilong Guo <yilong.guo@intel.com>
Date: Wed, 17 Dec 2025 00:37:33 +0800
Subject: [PATCH 28/54] c11_atomics: unify host half representation and
 conversion with wrapper class (#2503)

Introduce `HostHalf` wrapper class to eliminate explicit
`cl_half_from_float`
and `cl_half_to_float` conversions throughout the test code. The wrapper
provides semantic value constructors/operators and automatic
conversions,
simplifying half-precision arithmetic operations.

Key improvements:
- `HostHalf` class with operator overloading for arithmetic and
comparisons
- Type traits `is_host_atomic_fp_v` and `is_host_fp_v` for generic FP
handling
- Unified floating-point atomic operations (add/sub/min/max/exchange)
- Removed 300+ lines of half-specific conditional branches
- Consistent calculation for all FP types
---
 test_conformance/c11_atomics/common.cpp       |   8 +-
 test_conformance/c11_atomics/common.h         |  21 +-
 test_conformance/c11_atomics/host_atomics.h   | 173 ++++--
 test_conformance/c11_atomics/test_atomics.cpp | 540 ++++--------------
 4 files changed, 248 insertions(+), 494 deletions(-)

diff --git a/test_conformance/c11_atomics/common.cpp b/test_conformance/c11_atomics/common.cpp
index 3be3fbc1..7bb2da76 100644
--- a/test_conformance/c11_atomics/common.cpp
+++ b/test_conformance/c11_atomics/common.cpp
@@ -194,9 +194,9 @@ template<> cl_int AtomicTypeExtendedInfo<cl_int>::MinValue() {return CL_INT_MIN;
 template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MinValue() {return 0;}
 template<> cl_long AtomicTypeExtendedInfo<cl_long>::MinValue() {return CL_LONG_MIN;}
 template <> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MinValue() { return 0; }
-template <> cl_half AtomicTypeExtendedInfo<cl_half>::MinValue()
+template <> HostHalf AtomicTypeExtendedInfo<HostHalf>::MinValue()
 {
-    return cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode);
+    return -CL_HALF_MAX;
 }
 template <> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue()
 {
@@ -217,9 +217,9 @@ template <> cl_uint AtomicTypeExtendedInfo<cl_uint>::MaxValue()
 }
 template<> cl_long AtomicTypeExtendedInfo<cl_long>::MaxValue() {return CL_LONG_MAX;}
 template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MaxValue() {return CL_ULONG_MAX;}
-template <> cl_half AtomicTypeExtendedInfo<cl_half>::MaxValue()
+template <> HostHalf AtomicTypeExtendedInfo<HostHalf>::MaxValue()
 {
-    return cl_half_from_float(CL_HALF_MAX, gHalfRoundingMode);
+    return CL_HALF_MAX;
 }
 template <> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue()
 {
diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h
index 09535a88..c9494c6c 100644
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h
@@ -183,7 +183,8 @@ public:
                         const std::vector<HostAtomicType> &testValues,
                         cl_uint whichDestValue)
     {
-        return expected != testValues[whichDestValue];
+        return expected
+            != static_cast<HostDataType>(testValues[whichDestValue]);
     }
     virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                               MTdata d)
@@ -911,12 +912,9 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
             + ss.str() + "] = {\n";
         ss.str("");
 
-        if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
-            == TYPE_ATOMIC_FLOAT)
-            ss << std::setprecision(10) << _startValue;
-        else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
-                 == TYPE_ATOMIC_HALF)
-            ss << cl_half_to_float(static_cast<cl_half>(_startValue));
+        if constexpr (is_host_fp_v<HostDataType>)
+            ss << std::hexfloat
+               << _startValue; // use hex format for accurate representation
         else
             ss << _startValue;
 
@@ -1305,7 +1303,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
     numDestItems = NumResults(threadCount, deviceID);
 
     destItems.resize(numDestItems);
-    for (cl_uint i = 0; i < numDestItems; i++) destItems[i] = _startValue;
+    for (cl_uint i = 0; i < numDestItems; i++)
+        destItems[i] = static_cast<HostAtomicType>(_startValue);
 
     // Create main buffer with atomic variables (array size dependent on
     // particular test)
@@ -1483,7 +1482,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
             std::stringstream logLine;
             logLine << "ERROR: Result " << i
                     << " from kernel does not validate! (should be " << expected
-                    << ", was " << destItems[i] << ")\n";
+                    << ", was " << static_cast<HostDataType>(destItems[i])
+                    << ")\n";
             log_error("%s", logLine.str().c_str());
             for (i = 0; i < threadCount; i++)
             {
@@ -1550,7 +1550,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
                                  // clEnqueueNDRangeKernel
     {
         /* Re-write the starting value */
-        for (size_t i = 0; i < numDestItems; i++) destItems[i] = _startValue;
+        for (size_t i = 0; i < numDestItems; i++)
+            destItems[i] = static_cast<HostAtomicType>(_startValue);
         refValues[0] = 0;
         if (deviceThreadCount > 0)
         {
diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h
index fac21642..8d875bc9 100644
--- a/test_conformance/c11_atomics/host_atomics.h
+++ b/test_conformance/c11_atomics/host_atomics.h
@@ -24,6 +24,8 @@
 #include "Windows.h"
 #endif
 
+extern cl_half_rounding_mode gHalfRoundingMode;
+
 //flag for test verification (good test should discover non-atomic functions and fail)
 //#define NON_ATOMIC_FUNCTIONS
 
@@ -37,6 +39,93 @@ enum TExplicitMemoryOrderType
   MEMORY_ORDER_SEQ_CST
 };
 
+// Wrapper class for half-precision
+class HostHalf {
+public:
+    // Convert from semantic values
+    HostHalf(cl_uint value = 0)
+        : value(
+            cl_half_from_float(static_cast<float>(value), gHalfRoundingMode))
+    {}
+    HostHalf(int value): HostHalf(static_cast<float>(value)) {} // keep sign
+    HostHalf(float value): value(cl_half_from_float(value, gHalfRoundingMode))
+    {}
+    HostHalf(double value): HostHalf(static_cast<float>(value)) {}
+
+    // Convert to semantic values
+    operator cl_uint() const
+    {
+        return static_cast<cl_uint>(cl_half_to_float(value));
+    }
+    operator float() const { return cl_half_to_float(value); }
+    operator double() const
+    {
+        return static_cast<double>(cl_half_to_float(value));
+    }
+
+    // Construct from bit representation
+    HostHalf(cl_half value): value(value) {}
+
+    // Get the underlying bit representation
+    operator cl_half() const { return value; }
+
+    HostHalf operator-() const
+    {
+        return HostHalf(
+            cl_half_from_float(-cl_half_to_float(value), gHalfRoundingMode));
+    }
+
+#define GENERIC_OP(RetType, op)                                                \
+    RetType operator op(const HostHalf &other) const                           \
+    {                                                                          \
+        return RetType(cl_half_to_float(value)                                 \
+                           op cl_half_to_float(other.value));                  \
+    }
+
+    GENERIC_OP(bool, ==)
+    GENERIC_OP(bool, !=)
+    GENERIC_OP(bool, <)
+    GENERIC_OP(bool, <=)
+    GENERIC_OP(bool, >)
+    GENERIC_OP(bool, >=)
+    GENERIC_OP(HostHalf, +)
+    GENERIC_OP(HostHalf, -)
+    GENERIC_OP(HostHalf, *)
+    GENERIC_OP(HostHalf, /)
+#undef GENERIC_OP
+
+#define INPLACE_OP(op)                                                         \
+    HostHalf &operator op##=(const HostHalf &other)                            \
+    {                                                                          \
+        value = cl_half_from_float(cl_half_to_float(value)                     \
+                                       op cl_half_to_float(other.value),       \
+                                   gHalfRoundingMode);                         \
+        return *this;                                                          \
+    }
+    INPLACE_OP(+)
+    INPLACE_OP(-)
+    INPLACE_OP(*)
+    INPLACE_OP(/)
+#undef INPLACE_OP
+
+    friend std::ostream &operator<<(std::ostream &os, const HostHalf &hh)
+    {
+        float f = cl_half_to_float(hh.value);
+        os << f;
+        return os;
+    }
+
+private:
+    cl_half value;
+};
+
+namespace std {
+inline HostHalf abs(const HostHalf &value)
+{
+    return value < HostHalf(0) ? -value : value;
+}
+} // namespace std
+
 // host atomic types (applicable for atomic functions supported on host OS)
 #ifdef WIN32
 #define HOST_ATOMIC_INT         unsigned long
@@ -73,7 +162,7 @@ enum TExplicitMemoryOrderType
 #define HOST_UINT               cl_uint
 #define HOST_LONG               cl_long
 #define HOST_ULONG              cl_ulong
-#define HOST_HALF cl_half
+#define HOST_HALF HostHalf
 #define HOST_FLOAT              cl_float
 #define HOST_DOUBLE             cl_double
 
@@ -91,6 +180,18 @@ enum TExplicitMemoryOrderType
 
 extern cl_half_rounding_mode gHalfRoundingMode;
 
+template <typename HostAtomicType>
+constexpr bool is_host_atomic_fp_v =
+    std::disjunction_v<std::is_same<HostAtomicType, HOST_ATOMIC_HALF>,
+                       std::is_same<HostAtomicType, HOST_ATOMIC_FLOAT>,
+                       std::is_same<HostAtomicType, HOST_ATOMIC_DOUBLE>>;
+
+template <typename HostDataType>
+constexpr bool is_host_fp_v =
+    std::disjunction_v<std::is_same<HostDataType, HOST_HALF>,
+                       std::is_same<HostDataType, HOST_FLOAT>,
+                       std::is_same<HostDataType, HOST_DOUBLE>>;
+
 // host atomic functions
 void host_atomic_thread_fence(TExplicitMemoryOrderType order);
 
@@ -98,24 +199,13 @@ template <typename AtomicType, typename CorrespondingType>
 CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
                                         TExplicitMemoryOrderType order)
 {
-    if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
+    if constexpr (is_host_atomic_fp_v<AtomicType>)
     {
         static std::mutex mx;
         std::lock_guard<std::mutex> lock(mx);
         CorrespondingType old_value = *a;
-        *a = cl_half_from_float((cl_half_to_float(*a) + cl_half_to_float(c)),
-                                gHalfRoundingMode);
-        return old_value;
-    }
-    else if constexpr (
-        std::is_same_v<
-            AtomicType,
-            HOST_ATOMIC_FLOAT> || std::is_same_v<AtomicType, HOST_ATOMIC_DOUBLE>)
-    {
-        static std::mutex mx;
-        std::lock_guard<std::mutex> lock(mx);
-        CorrespondingType old_value = *a;
-        *a += c;
+        CorrespondingType new_value = old_value + c;
+        *a = static_cast<AtomicType>(new_value);
         return old_value;
     }
     else
@@ -135,24 +225,13 @@ template <typename AtomicType, typename CorrespondingType>
 CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
                                         TExplicitMemoryOrderType order)
 {
-    if constexpr (
-        std::is_same_v<
-            AtomicType,
-            HOST_ATOMIC_DOUBLE> || std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
+    if constexpr (is_host_atomic_fp_v<AtomicType>)
     {
         static std::mutex mx;
         std::lock_guard<std::mutex> lock(mx);
         CorrespondingType old_value = *a;
-        *a -= c;
-        return old_value;
-    }
-    else if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
-    {
-        static std::mutex mx;
-        std::lock_guard<std::mutex> lock(mx);
-        CorrespondingType old_value = *a;
-        *a = cl_half_from_float((cl_half_to_float(*a) - cl_half_to_float(c)),
-                                gHalfRoundingMode);
+        CorrespondingType new_value = old_value - c;
+        *a = static_cast<AtomicType>(new_value);
         return old_value;
     }
     else
@@ -173,12 +252,14 @@ CorrespondingType host_atomic_exchange(volatile AtomicType *a, CorrespondingType
                                        TExplicitMemoryOrderType order)
 {
 #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
-    if (sizeof(CorrespondingType) == 2)
-        return InterlockedExchange16(reinterpret_cast<volatile SHORT *>(a), c);
+    if constexpr (sizeof(CorrespondingType) == 2)
+        return InterlockedExchange16(reinterpret_cast<volatile SHORT *>(a),
+                                     *reinterpret_cast<SHORT *>(&c));
     else
-        return InterlockedExchange(reinterpret_cast<volatile LONG *>(a), c);
+        return InterlockedExchange(reinterpret_cast<volatile LONG *>(a),
+                                   *reinterpret_cast<LONG *>(&c));
 #elif defined(__GNUC__)
-    return __sync_lock_test_and_set(a, c);
+    return __sync_lock_test_and_set(a, *reinterpret_cast<AtomicType *>(&c));
 #else
   log_info("Host function not implemented: atomic_exchange\n");
   return 0;
@@ -195,30 +276,14 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
                                   TExplicitMemoryOrderType order_failure)
 {
     CorrespondingType tmp;
-    if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
+    if constexpr (is_host_atomic_fp_v<AtomicType>)
     {
         static std::mutex mtx;
         std::lock_guard<std::mutex> lock(mtx);
-        tmp = *reinterpret_cast<volatile cl_half *>(a);
-
-        if (cl_half_to_float(tmp) == cl_half_to_float(*expected))
-        {
-            *reinterpret_cast<volatile cl_half *>(a) = desired;
-            return true;
-        }
-        *expected = tmp;
-    }
-    else if constexpr (
-        std::is_same_v<
-            AtomicType,
-            HOST_ATOMIC_DOUBLE> || std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
-    {
-        static std::mutex mtx;
-        std::lock_guard<std::mutex> lock(mtx);
-        tmp = *reinterpret_cast<volatile float *>(a);
+        tmp = static_cast<CorrespondingType>(*a);
         if (tmp == *expected)
         {
-            *a = desired;
+            *a = static_cast<AtomicType>(desired);
             return true;
         }
         *expected = tmp;
@@ -244,8 +309,8 @@ CorrespondingType host_atomic_load(volatile AtomicType *a,
                                    TExplicitMemoryOrderType order)
 {
 #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
-    if (sizeof(CorrespondingType) == 2)
-        auto prev = InterlockedOr16(reinterpret_cast<volatile SHORT *>(a), 0);
+    if constexpr (sizeof(CorrespondingType) == 2)
+        return InterlockedOr16(reinterpret_cast<volatile SHORT *>(a), 0);
     else
         return InterlockedExchangeAdd(reinterpret_cast<volatile LONG *>(a), 0);
 #elif defined(__GNUC__)
diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index 56f350ad..b1326e28 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -94,13 +94,7 @@ public:
                                HostDataType *startRefValues,
                                cl_uint whichDestValue)
     {
-        if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
-                ._type
-            != TYPE_ATOMIC_HALF)
-            expected = (HostDataType)whichDestValue;
-        else
-            expected = cl_half_from_float(static_cast<float>(whichDestValue),
-                                          gHalfRoundingMode);
+        expected = static_cast<HostDataType>(whichDestValue);
         return true;
     }
 };
@@ -401,13 +395,7 @@ public:
                                HostDataType *startRefValues,
                                cl_uint whichDestValue)
     {
-        if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
-                ._type
-            != TYPE_ATOMIC_HALF)
-            expected = (HostDataType)whichDestValue;
-        else
-            expected = cl_half_from_float(static_cast<float>(whichDestValue),
-                                          gHalfRoundingMode);
+        expected = static_cast<HostDataType>(whichDestValue);
         return true;
     }
     virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
@@ -417,25 +405,11 @@ public:
         correct = true;
         for (cl_uint i = 0; i < threadCount; i++)
         {
-            if constexpr (std::is_same_v<HostDataType, cl_half>)
+            if (refValues[i] != (HostDataType)i)
             {
-                HostDataType test = cl_half_from_float(static_cast<float>(i),
-                                                       gHalfRoundingMode);
-                if (refValues[i] != test)
-                {
-                    log_error("Invalid value for thread %u\n", (cl_uint)i);
-                    correct = false;
-                    return true;
-                }
-            }
-            else
-            {
-                if (refValues[i] != (HostDataType)i)
-                {
-                    log_error("Invalid value for thread %u\n", (cl_uint)i);
-                    correct = false;
-                    return true;
-                }
+                log_error("Invalid value for thread %u\n", (cl_uint)i);
+                correct = false;
+                return true;
             }
         }
         return true;
@@ -553,11 +527,7 @@ public:
         : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                 useSVM)
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
-            StartValue(cl_half_from_float(static_cast<float>(1234),
-                                          gHalfRoundingMode));
-        else
-            StartValue(123456);
+        StartValue(1234);
     }
     virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                   cl_command_queue queue)
@@ -619,32 +589,19 @@ public:
         /* Any repeated value is treated as an error */
         std::vector<bool> tidFound(threadCount);
         bool startValueFound = false;
-        cl_uint startVal = StartValue();
-
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
-            startVal = static_cast<cl_uint>(
-                cl_half_to_float(static_cast<cl_half>(StartValue())));
+        cl_uint startVal = static_cast<cl_uint>(StartValue());
 
         for (cl_uint i = 0; i <= threadCount; i++)
         {
             cl_uint value = 0;
             if (i == threadCount)
             {
-                if constexpr (!std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
-                    value =
-                        (cl_uint)finalValues[0]; // additional value from atomic
-                                                 // variable (last written)
-                else
-                    value =
-                        cl_half_to_float(static_cast<cl_half>(finalValues[0]));
+                value = static_cast<cl_uint>(
+                    static_cast<HostDataType>(finalValues[0]));
             }
             else
             {
-                if constexpr (!std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
-                    value = (cl_uint)refValues[i];
-                else
-                    value =
-                        cl_half_to_float(static_cast<cl_half>(refValues[i]));
+                value = static_cast<cl_uint>(refValues[i]);
             }
 
             if (value == startVal)
@@ -1201,85 +1158,24 @@ public:
                                                                 useSVM),
           min_range(-999.0), max_range(999.0), max_error(0.0)
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             StartValue((HostDataType)0.0);
             CBasicTestMemOrderScope<HostAtomicType,
                                     HostDataType>::OldValueCheck(false);
+
+            // Narrow down range for half to avoid overflow to infinity
+            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+            {
+                min_range = -50.0;
+                max_range = 50.0;
+            }
         }
     }
-    template <typename Iterator> float accum_halfs(Iterator begin, Iterator end)
-    {
-        cl_half sum = 0;
-        for (auto it = begin; it != end; ++it)
-        {
-            sum = cl_half_from_float(cl_half_to_float(sum)
-                                         + cl_half_to_float(*it),
-                                     gHalfRoundingMode);
-        }
-        return cl_half_to_float(sum);
-    }
     bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                       MTdata d) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (threadCount > ref_vals.size())
-            {
-                ref_vals.resize(threadCount);
-
-                for (cl_uint i = 0; i < threadCount; i++)
-                    ref_vals[i] = cl_half_from_float(
-                        get_random_float(min_range, max_range, d),
-                        gHalfRoundingMode);
-
-                memcpy(startRefValues, ref_vals.data(),
-                       sizeof(HostDataType) * ref_vals.size());
-
-                // Estimate highest possible summation error for given set.
-                std::vector<float> sums;
-                std::sort(ref_vals.begin(), ref_vals.end(),
-                          [](cl_half a, cl_half b) {
-                              return cl_half_to_float(a) < cl_half_to_float(b);
-                          });
-
-                sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
-                sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
-
-                std::sort(ref_vals.begin(), ref_vals.end(),
-                          [](cl_half a, cl_half b) {
-                              return std::abs(cl_half_to_float(a))
-                                  < std::abs(cl_half_to_float(b));
-                          });
-
-                float precise = 0.f;
-                for (auto elem : ref_vals) precise += cl_half_to_float(elem);
-                sums.push_back(precise);
-
-                sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
-                sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
-
-                std::sort(sums.begin(), sums.end());
-                max_error = std::abs(sums.front() - sums.back());
-
-                // restore unsorted order
-                memcpy(ref_vals.data(), startRefValues,
-                       sizeof(HostDataType) * ref_vals.size());
-            }
-            else
-            {
-                memcpy(startRefValues, ref_vals.data(),
-                       sizeof(HostDataType) * threadCount);
-            }
-            return true;
-        }
-        else if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (threadCount > ref_vals.size())
             {
@@ -1299,11 +1195,12 @@ public:
                 std::vector<HostDataType> sums;
                 std::sort(ref_vals.begin(), ref_vals.end());
 
-                sums.push_back(
-                    std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
+                sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(),
+                                               static_cast<HostDataType>(0.f)));
 
-                sums.push_back(
-                    std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
+                sums.push_back(std::accumulate(ref_vals.rbegin(),
+                                               ref_vals.rend(),
+                                               static_cast<HostDataType>(0.f)));
 
                 std::sort(ref_vals.begin(), ref_vals.end(),
                           [](HostDataType a, HostDataType b) {
@@ -1318,15 +1215,25 @@ public:
 
                 sums.push_back(precise);
 
-                sums.push_back(
-                    std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
+                sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(),
+                                               static_cast<HostDataType>(0.f)));
 
-                sums.push_back(
-                    std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
+                sums.push_back(std::accumulate(ref_vals.rbegin(),
+                                               ref_vals.rend(),
+                                               static_cast<HostDataType>(0.f)));
 
                 std::sort(sums.begin(), sums.end());
+                assert(std::all_of(sums.begin(), sums.end(),
+                                   [](const HostDataType &val) {
+                                       return std::isfinite(
+                                           static_cast<double>(val));
+                                   })
+                       && "Infinite summation value detected!");
                 max_error = std::abs(sums.front() - sums.back());
 
+                log_info("Max allowed error for %u elements: %.10f\n",
+                         threadCount, max_error);
+
                 // restore unsorted order
                 memcpy(ref_vals.data(), startRefValues,
                        sizeof(HostDataType) * ref_vals.size());
@@ -1345,10 +1252,7 @@ public:
         std::string memoryOrderScope = MemoryOrderScopeStr();
         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
 
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return "  atomic_fetch_add" + postfix + "(&destMemory[0], ("
                 + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1382,10 +1286,7 @@ public:
                       volatile HostAtomicType *destMemory,
                       HostDataType *oldValues) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
                                   MemoryOrder());
@@ -1411,23 +1312,7 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (whichDestValue == 0)
-            {
-                for (cl_uint i = 0; i < threadCount; i++)
-                {
-                    expected = cl_half_from_float(
-                        cl_half_to_float(expected)
-                            + cl_half_to_float(startRefValues[i]),
-                        gHalfRoundingMode);
-                }
-            }
-        }
-        else if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 for (cl_uint i = 0; i < threadCount; i++)
@@ -1446,21 +1331,13 @@ public:
                              const std::vector<HostAtomicType> &testValues,
                              cl_uint whichDestValue) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
-                return std::abs(cl_half_to_float(expected)
-                                - cl_half_to_float(testValues[whichDestValue]))
-                    > max_error;
-        }
-        else if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
-        {
-            if (whichDestValue == 0)
-                return std::abs((HostDataType)expected
-                                - testValues[whichDestValue])
+                return std::abs(
+                           static_cast<double>(expected
+                                               - static_cast<HostDataType>(
+                                                   testValues[whichDestValue])))
                     > max_error;
         }
         return CBasicTestMemOrderScope<
@@ -1471,10 +1348,7 @@ public:
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                     HostAtomicType *finalValues) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             correct = true;
             for (cl_uint i = 1; i < threadCount; i++)
@@ -1534,10 +1408,7 @@ public:
     }
     cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return threadCount;
         }
@@ -1657,8 +1528,8 @@ template <> double kahan_sub<double>(const std::vector<double> &nums)
     double compensation = 0.0;
     for (double num : nums)
     {
-        double y = num - compensation;
-        double t = sum - y;
+        double y = -num - compensation;
+        double t = sum + y;
         compensation = (t - sum) - y;
         sum = t;
     }
@@ -1685,14 +1556,18 @@ public:
                                                                 useSVM),
           min_range(-999.0), max_range(999.0), max_error(0.0)
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_HALF>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             StartValue(0);
             CBasicTestMemOrderScope<HostAtomicType,
                                     HostDataType>::OldValueCheck(false);
+
+            // Narrow down range for half to avoid overflow to infinity
+            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+            {
+                min_range = -50.0;
+                max_range = 50.0;
+            }
         }
     }
     template <typename Iterator>
@@ -1702,25 +1577,10 @@ public:
         for (auto it = begin; it != end; ++it) res = res - *it;
         return res;
     }
-    template <typename Iterator>
-    float subtract_halfs(Iterator begin, Iterator end)
-    {
-        cl_half res = 0;
-        for (auto it = begin; it != end; ++it)
-        {
-            res = cl_half_from_float(cl_half_to_float(res)
-                                         - cl_half_to_float(*it),
-                                     gHalfRoundingMode);
-        }
-        return cl_half_to_float(res);
-    }
     bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                       MTdata d) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (threadCount > ref_vals.size())
             {
@@ -1736,105 +1596,36 @@ public:
                 // Estimate highest possible subtraction error for given set.
                 std::vector<HostDataType> sums;
                 std::sort(ref_vals.begin(), ref_vals.end());
+                sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
+                sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
 
+                std::sort(ref_vals.begin(), ref_vals.end(),
+                          [](HostDataType a, HostDataType b) {
+                              return std::abs(a) < std::abs(b);
+                          });
+
+                double precise = 0.0;
                 if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
-                {
-                    sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
-
-                    sums.push_back(
-                        subtract(ref_vals.rbegin(), ref_vals.rend()));
-
-                    std::sort(ref_vals.begin(), ref_vals.end(),
-                              [](double a, double b) {
-                                  return std::abs(a) < std::abs(b);
-                              });
-
-                    double precise = kahan_sub(ref_vals);
-                    sums.push_back(precise);
-
-                    sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
-
-                    sums.push_back(
-                        subtract(ref_vals.rbegin(), ref_vals.rend()));
-
-                    std::sort(sums.begin(), sums.end());
-                    max_error = std::abs((double)sums.front() - sums.back());
-                }
+                    precise = kahan_sub(ref_vals);
                 else
-                {
-                    sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
-                    sums.push_back(
-                        subtract(ref_vals.rbegin(), ref_vals.rend()));
-
-                    std::sort(ref_vals.begin(), ref_vals.end(),
-                              [](float a, float b) {
-                                  return std::abs(a) < std::abs(b);
-                              });
-
-                    double precise = 0.0;
                     for (auto elem : ref_vals) precise += double(elem);
-                    sums.push_back(precise);
-                    sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
-                    sums.push_back(
-                        subtract(ref_vals.rbegin(), ref_vals.rend()));
-
-                    std::sort(sums.begin(), sums.end());
-                    max_error =
-                        std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
-                }
-
-                // restore unsorted order
-                memcpy(ref_vals.data(), startRefValues,
-                       sizeof(HostDataType) * ref_vals.size());
-            }
-            else
-            {
-                memcpy(startRefValues, ref_vals.data(),
-                       sizeof(HostDataType) * threadCount);
-            }
-            return true;
-        }
-        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (threadCount > ref_vals.size())
-            {
-                ref_vals.resize(threadCount);
-                for (cl_uint i = 0; i < threadCount; i++)
-                    ref_vals[i] = cl_half_from_float(
-                        get_random_float(min_range, max_range, d),
-                        gHalfRoundingMode);
-
-                memcpy(startRefValues, ref_vals.data(),
-                       sizeof(HostDataType) * ref_vals.size());
-
-                // Estimate highest possible summation error for given set.
-                std::vector<float> sums;
-                std::sort(ref_vals.begin(), ref_vals.end(),
-                          [](cl_half a, cl_half b) {
-                              return cl_half_to_float(a) < cl_half_to_float(b);
-                          });
-
-                sums.push_back(
-                    subtract_halfs(ref_vals.begin(), ref_vals.end()));
-                sums.push_back(
-                    subtract_halfs(ref_vals.rbegin(), ref_vals.rend()));
-
-                std::sort(ref_vals.begin(), ref_vals.end(),
-                          [](cl_half a, cl_half b) {
-                              return std::abs(cl_half_to_float(a))
-                                  < std::abs(cl_half_to_float(b));
-                          });
-
-                float precise = 0.f;
-                for (auto elem : ref_vals) precise -= cl_half_to_float(elem);
                 sums.push_back(precise);
-                sums.push_back(
-                    subtract_halfs(ref_vals.begin(), ref_vals.end()));
-                sums.push_back(
-                    subtract_halfs(ref_vals.rbegin(), ref_vals.rend()));
+
+                sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
+                sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
 
                 std::sort(sums.begin(), sums.end());
+                assert(std::all_of(sums.begin(), sums.end(),
+                                   [](const HostDataType &val) {
+                                       return std::isfinite(
+                                           static_cast<double>(val));
+                                   })
+                       && "Infinite subtraction value detected!");
                 max_error = std::abs(sums.front() - sums.back());
+
+                log_info("Max allowed error for %u elements: %.10f\n",
+                         threadCount, max_error);
+
                 // restore unsorted order
                 memcpy(ref_vals.data(), startRefValues,
                        sizeof(HostDataType) * ref_vals.size());
@@ -1853,10 +1644,7 @@ public:
         std::string memoryOrderScope = MemoryOrderScopeStr();
         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
 
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return "  atomic_fetch_sub" + postfix + "(&destMemory[0], ("
                 + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1878,10 +1666,7 @@ public:
                       volatile HostAtomicType *destMemory,
                       HostDataType *oldValues) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
                                   MemoryOrder());
@@ -1903,29 +1688,12 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 for (cl_uint i = 0; i < threadCount; i++)
                     expected -= startRefValues[i];
         }
-        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (whichDestValue == 0)
-            {
-                for (cl_uint i = 0; i < threadCount; i++)
-                {
-                    expected = cl_half_from_float(
-                        cl_half_to_float(expected)
-                            - cl_half_to_float(startRefValues[i]),
-                        gHalfRoundingMode);
-                }
-            }
-        }
         else
         {
             for (cl_uint i = 0; i < threadCount; i++)
@@ -1938,21 +1706,13 @@ public:
                              const std::vector<HostAtomicType> &testValues,
                              cl_uint whichDestValue) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
-                return std::abs((HOST_ATOMIC_FLOAT)expected
-                                - testValues[whichDestValue])
-                    > max_error;
-        }
-        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (whichDestValue == 0)
-                return std::abs(cl_half_to_float(expected)
-                                - cl_half_to_float(testValues[whichDestValue]))
+                return std::abs(
+                           static_cast<double>(expected
+                                               - static_cast<HostDataType>(
+                                                   testValues[whichDestValue])))
                     > max_error;
         }
         return CBasicTestMemOrderScope<
@@ -2023,10 +1783,7 @@ public:
     }
     cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return threadCount;
         }
@@ -2947,10 +2704,7 @@ public:
           min_range(-999.0), max_range(999.0)
     {
         StartValue(DataType().MaxValue());
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             CBasicTestMemOrderScope<HostAtomicType,
                                     HostDataType>::OldValueCheck(false);
@@ -2960,10 +2714,7 @@ public:
     {
         std::string memoryOrderScope = MemoryOrderScopeStr();
         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return "  atomic_fetch_min" + postfix
                 + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
@@ -2982,10 +2733,7 @@ public:
                       volatile HostAtomicType *destMemory,
                       HostDataType *oldValues) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             host_atomic_fetch_min(&destMemory[0], oldValues[tid],
                                   MemoryOrder());
@@ -3040,19 +2788,7 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (whichDestValue == 0)
-            {
-                for (cl_uint i = 0; i < threadCount; i++)
-                {
-                    if (cl_half_to_float(startRefValues[i])
-                        < cl_half_to_float(expected))
-                        expected = startRefValues[i];
-                }
-            }
-        }
-        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 for (cl_uint i = 0; i < threadCount; i++)
@@ -3072,9 +2808,7 @@ public:
                              const std::vector<HostAtomicType> &testValues,
                              cl_uint whichDestValue) override
     {
-        if (std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
@@ -3089,17 +2823,16 @@ public:
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                     HostAtomicType *finalValues) override
     {
-        if (std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same<HostDataType, HOST_FLOAT>::value)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             correct = true;
             for (cl_uint i = 1; i < threadCount; i++)
             {
                 if (refValues[i] != StartValue())
                 {
-                    log_error("Thread %d found %d mismatch(es)\n", i,
-                              (cl_uint)refValues[i]);
+                    log_error(
+                        "Thread %d found %lf mismatch(es), start value=%lf\n",
+                        i, (double)refValues[i], (double)StartValue());
                     correct = false;
                 }
             }
@@ -3156,10 +2889,7 @@ public:
     }
     cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return threadCount;
         }
@@ -3287,21 +3017,11 @@ public:
                                                                 useSVM),
           min_range(-999.0), max_range(999.0)
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        StartValue(DataType().MinValue());
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             CBasicTestMemOrderScope<HostAtomicType,
                                     HostDataType>::OldValueCheck(false);
-            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-                StartValue(cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode));
-            else
-                StartValue(-DataType().MaxValue());
-        }
-        else
-        {
-            StartValue(DataType().MinValue());
         }
     }
     std::string ProgramCore() override
@@ -3330,10 +3050,7 @@ public:
                       volatile HostAtomicType *destMemory,
                       HostDataType *oldValues) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             host_atomic_fetch_max(&destMemory[0], oldValues[tid],
                                   MemoryOrder());
@@ -3349,23 +3066,12 @@ public:
     bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                       MTdata d) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             for (cl_uint i = 0; i < threadCount; i++)
             {
-                startRefValues[i] = cl_half_from_float(
-                    get_random_float(min_range, max_range, d),
-                    gHalfRoundingMode);
-            }
-        }
-        else if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
-        {
-            for (cl_uint i = 0; i < threadCount; i++)
-            {
-                startRefValues[i] = get_random_float(min_range, max_range, d);
+                startRefValues[i] = static_cast<HostDataType>(
+                    get_random_float(min_range, max_range, d));
             }
         }
         else
@@ -3388,19 +3094,7 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (whichDestValue == 0)
-            {
-                for (cl_uint i = 0; i < threadCount; i++)
-                {
-                    if (cl_half_to_float(startRefValues[i])
-                        > cl_half_to_float(expected))
-                        expected = startRefValues[i];
-                }
-            }
-        }
-        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 for (cl_uint i = 0; i < threadCount; i++)
@@ -3420,9 +3114,7 @@ public:
                              const std::vector<HostAtomicType> &testValues,
                              cl_uint whichDestValue) override
     {
-        if (std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
@@ -3437,17 +3129,16 @@ public:
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                     HostAtomicType *finalValues) override
     {
-        if (std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             correct = true;
             for (cl_uint i = 1; i < threadCount; i++)
             {
                 if (refValues[i] != StartValue())
                 {
-                    log_error("Thread %d found %d mismatch(es)\n", i,
-                              (cl_uint)refValues[i]);
+                    log_error(
+                        "Thread %d found %lf mismatch(es), start value=%lf\n",
+                        i, (double)refValues[i], (double)StartValue());
                     correct = false;
                 }
             }
@@ -3504,10 +3195,7 @@ public:
     }
     cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return threadCount;
         }

From 6b20272159a22331531ffa12be0e967bc035b700 Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjodin@google.com>
Date: Tue, 16 Dec 2025 18:23:37 +0100
Subject: [PATCH 29/54] fix compiler/options_build_optimizations (#2595)

We need to define -cl-std when compiling with options that are not in
OpenCL 1.0.
---
 test_conformance/compiler/test_build_options.cpp | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/test_conformance/compiler/test_build_options.cpp b/test_conformance/compiler/test_build_options.cpp
index 52cb8eaa..15348e06 100644
--- a/test_conformance/compiler/test_build_options.cpp
+++ b/test_conformance/compiler/test_build_options.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 #include "testBase.h"
+#include "harness/kernelHelpers.h"
 #include "harness/os_helpers.h"
 #include "harness/testHarness.h"
 
@@ -101,7 +102,10 @@ REGISTER_TEST(options_build_optimizations)
             continue;
         }
 
-        const char *option = optimization_option.first;
+        auto build_options = std::string("-cl-std=CL")
+            + get_max_OpenCL_C_for_context(context).to_string() + " "
+            + optimization_option.first;
+        const char *option = build_options.c_str();
         clProgramWrapper program;
         error = create_single_kernel_helper_create_program(
             context, &program, 1, options_test_kernel, option);
@@ -431,7 +435,10 @@ REGISTER_TEST(options_uniform_work_group_size)
     {
         return TEST_SKIPPED_ITSELF;
     }
-    const char *options = "-cl-uniform-work-group-size";
+    std::string build_options = "-cl-std=CL"
+        + get_max_OpenCL_C_for_context(context).to_string()
+        + " -cl-uniform-work-group-size";
+    const char *options = build_options.c_str();
     clProgramWrapper program;
     int error = create_single_kernel_helper_create_program(
         context, &program, 1, options_test_kernel, options);

From b460aae39db67e99bb60df814b431c6231349235 Mon Sep 17 00:00:00 2001
From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com>
Date: Tue, 16 Dec 2025 17:25:44 +0000
Subject: [PATCH 30/54] Add AHB sub buffer test (#2592)

Add a test for clCreateSubBuffer using a buffer created from an
AHardwareBuffer.

Signed-off-by: Alex Davicenko <alex.davicenko@arm.com>
Co-authored-by: Alex Davicenko <alex.davicenko@arm.com>
---
 .../cl_khr_external_memory_ahb/main.cpp       |   2 +-
 .../cl_khr_external_memory_ahb/test_ahb.cpp   | 208 ++++++++++++++++--
 2 files changed, 193 insertions(+), 17 deletions(-)

diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp
index 8900e6ff..c1079a4d 100644
--- a/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp
+++ b/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp
@@ -20,4 +20,4 @@ int main(int argc, const char *argv[])
 {
     return runTestHarness(argc, argv, test_registry::getInstance().num_tests(),
                           test_registry::getInstance().definitions(), false, 0);
-}
\ No newline at end of file
+}
diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
index 303eace7..6220a87e 100644
--- a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
+++ b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
@@ -23,9 +23,32 @@
 #include <android/hardware_buffer.h>
 #include "debug_ahb.h"
 
-static bool isAHBUsageReadable(const AHardwareBuffer_UsageFlags usage)
+static bool isAHBUsageReadableHost(AHardwareBuffer_UsageFlags usage)
 {
-    return (AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE & usage) != 0;
+    return (AHARDWAREBUFFER_USAGE_CPU_READ_MASK & usage) != 0;
+}
+
+static bool isAHBUsageWritableHost(AHardwareBuffer_UsageFlags usage)
+{
+    return (AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK & usage) != 0;
+}
+
+static bool isAHBUsageReadableDevice(const AHardwareBuffer_UsageFlags usage)
+{
+    return ((AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE
+             | AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER
+             | AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA)
+            & usage)
+        != 0;
+}
+
+static cl_ulong getMaxAllocSize(cl_device_id device)
+{
+    cl_ulong ret;
+    cl_int err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+                                 sizeof(cl_ulong), &ret, nullptr);
+    test_error(err, "clGetDeviceInfo failed");
+    return ret;
 }
 
 struct ahb_format_table
@@ -50,6 +73,32 @@ ahb_image_size_table test_sizes[] = {
     { 64, 64 }, { 128, 128 }, { 256, 256 }, { 512, 512 }
 };
 
+uint32_t test_buffer_sizes[] = { 2, 8, 32, 128, 512, 2048, 16384, 65536 };
+
+ahb_usage_table test_buffer_usages[] = {
+    { static_cast<AHardwareBuffer_UsageFlags>(
+        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
+        | AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY
+        | AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) },
+    { static_cast<AHardwareBuffer_UsageFlags>(
+        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
+        | AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY
+        | AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA) },
+    { static_cast<AHardwareBuffer_UsageFlags>(
+        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
+        | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
+        | AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) },
+    { static_cast<AHardwareBuffer_UsageFlags>(
+        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
+        | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
+        | AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA) },
+    { static_cast<AHardwareBuffer_UsageFlags>(
+        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
+        | AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY) },
+    { static_cast<AHardwareBuffer_UsageFlags>(
+        AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN) }
+};
+
 ahb_usage_table test_usages[] = {
     { static_cast<AHardwareBuffer_UsageFlags>(
         AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
@@ -234,8 +283,9 @@ REGISTER_TEST(images_read)
         aHardwareBufferDesc.format = format.aHardwareBufferFormat;
         for (auto usage : test_usages)
         {
-            // Filter out usage flags that are not readable on device
-            if (!isAHBUsageReadable(usage.usageFlags))
+            if (!(isAHBUsageReadableHost(usage.usageFlags)
+                  && isAHBUsageWritableHost(usage.usageFlags)
+                  && isAHBUsageReadableDevice(usage.usageFlags)))
             {
                 continue;
             }
@@ -529,8 +579,9 @@ REGISTER_TEST(enqueue_read_image)
         aHardwareBufferDesc.format = format.aHardwareBufferFormat;
         for (auto usage : test_usages)
         {
-            // Filter out usage flags that are not readable on device
-            if (!isAHBUsageReadable(usage.usageFlags))
+            if (!(isAHBUsageReadableHost(usage.usageFlags)
+                  && isAHBUsageWritableHost(usage.usageFlags)
+                  && isAHBUsageReadableDevice(usage.usageFlags)))
             {
                 continue;
             }
@@ -707,8 +758,9 @@ REGISTER_TEST(enqueue_copy_image)
         aHardwareBufferDesc.format = format.aHardwareBufferFormat;
         for (auto usage : test_usages)
         {
-            // Filter out usage flags that are not readable on device
-            if (!isAHBUsageReadable(usage.usageFlags))
+            if (!(isAHBUsageReadableHost(usage.usageFlags)
+                  && isAHBUsageWritableHost(usage.usageFlags)
+                  && isAHBUsageReadableDevice(usage.usageFlags)))
             {
                 continue;
             }
@@ -1010,8 +1062,9 @@ REGISTER_TEST(enqueue_copy_image_to_buffer)
         aHardwareBufferDesc.format = format.aHardwareBufferFormat;
         for (auto usage : test_usages)
         {
-            // Filter out usage flags that are not readable on device
-            if (!isAHBUsageReadable(usage.usageFlags))
+            if (!(isAHBUsageReadableHost(usage.usageFlags)
+                  && isAHBUsageWritableHost(usage.usageFlags)
+                  && isAHBUsageReadableDevice(usage.usageFlags)))
             {
                 continue;
             }
@@ -1198,8 +1251,8 @@ REGISTER_TEST(enqueue_copy_buffer_to_image)
         aHardwareBufferDesc.format = format.aHardwareBufferFormat;
         for (auto usage : test_usages)
         {
-            // Filter out usage flags that are not readable on device
-            if (!isAHBUsageReadable(usage.usageFlags))
+            if (!(isAHBUsageReadableHost(usage.usageFlags)
+                  && isAHBUsageReadableDevice(usage.usageFlags)))
             {
                 continue;
             }
@@ -1393,8 +1446,8 @@ REGISTER_TEST(enqueue_write_image)
         aHardwareBufferDesc.format = format.aHardwareBufferFormat;
         for (auto usage : test_usages)
         {
-            // Filter out usage flags that are not readable on device
-            if (!isAHBUsageReadable(usage.usageFlags))
+            if (!(isAHBUsageReadableHost(usage.usageFlags)
+                  && isAHBUsageReadableDevice(usage.usageFlags)))
             {
                 continue;
             }
@@ -1585,8 +1638,8 @@ REGISTER_TEST(enqueue_fill_image)
         aHardwareBufferDesc.format = format.aHardwareBufferFormat;
         for (auto usage : test_usages)
         {
-            // Filter out usage flags that are not readable on device
-            if (!isAHBUsageReadable(usage.usageFlags))
+            if (!(isAHBUsageReadableHost(usage.usageFlags)
+                  && isAHBUsageReadableDevice(usage.usageFlags)))
             {
                 continue;
             }
@@ -2160,3 +2213,126 @@ REGISTER_TEST(lifetime_image)
     }
     return TEST_PASS;
 }
+
+
+/* Testing clCreateSubBuffer
+ *  Create AHB
+ *  Write to AHB
+ *  Create CL buffer from AHB
+ *  Create a sub buffer covering the first half of the buffer
+ *  Read & verify sub buffer
+ */
+REGISTER_TEST(sub_buffer)
+{
+    cl_int err;
+    RandomSeed seed(gRandomSeed);
+
+    if (!is_extension_available(
+            device, "cl_khr_external_memory_android_hardware_buffer"))
+    {
+        log_info("cl_khr_external_memory_android_hardware_buffer is not "
+                 "supported on this platform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
+    aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
+    for (auto usage : test_buffer_usages)
+    {
+        if (!(isAHBUsageReadableHost(usage.usageFlags)
+              && isAHBUsageWritableHost(usage.usageFlags)
+              && isAHBUsageReadableDevice(usage.usageFlags)))
+        {
+            continue;
+        }
+
+        aHardwareBufferDesc.usage = usage.usageFlags;
+        for (uint32_t buffer_size : test_buffer_sizes)
+        {
+            if (buffer_size > getMaxAllocSize(device))
+            {
+                continue;
+            }
+
+            aHardwareBufferDesc.width = buffer_size;
+            aHardwareBufferDesc.height = 1;
+            aHardwareBufferDesc.layers = 1;
+            if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
+            {
+                log_unsupported_ahb_format(aHardwareBufferDesc);
+                continue;
+            }
+
+            AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
+
+            log_info("Testing usage: %s, buffer size: %u\n",
+                     ahardwareBufferDecodeUsageFlagsToString(usage.usageFlags)
+                         .c_str(),
+                     buffer_size);
+
+            void *hardware_buffer_data = nullptr;
+            int ahb_result = AHardwareBuffer_lock(
+                aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY, -1,
+                nullptr, &hardware_buffer_data);
+            if (ahb_result != 0)
+            {
+                log_error("AHardwareBuffer_lock failed with code %d\n",
+                          ahb_result);
+                return TEST_FAIL;
+            }
+
+            std::vector<uint8_t> host_buffer(buffer_size);
+
+            generate_random_data(ExplicitType::kUnsignedChar, buffer_size, seed,
+                                 host_buffer.data());
+
+            memcpy(hardware_buffer_data, host_buffer.data(), buffer_size);
+
+            ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr);
+            if (ahb_result != 0)
+            {
+                log_error("AHardwareBuffer_unlock failed with code %d\n",
+                          ahb_result);
+                return TEST_FAIL;
+            }
+
+            cl_mem_properties props[] = {
+                CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
+                aHardwareBuffer.get_props(), 0
+            };
+
+            clMemWrapper buffer = clCreateBufferWithProperties(
+                context, props, CL_MEM_READ_WRITE, 0, nullptr, &err);
+            test_error(err, "Failed to create CL buffer from AHardwareBuffer");
+
+            cl_uint sub_buffer_size = buffer_size / 2;
+            cl_buffer_region region = { 0 };
+            region.origin = 0;
+            region.size = sub_buffer_size;
+
+            clMemWrapper sub_buffer =
+                clCreateSubBuffer(buffer, CL_MEM_READ_WRITE,
+                                  CL_BUFFER_CREATE_TYPE_REGION, &region, &err);
+            test_error(err, "clCreateSubBuffer failed");
+
+            std::vector<uint8_t> host_sub_buffer(sub_buffer_size);
+            err = clEnqueueReadBuffer(queue, sub_buffer, true, 0,
+                                      sub_buffer_size, host_sub_buffer.data(),
+                                      0, nullptr, nullptr);
+            test_error(err, "clEnqueueReadBuffer failed");
+
+            for (size_t i = 0; i < sub_buffer_size; ++i)
+            {
+                if (host_buffer[i] != host_sub_buffer[i])
+                {
+                    log_error(
+                        "At position i=%zu expected value %u but got %u\n", i,
+                        host_buffer[i], host_sub_buffer[i]);
+                    return TEST_FAIL;
+                }
+            }
+        }
+    }
+    return TEST_PASS;
+}

From 6774fc1dc31dd77bf5ce4f30b4e70412a75834cf Mon Sep 17 00:00:00 2001
From: Michael Rizkalla <michael.rizkalla@arm.com>
Date: Tue, 13 Jan 2026 17:43:13 +0000
Subject: [PATCH 31/54] Add negative tests for context API functions (#2494)

This PR adds negative tests for the following API functions:

- clCreateContext
- clCreateContextFromType
- clRetainContext
- clReleaseContext
- clGetContextInfo
- clSetContextDestructorCallback

Also, define a new macro `test_object_failure_ret` that checks both that
the expected error code was returned and that the returned object is
`NULL` as a result of the failure.

Signed-off-by: Michael Rizkalla <michael.rizkalla@arm.com>
---
 test_common/harness/errorHelpers.h        |   6 +
 test_conformance/api/CMakeLists.txt       |   1 +
 test_conformance/api/negative_context.cpp | 290 ++++++++++++++++++++++
 3 files changed, 297 insertions(+)
 create mode 100644 test_conformance/api/negative_context.cpp

diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h
index cb1a9113..7e59f915 100644
--- a/test_common/harness/errorHelpers.h
+++ b/test_common/harness/errorHelpers.h
@@ -120,6 +120,12 @@ static int vlog_win32(const char *format, ...);
             return retValue;                                                   \
         }                                                                      \
     }
+#define test_object_failure_ret(object, errCode, expectedErrCode, msg,         \
+                                retValue)                                      \
+    {                                                                          \
+        test_assert_error_ret(object == nullptr, msg, retValue);               \
+        test_failure_error_ret(errCode, expectedErrCode, msg, retValue);       \
+    }
 #define print_failure_error(errCode, expectedErrCode, msg)                     \
     log_error("ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg,            \
               IGetErrorString(errCode), IGetErrorString(expectedErrCode),      \
diff --git a/test_conformance/api/CMakeLists.txt b/test_conformance/api/CMakeLists.txt
index 942fef89..fed0aace 100644
--- a/test_conformance/api/CMakeLists.txt
+++ b/test_conformance/api/CMakeLists.txt
@@ -6,6 +6,7 @@ set(${MODULE_NAME}_SOURCES
          main.cpp
          negative_platform.cpp
          negative_queue.cpp
+         negative_context.cpp
          negative_enqueue_marker.cpp
          negative_enqueue_map_image.cpp
          negative_device.cpp
diff --git a/test_conformance/api/negative_context.cpp b/test_conformance/api/negative_context.cpp
new file mode 100644
index 00000000..a19f1977
--- /dev/null
+++ b/test_conformance/api/negative_context.cpp
@@ -0,0 +1,290 @@
+//
+// Copyright (c) 2025 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+
+/* Negative Tests for clCreateContext */
+REGISTER_TEST(negative_create_context)
+{
+    cl_context_properties props[3] = {
+        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(nullptr), 0
+    };
+    cl_int err = 0;
+    cl_context ctx = clCreateContext(props, 1, &device, nullptr, nullptr, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_PLATFORM,
+        "clCreateContext should return CL_INVALID_PLATFORM when:\"an invalid "
+        "platform object is used with the CL_CONTEXT_PLATFORM property\" using "
+        "a nullptr",
+        TEST_FAIL);
+
+    props[0] = reinterpret_cast<cl_context_properties>("INVALID_PROPERTY");
+
+    props[1] = reinterpret_cast<cl_context_properties>(nullptr);
+    ctx = clCreateContext(props, 1, &device, nullptr, nullptr, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_PROPERTY,
+        "clCreateContext should return CL_INVALID_PROPERTY when: \"context "
+        "property name in properties is not a supported property name\"",
+        TEST_FAIL);
+
+    if (get_device_cl_version(device) >= Version(1, 2))
+    {
+        cl_context_properties invalid_value{ -1 };
+        props[0] = CL_CONTEXT_INTEROP_USER_SYNC;
+        props[1] = invalid_value;
+        ctx = clCreateContext(props, 1, &device, nullptr, nullptr, &err);
+        test_object_failure_ret(
+            ctx, err, CL_INVALID_PROPERTY,
+            "clCreateContext should return CL_INVALID_PROPERTY when: \"the "
+            "value specified for a supported property name is not valid\"",
+            TEST_FAIL);
+
+        cl_bool property_value = CL_FALSE;
+        cl_context_properties duplicated_property[5] = {
+            CL_CONTEXT_INTEROP_USER_SYNC,
+            static_cast<cl_context_properties>(property_value),
+            CL_CONTEXT_INTEROP_USER_SYNC,
+            static_cast<cl_context_properties>(property_value), 0
+        };
+        ctx = clCreateContext(duplicated_property, 1, &device, nullptr, nullptr,
+                              &err);
+        test_object_failure_ret(
+            ctx, err, CL_INVALID_PROPERTY,
+            "clCreateContext should return CL_INVALID_PROPERTY when: \"the "
+            "same property name is specified more than once\"",
+            TEST_FAIL);
+    }
+
+    ctx = clCreateContext(nullptr, 1, nullptr, nullptr, nullptr, &err);
+    test_object_failure_ret(ctx, err, CL_INVALID_VALUE,
+                            "clCreateContext should return CL_INVALID_VALUE "
+                            "when: \"devices is NULL\"",
+                            TEST_FAIL);
+
+    ctx = clCreateContext(nullptr, 0, &device, nullptr, nullptr, &err);
+    test_object_failure_ret(ctx, err, CL_INVALID_VALUE,
+                            "clCreateContext should return CL_INVALID_VALUE "
+                            "when: \"num_devices is equal to zero\"",
+                            TEST_FAIL);
+
+    int user_data = 1; // Arbitrary non-NULL value
+    ctx = clCreateContext(nullptr, 1, &device, nullptr, &user_data, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_VALUE,
+        "clCreateContext should return CL_INVALID_VALUE when: \"pfn_notify is "
+        "NULL but user_data is not NULL\"",
+        TEST_FAIL);
+
+    cl_device_id invalid_device = nullptr;
+    ctx = clCreateContext(nullptr, 1, &invalid_device, nullptr, nullptr, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_DEVICE,
+        "clCreateContext should return CL_INVALID_DEVICE when: \"any device in "
+        "devices is not a valid device\" using a device set to nullptr",
+        TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clCreateContextFromType */
+REGISTER_TEST(negative_create_context_from_type)
+{
+    cl_platform_id platform = getPlatformFromDevice(device);
+
+    cl_context_properties props[5] = {
+        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(nullptr),
+        0, 0, 0
+    };
+    cl_int err = 0;
+    cl_context ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT,
+                                             nullptr, nullptr, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_PLATFORM,
+        "clCreateContextFromType should return CL_INVALID_PLATFORM when: \"an "
+        "invalid platform object is used with the CL_CONTEXT_PLATFORM "
+        "property\" using a nullptr",
+        TEST_FAIL);
+
+    ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
+                                  nullptr, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_PLATFORM,
+        "clCreateContextFromType should return CL_INVALID_PLATFORM when: \"an "
+        "invalid platform object is used with the CL_CONTEXT_PLATFORM "
+        "property\" using a valid object that is NOT a platform",
+        TEST_FAIL);
+
+    props[1] = reinterpret_cast<cl_context_properties>(platform);
+    props[2] = reinterpret_cast<cl_context_properties>("INVALID_PROPERTY");
+    props[3] = reinterpret_cast<cl_context_properties>(nullptr);
+
+    ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
+                                  nullptr, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_PROPERTY,
+        "clCreateContextFromType should return CL_INVALID_PROPERTY when: "
+        "\"context property name in properties is not a supported property "
+        "name\"",
+        TEST_FAIL);
+
+    if (get_device_cl_version(device) >= Version(1, 2))
+    {
+        cl_context_properties invalid_value{ -1 };
+        props[2] = CL_CONTEXT_INTEROP_USER_SYNC;
+        props[3] = invalid_value;
+        ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
+                                      nullptr, &err);
+        test_object_failure_ret(
+            ctx, err, CL_INVALID_PROPERTY,
+            "clCreateContextFromType should return CL_INVALID_PROPERTY when: "
+            "\"the value specified for a supported property name is not "
+            "valid\"",
+            TEST_FAIL);
+
+        props[2] = CL_CONTEXT_PLATFORM;
+        props[3] = reinterpret_cast<cl_context_properties>(platform);
+        ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
+                                      nullptr, &err);
+        test_object_failure_ret(
+            ctx, err, CL_INVALID_PROPERTY,
+            "clCreateContextFromType should return CL_INVALID_PROPERTY when: "
+            "\"the same property name is specified more than once\"",
+            TEST_FAIL);
+    }
+
+    int user_data = 1; // Arbitrary non-NULL value
+    ctx = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_DEFAULT, nullptr,
+                                  &user_data, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_VALUE,
+        "clCreateContextFromType should return CL_INVALID_VALUE when: "
+        "\"pfn_notify is NULL but user_data is not NULL\"",
+        TEST_FAIL);
+
+    cl_device_type INVALID_DEVICE_TYPE = 0;
+    ctx = clCreateContextFromType(nullptr, INVALID_DEVICE_TYPE, nullptr,
+                                  nullptr, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_DEVICE_TYPE,
+        "clCreateContextFromType should return CL_INVALID_DEVICE_TYPE when: "
+        "\"device_type is not a valid value\"",
+        TEST_FAIL);
+
+    std::vector<cl_device_type> device_types = { CL_DEVICE_TYPE_CPU,
+                                                 CL_DEVICE_TYPE_GPU,
+                                                 CL_DEVICE_TYPE_ACCELERATOR };
+    if (get_device_cl_version(device) >= Version(1, 2))
+    {
+        device_types.push_back(CL_DEVICE_TYPE_CUSTOM);
+    }
+    for (auto type : device_types)
+    {
+        clContextWrapper tmp_context =
+            clCreateContextFromType(nullptr, type, nullptr, nullptr, &err);
+        if (err != CL_SUCCESS)
+        {
+            test_object_failure_ret(
+                tmp_context, err, CL_DEVICE_NOT_FOUND,
+                "clCreateContextFromType should return CL_DEVICE_NOT_FOUND "
+                "when: \"no devices that match device_type and property values "
+                "specified in properties were found\"",
+                TEST_FAIL);
+            break;
+        }
+    }
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clRetainContext */
+REGISTER_TEST(negative_retain_context)
+{
+    cl_int err = clRetainContext(nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_CONTEXT,
+        "clRetainContext should return CL_INVALID_CONTEXT when: \"context is "
+        "not a valid OpenCL context\" using a nullptr",
+        TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clReleaseContext */
+REGISTER_TEST(negative_release_context)
+{
+    cl_int err = clReleaseContext(nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_CONTEXT,
+        "clReleaseContext should return CL_INVALID_CONTEXT when: \"context is "
+        "not a valid OpenCL context\" using a nullptr",
+        TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clGetContextInfo */
+REGISTER_TEST(negative_get_context_info)
+{
+
+    cl_uint param_value = 0;
+    cl_int err = clGetContextInfo(nullptr, CL_CONTEXT_REFERENCE_COUNT,
+                                  sizeof(param_value), &param_value, nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_CONTEXT,
+        "clGetContextInfo should return CL_INVALID_CONTEXT when: \"context is "
+        "not a valid context\" using a nullptr",
+        TEST_FAIL);
+
+    cl_context_info INVALID_PARAM_VALUE = 0;
+    err = clGetContextInfo(context, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_VALUE,
+        "clGetContextInfo should return CL_INVALID_VALUE when: \"param_name is "
+        "not one of the supported values\"",
+        TEST_FAIL);
+
+    err = clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT,
+                           sizeof(param_value) - 1, &param_value, nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_VALUE,
+        "clGetContextInfo should return CL_INVALID_VALUE when: \"size in bytes "
+        "specified by param_value_size is < size of return type and "
+        "param_value is not a NULL value\"",
+        TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clSetContextDestructorCallback */
+static void CL_CALLBACK callback(cl_context context, void* user_data) {}
+
+REGISTER_TEST_VERSION(negative_set_context_destructor_callback, Version(3, 0))
+{
+    cl_int err = clSetContextDestructorCallback(nullptr, callback, nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_CONTEXT,
+        "clSetContextDestructorCallback should return CL_INVALID_CONTEXT when: "
+        "\"context is not a valid context\" using a nullptr",
+        TEST_FAIL);
+
+    err = clSetContextDestructorCallback(context, nullptr, nullptr);
+    test_failure_error_ret(err, CL_INVALID_VALUE,
+                           "clSetContextDestructorCallback should return "
+                           "CL_INVALID_VALUE when: \"pfn_notify is NULL\"",
+                           TEST_FAIL);
+
+    return TEST_PASS;
+}

From 02a3c7e609d3316bf9b9ef543c14a2ec6ee96c8f Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 13 Jan 2026 18:44:39 +0100
Subject: [PATCH 32/54] Replaced test values for hexfloat and printf to avoid
 ambiguity (#2425)

Fixes #1335 according to the issue description.

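@alycm: the test values were replaced with ones whose hex-float
representation is straightforward (0.5 and 1.5). When the exact string
comparison fails, a numeric fallback comparison is used for these cases.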
---
 test_conformance/printf/test_printf.h   | 20 +++++++-----
 test_conformance/printf/util_printf.cpp | 41 ++++++++++++++++++++-----
 2 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/test_conformance/printf/test_printf.h b/test_conformance/printf/test_printf.h
index 993a6126..519b9dd9 100644
--- a/test_conformance/printf/test_printf.h
+++ b/test_conformance/printf/test_printf.h
@@ -70,14 +70,15 @@ struct printDataGenParameters
 {
     std::vector<std::string> genericFormats;
     const char* dataRepresentation;
-    const char* vectorFormatFlag;
-    const char* vectorFormatSpecifier;
-    const char* dataType;
-    const char* vectorSize;
-    const char* addrSpaceArgumentTypeQualifier;
-    const char* addrSpaceVariableTypeQualifier;
-    const char* addrSpaceParameter;
-    const char* addrSpacePAdd;
+    const char* vectorFormatFlag = nullptr;
+    const char* vectorFormatSpecifier = nullptr;
+    const char* dataType = nullptr;
+    const char* vectorSize = nullptr;
+    const char* addrSpaceArgumentTypeQualifier = nullptr;
+    const char* addrSpaceVariableTypeQualifier = nullptr;
+    const char* addrSpaceParameter = nullptr;
+    const char* addrSpacePAdd = nullptr;
+    bool allowFallbackTest = false;
 };
 
 // Reference results - filled out at run-time
@@ -111,6 +112,9 @@ struct testCase
                     char*,
                     const size_t);                       //function pointer for generating reference results
     Type dataType;                                       //the data type that will be printed during reference result generation (used for setting rounding mode)
+    bool (*fallbackTestFN)(const char*,
+                           const char*) =
+        nullptr; // function pointer to perform fallback test if required
 };
 
 extern const char* strType[];
diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp
index 0768adb5..39b877ff 100644
--- a/test_conformance/printf/util_printf.cpp
+++ b/test_conformance/printf/util_printf.cpp
@@ -26,8 +26,11 @@ static void intRefBuilder(printDataGenParameters&, char*, const size_t);
 static void halfRefBuilder(printDataGenParameters&, char* rResult,
                            const size_t);
 static void floatRefBuilder(printDataGenParameters&, char* rResult, const size_t);
+static bool floatRefTest(const char* refResult, const char* analysisBuffer);
 static void doubleRefBuilder(printDataGenParameters&, char* rResult,
                              const size_t);
+static bool doubleRefTest(const char* refResult, const char* analysisBuffer);
+
 static void octalRefBuilder(printDataGenParameters&, char*, const size_t);
 static void unsignedRefBuilder(printDataGenParameters&, char*, const size_t);
 static void hexRefBuilder(printDataGenParameters&, char*, const size_t);
@@ -468,12 +471,12 @@ std::vector<printDataGenParameters> printFloatGenParameters = {
 
     // Double argument representing floating-point,in [-]xh.hhhhpAd style
 
-    { { "%.6a" }, "0.1f" },
+    { { "%.6a" }, "0.5f", 0, 0, 0, 0, 0, 0, 0, 0, true },
 
     //(Minimum)Ten-wide,Double argument representing floating-point,in
     // xh.hhhhpAd style,default(right)-justified
 
-    { { "%10.2a" }, "9990.235f" },
+    { { "%10.2a" }, "1.5f", 0, 0, 0, 0, 0, 0, 0, 0, true },
 
     //(Minimum)Ten-wide,two positions after the decimal,with
     // a blank space inserted before the value, default(right)-justified
@@ -502,8 +505,9 @@ testCase testCaseFloat = {
 
     floatRefBuilder,
 
-    kfloat
+    kfloat,
 
+    floatRefTest
 };
 
 //==============================================
@@ -673,12 +677,12 @@ std::vector<printDataGenParameters> printDoubleGenParameters = {
 
     // Double argument representing floating-point,in [-]xh.hhhhpAd style
 
-    { { "%.6a" }, "0.1" },
+    { { "%.6a" }, "0.5", 0, 0, 0, 0, 0, 0, 0, 0, true },
 
     //(Minimum)Ten-wide,Double argument representing floating-point,in
     // xh.hhhhpAd style,default(right)-justified
 
-    { { "%10.2a" }, "9990.235" },
+    { { "%10.2a" }, "1.5", 0, 0, 0, 0, 0, 0, 0, 0, true },
 };
 
 //---------------------------------------------------------
@@ -697,8 +701,9 @@ testCase testCaseDouble = {
 
     doubleRefBuilder,
 
-    kdouble
+    kdouble,
 
+    doubleRefTest
 };
 
 //==============================================
@@ -1757,7 +1762,15 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId
         return !std::regex_match(analysisBuffer, nanRegex);
     }
 
-    return strcmp(analysisBuffer, pTestCase->_correctBuffer[testId].c_str());
+    size_t ret =
+        strcmp(analysisBuffer, pTestCase->_correctBuffer[testId].c_str());
+
+    if (ret != 0 && pTestCase->_genParameters[testId].allowFallbackTest
+        && pTestCase->fallbackTestFN)
+        if (pTestCase->fallbackTestFN(
+                analysisBuffer, pTestCase->_correctBuffer[testId].c_str()))
+            return 0;
+    return ret;
 }
 
 static void intRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize)
@@ -1781,6 +1794,13 @@ static void floatRefBuilder(printDataGenParameters& params, char* refResult, con
              strtof(params.dataRepresentation, NULL));
 }
 
+static bool floatRefTest(const char* refResult, const char* analysisBuffer)
+{
+    // Fallback check: parse both strings and compare the float values.
+    const float expected = strtof(refResult, nullptr);
+    return strtof(analysisBuffer, nullptr) == expected;
+}
+
 static void doubleRefBuilder(printDataGenParameters& params, char* refResult,
                              const size_t refSize)
 {
@@ -1788,6 +1808,13 @@ static void doubleRefBuilder(printDataGenParameters& params, char* refResult,
              strtod(params.dataRepresentation, NULL));
 }
 
+static bool doubleRefTest(const char* refResult, const char* analysisBuffer)
+{
+    // Fallback check: parse both strings and compare the double values.
+    const double expected = strtod(refResult, nullptr);
+    return strtod(analysisBuffer, nullptr) == expected;
+}
+
 static void octalRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize)
 {
     const unsigned long int data = strtoul(params.dataRepresentation, NULL, 10);

From b681d4f2c89e015aac0aa107a78e6abc153e966b Mon Sep 17 00:00:00 2001
From: Michael Rizkalla <michael.rizkalla@arm.com>
Date: Tue, 13 Jan 2026 17:46:02 +0000
Subject: [PATCH 33/54] Add `cl_ext_immutable_memory_objects` tests writing to
 and from buffer (#2432)

This change extends the test coverage for
https://github.com/KhronosGroup/OpenCL-Docs/pull/1280

The change tests:
1. Writing to immutable buffers.
2. Writing to buffer/image from immutable buffers.
3. Reading from immutable buffers.

This change adds the following tests:
1. `test_negative_imagearraycopy`
2. `test_negative_imagearraycopy3d`
3. `test_immutable_bufferreadwriterect`
4. `test_immutable_arrayreadwrite`
5. `test_write_from_immutable_buffer_to_buffer`
6. `test_immutable_buffer_map_*`

and extends the following tests:
1. `test_arrayimagecopy3d`
2. `test_arrayimagecopy`
3. `test_imagearraycopy3d`
4. `test_imagearraycopy`
5. `test_buffer_copy`
6. `test_buffer_partial_copy`

Signed-off-by: Michael Rizkalla <michael.rizkalla@arm.com>
---
 .../basic/test_arrayimagecopy.cpp             |  28 +-
 .../basic/test_arrayreadwrite.cpp             |  32 +-
 .../basic/test_bufferreadwriterect.cpp        |  77 +++++
 .../basic/test_imagearraycopy.cpp             | 128 +++++++-
 test_conformance/buffers/main.cpp             |  21 +-
 test_conformance/buffers/testBase.h           |   2 +-
 test_conformance/buffers/test_buffer_copy.cpp | 155 ++++++++--
 test_conformance/buffers/test_buffer_fill.cpp |  12 +
 test_conformance/buffers/test_buffer_map.cpp  | 123 ++++++++
 test_conformance/buffers/test_buffer_read.cpp |  18 ++
 .../buffers/test_buffer_write.cpp             | 284 ++++++++++++++++++
 11 files changed, 834 insertions(+), 46 deletions(-)

diff --git a/test_conformance/basic/test_arrayimagecopy.cpp b/test_conformance/basic/test_arrayimagecopy.cpp
index bb44abff..16b24390 100644
--- a/test_conformance/basic/test_arrayimagecopy.cpp
+++ b/test_conformance/basic/test_arrayimagecopy.cpp
@@ -188,9 +188,17 @@ REGISTER_TEST(arrayimagecopy)
 {
     PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
 
-    return test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE,
-                                 CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
-                                 test_arrayimagecopy_single_format);
+    int error = test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE,
+                                      CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
+                                      test_arrayimagecopy_single_format);
+    if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
+    {
+        error |= test_arrayimagecommon(
+            device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
+            CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
+            test_arrayimagecopy_single_format);
+    }
+    return error;
 }
 
 
@@ -198,7 +206,15 @@ REGISTER_TEST(arrayimagecopy3d)
 {
     PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device)
 
-    return test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE,
-                                 CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D,
-                                 test_arrayimagecopy_single_format);
+    int error = test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE,
+                                      CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D,
+                                      test_arrayimagecopy_single_format);
+    if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
+    {
+        error |= test_arrayimagecommon(
+            device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
+            CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D,
+            test_arrayimagecopy_single_format);
+    }
+    return error;
 }
diff --git a/test_conformance/basic/test_arrayreadwrite.cpp b/test_conformance/basic/test_arrayreadwrite.cpp
index fe4bb995..4b0555fd 100644
--- a/test_conformance/basic/test_arrayreadwrite.cpp
+++ b/test_conformance/basic/test_arrayreadwrite.cpp
@@ -72,16 +72,36 @@ static int test_arrayreadwrite_impl(cl_device_id device, cl_context context,
         err = clEnqueueWriteBuffer(
             queue, buffer, CL_TRUE, offset * sizeof(cl_uint),
             sizeof(cl_uint) * cb, &reference_vals[offset], 0, nullptr, nullptr);
-        test_error(err, "clEnqueueWriteBuffer failed");
+        if (flags & CL_MEM_IMMUTABLE_EXT)
+        {
+            test_failure_error_ret(err, CL_INVALID_OPERATION,
+                                   "clEnqueueWriteBuffer is expected to fail "
+                                   "with CL_INVALID_OPERATION when the buffer "
+                                   "is created with CL_MEM_IMMUTABLE_EXT",
+                                   TEST_FAIL);
+        }
+        else
+        {
+            test_error(err, "clEnqueueWriteBuffer failed");
+        }
 
         err = clEnqueueReadBuffer(
             queue, buffer, CL_TRUE, offset * sizeof(cl_uint),
             cb * sizeof(cl_uint), &outptr[offset], 0, nullptr, nullptr);
         test_error(err, "clEnqueueReadBuffer failed");
 
+        const cl_uint* expected_buffer_values = nullptr;
+        if (flags & CL_MEM_IMMUTABLE_EXT)
+        {
+            expected_buffer_values = inptr.data();
+        }
+        else
+        {
+            expected_buffer_values = reference_vals.data();
+        }
         for (int j = offset; j < offset + cb; j++)
         {
-            if (reference_vals[j] != outptr[j])
+            if (expected_buffer_values[j] != outptr[j])
             {
                 log_error("ARRAY read, write test failed\n");
                 err = -1;
@@ -105,3 +125,11 @@ REGISTER_TEST(arrayreadwrite)
     return test_arrayreadwrite_impl(device, context, queue, num_elements,
                                     CL_MEM_READ_WRITE);
 }
+
+REGISTER_TEST(immutable_arrayreadwrite)
+{
+    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
+    // With this flag the impl expects clEnqueueWriteBuffer to be rejected.
+    return test_arrayreadwrite_impl(device, context, queue, num_elements,
+                                    CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR);
+}
diff --git a/test_conformance/basic/test_bufferreadwriterect.cpp b/test_conformance/basic/test_bufferreadwriterect.cpp
index 883bff7c..03ba2706 100644
--- a/test_conformance/basic/test_bufferreadwriterect.cpp
+++ b/test_conformance/basic/test_bufferreadwriterect.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 #include "harness/compat.h"
+#include "errorHelpers.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -194,6 +195,43 @@ int copy_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, si
     return 0;
 }
 
+int immutable_copy_region(size_t src, size_t soffset[3], size_t sregion[3],
+                          size_t dst, size_t doffset[3], size_t dregion[3])
+{
+    // Negative variant of the rect copy: the copy into buffer[dst] is
+    // expected to be rejected with CL_INVALID_OPERATION.
+    size_t src_slice_pitch =
+        (width[src] * height[src] != 1) ? width[src] * height[src] : 0;
+    size_t dst_slice_pitch =
+        (width[dst] * height[dst] != 1) ? width[dst] * height[dst] : 0;
+    size_t src_row_pitch = width[src];
+
+    cl_int err;
+    if (check_overlap_rect(soffset, doffset, sregion, src_row_pitch,
+                           src_slice_pitch))
+    {
+        log_info("Copy overlap reported, skipping copy buffer rect\n");
+        return CL_SUCCESS;
+    }
+    else
+    {
+        err = clEnqueueCopyBufferRect(gQueue, buffer[src], buffer[dst], soffset,
+                                      doffset, sregion, /*dregion,*/
+                                      width[src], src_slice_pitch, width[dst],
+                                      dst_slice_pitch, 0, nullptr, nullptr);
+        if (err != CL_INVALID_OPERATION)
+        {
+            log_error(
+                "clEnqueueCopyBufferRect should return "
+                "CL_INVALID_OPERATION but returned %s between %zu and %zu\n",
+                IGetErrorString(err), src, dst);
+            return TEST_FAIL;
+        }
+    }
+
+    return TEST_PASS;
+}
+
 // This function compares the destination region in the buffer pointed
 // to by device, to the source region of the specified verify buffer.
 int verify_region(BufferType* device, size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3]) {
@@ -337,6 +375,32 @@ int write_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, s
     return 0;
 }
 
+int immutable_write_region(size_t src, size_t soffset[3], size_t sregion[3],
+                           size_t dst, size_t doffset[3], size_t dregion[3])
+{
+    initialize_image(tmp_buffer, tmp_buffer_size, 1, 1, mt);
+    // Negative variant of the rect write: writing into buffer[dst] must fail.
+    size_t src_slice_pitch =
+        (width[src] * height[src] != 1) ? width[src] * height[src] : 0;
+    size_t dst_slice_pitch =
+        (width[dst] * height[dst] != 1) ? width[dst] * height[dst] : 0;
+
+    cl_int error = clEnqueueWriteBufferRect(
+        gQueue, buffer[dst], CL_TRUE, doffset, soffset, dregion, width[dst],
+        dst_slice_pitch, width[src], src_slice_pitch, tmp_buffer, 0, nullptr,
+        nullptr);
+    // Any result other than CL_INVALID_OPERATION, success included, fails.
+    if (error != CL_INVALID_OPERATION)
+    {
+        log_error("clEnqueueWriteBufferRect should return CL_INVALID_OPERATION "
+                  "but returned %s between %zu and %zu\n",
+                  IGetErrorString(error), src, dst);
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
 void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data )
 {
     free( data );
@@ -591,3 +655,16 @@ REGISTER_TEST(bufferreadwriterect)
         device, context, queue, num_elements,
         CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, test_functions);
 }
+
+REGISTER_TEST(immutable_bufferreadwriterect)
+{
+    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
+    // Copy/write hooks expect CL_INVALID_OPERATION; reads are still verified.
+    TestFunctions test_functions;
+    test_functions.copy = immutable_copy_region;
+    test_functions.read = read_verify_region;
+    test_functions.write = immutable_write_region;
+    return test_bufferreadwriterect_impl(
+        device, context, queue, num_elements,
+        CL_MEM_USE_HOST_PTR | CL_MEM_IMMUTABLE_EXT, test_functions);
+}
diff --git a/test_conformance/basic/test_imagearraycopy.cpp b/test_conformance/basic/test_imagearraycopy.cpp
index a400c460..d0ce67a1 100644
--- a/test_conformance/basic/test_imagearraycopy.cpp
+++ b/test_conformance/basic/test_imagearraycopy.cpp
@@ -27,6 +27,82 @@ using test_function_t = int (*)(cl_device_id, cl_context, cl_command_queue,
                                 cl_mem_flags, cl_mem_flags, cl_mem_object_type,
                                 const cl_image_format *);
 
+static int test_negative_imagearraycopy_single_format(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    cl_mem_flags image_flags, cl_mem_flags buffer_flags,
+    cl_mem_object_type image_type, const cl_image_format *format)
+{
+    std::unique_ptr<cl_uchar, decltype(&free)> bufptr{ nullptr, free },
+        imgptr{ nullptr, free };
+    clMemWrapper image;
+    clMemWrapper buffer;
+    const size_t img_width = 512;
+    const size_t img_height = 512;
+    const size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
+    size_t elem_size;
+    size_t buffer_size;
+    cl_int err;
+    RandomSeed seed(gRandomSeed);
+
+    const size_t origin[3] = { 0, 0, 0 },
+                 region[3] = { img_width, img_height, img_depth };
+
+    log_info("Testing %s %s\n",
+             GetChannelOrderName(format->image_channel_order),
+             GetChannelTypeName(format->image_channel_data_type));
+
+    elem_size = get_pixel_size(format);
+    buffer_size =
+        sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
+    // Images created from a host pointer need their data before creation.
+    if (image_flags & CL_MEM_USE_HOST_PTR || image_flags & CL_MEM_COPY_HOST_PTR)
+    {
+        imgptr.reset(static_cast<cl_uchar *>(
+            create_random_data(kUChar, seed, buffer_size)));
+    }
+
+    bufptr.reset(
+        static_cast<cl_uchar *>(create_random_data(kUChar, seed, buffer_size)));
+
+    if (CL_MEM_OBJECT_IMAGE2D == image_type)
+    {
+        image = create_image_2d(context, image_flags, format, img_width,
+                                img_height, 0, imgptr.get(), &err);
+    }
+    else
+    {
+        image =
+            create_image_3d(context, image_flags, format, img_width, img_height,
+                            img_depth, 0, 0, imgptr.get(), &err);
+    }
+    test_error(err, "create_image_xd failed");
+    // No host pointer was given: upload random contents with a write.
+    if (!(image_flags & CL_MEM_USE_HOST_PTR
+          || image_flags & CL_MEM_COPY_HOST_PTR))
+    {
+        imgptr.reset(static_cast<cl_uchar *>(
+            create_random_data(kUChar, seed, buffer_size)));
+
+        err = clEnqueueWriteImage(queue, image, CL_TRUE, origin, region, 0, 0,
+                                  imgptr.get(), 0, nullptr, nullptr);
+        test_error(err, "clEnqueueWriteImage failed");
+    }
+
+    buffer =
+        clCreateBuffer(context, buffer_flags, buffer_size, bufptr.get(), &err);
+    test_error(err, "clCreateBuffer failed");
+    // Negative check: this copy must fail with CL_INVALID_OPERATION.
+    err = clEnqueueCopyImageToBuffer(queue, image, buffer, origin, region, 0, 0,
+                                     nullptr, nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_OPERATION,
+        "clEnqueueCopyImageToBuffer should return CL_INVALID_OPERATION when: "
+        "\" dst_buffer is created with CL_MEM_IMMUTABLE_EXT flag\"",
+        TEST_FAIL);
+
+    return TEST_PASS;
+}
+
 static int test_imagearraycopy_single_format(
     cl_device_id device, cl_context context, cl_command_queue queue,
     cl_mem_flags image_flags, cl_mem_flags buffer_flags,
@@ -188,9 +264,18 @@ REGISTER_TEST(imagearraycopy)
 {
     PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
 
-    return test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE,
-                                 CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
-                                 test_imagearraycopy_single_format);
+    int error = test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE,
+                                      CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
+                                      test_imagearraycopy_single_format);
+
+    if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
+    {
+        error |= test_imagearraycommon(
+            device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
+            CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
+            test_imagearraycopy_single_format);
+    }
+    return error;
 }
 
 
@@ -198,7 +283,38 @@ REGISTER_TEST(imagearraycopy3d)
 {
     PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device)
 
-    return test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY,
-                                 CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D,
-                                 test_imagearraycopy_single_format);
+    int error = test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY,
+                                      CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D,
+                                      test_imagearraycopy_single_format);
+
+    if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
+    {
+        error |= test_imagearraycommon(
+            device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
+            CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D,
+            test_imagearraycopy_single_format);
+    }
+    return error;
+}
+
+REGISTER_TEST(negative_imagearraycopy)
+{
+    PASSIVE_REQUIRE_IMAGE_SUPPORT(device);
+    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
+    // Read-write 2D image copied into an immutable buffer: copy must fail.
+    return test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE,
+                                 CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
+                                 CL_MEM_OBJECT_IMAGE2D,
+                                 test_negative_imagearraycopy_single_format);
+}
+
+REGISTER_TEST(negative_imagearraycopy3d)
+{
+    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device);
+    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
+    // Read-only 3D image copied into an immutable buffer: copy must fail.
+    return test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY,
+                                 CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
+                                 CL_MEM_OBJECT_IMAGE3D,
+                                 test_negative_imagearraycopy_single_format);
 }
diff --git a/test_conformance/buffers/main.cpp b/test_conformance/buffers/main.cpp
index f2a8c2a3..496d3b7d 100644
--- a/test_conformance/buffers/main.cpp
+++ b/test_conformance/buffers/main.cpp
@@ -19,19 +19,24 @@
 
 #include "testBase.h"
 
-const cl_mem_flags flag_set[] = {
-    CL_MEM_ALLOC_HOST_PTR,
-    CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
-    CL_MEM_USE_HOST_PTR,
-    CL_MEM_COPY_HOST_PTR,
-    0
-};
+const cl_mem_flags flag_set[] = { CL_MEM_ALLOC_HOST_PTR,
+                                  CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+                                  CL_MEM_USE_HOST_PTR,
+                                  CL_MEM_COPY_HOST_PTR,
+                                  0,
+                                  CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
+                                  CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR,
+                                  CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR
+                                      | CL_MEM_ALLOC_HOST_PTR };
 const char* flag_set_names[] = {
     "CL_MEM_ALLOC_HOST_PTR",
     "CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR",
     "CL_MEM_USE_HOST_PTR",
     "CL_MEM_COPY_HOST_PTR",
-    "0"
+    "0",
+    "CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR",
+    "CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR",
+    "CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR",
 };
 
 int main( int argc, const char *argv[] )
diff --git a/test_conformance/buffers/testBase.h b/test_conformance/buffers/testBase.h
index 8c5bb0e4..4cd17155 100644
--- a/test_conformance/buffers/testBase.h
+++ b/test_conformance/buffers/testBase.h
@@ -25,6 +25,6 @@
 extern const cl_mem_flags flag_set[];
 extern const char* flag_set_names[];
 
-#define NUM_FLAGS 5
+#define NUM_FLAGS 8
 
 #endif // _testBase_h
diff --git a/test_conformance/buffers/test_buffer_copy.cpp b/test_conformance/buffers/test_buffer_copy.cpp
index 81dbd5cf..cba2c626 100644
--- a/test_conformance/buffers/test_buffer_copy.cpp
+++ b/test_conformance/buffers/test_buffer_copy.cpp
@@ -39,7 +39,8 @@ static int verify_copy_buffer(int *inptr, int *outptr, int n)
 
 using alignedOwningPtr = std::unique_ptr<cl_int[], decltype(&align_free)>;
 
-static int test_copy( cl_command_queue queue, cl_context context, int num_elements, MTdata d )
+static int test_copy(cl_device_id device, cl_command_queue queue,
+                     cl_context context, int num_elements, MTdata d)
 {
     clMemWrapper buffers[2];
     cl_int err = CL_SUCCESS;
@@ -76,10 +77,19 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
         return TEST_FAIL;
     }
 
+    const bool has_immutable_memory_extension =
+        is_extension_available(device, "cl_ext_immutable_memory_objects");
+
     for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
     {
         for (int dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
         {
+            if (((flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+                 || (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
+                && !has_immutable_memory_extension)
+            {
+                continue;
+            }
             log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
 
             for (int i = 0; i < num_elements; i++)
@@ -89,7 +99,6 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
                 reference_ptr[i] = (int)genrand_int32(d);
             }
 
-
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
                 buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],
                                             sizeof(cl_int) * num_elements,
@@ -116,7 +125,9 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
                 return TEST_FAIL;
             }
 
-            if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
+            if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR)
+                && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
+            {
                 err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
                                            sizeof(cl_int) * num_elements,
                                            reference_ptr.get(), 0, nullptr,
@@ -130,11 +141,44 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
             err = clEnqueueCopyBuffer(queue, buffers[0], buffers[1], 0, 0,
                                       sizeof(cl_int) * num_elements, 0, nullptr,
                                       nullptr);
-            if ( err != CL_SUCCESS ){
+            if ((flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
+            {
+                if (err != CL_INVALID_OPERATION)
+                {
+                    test_failure_error_ret(err, CL_INVALID_OPERATION,
+                                           "clEnqueueCopyBuffer should return "
+                                           "CL_INVALID_OPERATION when: "
+                                           "\"dst_buffer is created with "
+                                           "CL_MEM_IMMUTABLE_EXT flag\"",
+                                           TEST_FAIL);
+                    return TEST_FAIL;
+                }
+            }
+            else if (err != CL_SUCCESS)
+            {
                 print_error(err, "clCopyArray failed\n");
                 return TEST_FAIL;
             }
 
+            err = clEnqueueReadBuffer(queue, buffers[0], true, 0,
+                                      sizeof(int) * num_elements, out_ptr.get(),
+                                      0, nullptr, nullptr);
+            if (verify_copy_buffer(reference_ptr.get(), out_ptr.get(),
+                                   num_elements))
+            {
+                log_error("test failed\n");
+                return TEST_FAIL;
+            }
+            else
+            {
+                log_info("test passed\n");
+            }
+
+            // Reset out_ptr
+            for (int i = 0; i < num_elements; i++)
+            {
+                out_ptr[i] = (int)0xdeadbeef; // seed with incorrect data
+            }
             err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
                                       sizeof(int) * num_elements, out_ptr.get(),
                                       0, nullptr, nullptr);
@@ -143,14 +187,20 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
                 return TEST_FAIL;
             }
 
-            if (verify_copy_buffer(reference_ptr.get(), out_ptr.get(),
-                                   num_elements))
+            int *target_buffer = reference_ptr.get();
+            if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
             {
-                log_error( " test failed\n" );
+                target_buffer = invalid_ptr.get();
+            }
+
+            if (verify_copy_buffer(target_buffer, out_ptr.get(), num_elements))
+            {
+                log_error("test failed\n");
                 return TEST_FAIL;
             }
-            else{
-                log_info( " test passed\n" );
+            else
+            {
+                log_info("test passed\n");
             }
         } // dst flags
     } // src flags
@@ -160,7 +210,10 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
 }   // end test_copy()
 
 
-static int testPartialCopy( cl_command_queue queue, cl_context context, int num_elements, cl_uint srcStart, cl_uint dstStart, int size, MTdata d )
+static int testPartialCopy(cl_device_id device, cl_command_queue queue,
+                           cl_context context, int num_elements,
+                           cl_uint srcStart, cl_uint dstStart, int size,
+                           MTdata d)
 {
     clMemWrapper buffers[2];
     cl_int err = CL_SUCCESS;
@@ -197,10 +250,19 @@ static int testPartialCopy( cl_command_queue queue, cl_context context, int num_
         return TEST_FAIL;
     }
 
+    const bool has_immutable_memory_extension =
+        is_extension_available(device, "cl_ext_immutable_memory_objects");
+
     for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
     {
         for (int dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
         {
+            if (((flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+                 || (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
+                && !has_immutable_memory_extension)
+            {
+                continue;
+            }
             log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
 
             for (int i = 0; i < num_elements; i++)
@@ -236,7 +298,9 @@ static int testPartialCopy( cl_command_queue queue, cl_context context, int num_
                 return TEST_FAIL;
             }
 
-            if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)){
+            if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR)
+                && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
+            {
                 err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
                                            sizeof(cl_int) * num_elements,
                                            reference_ptr.get(), 0, nullptr,
@@ -251,27 +315,72 @@ static int testPartialCopy( cl_command_queue queue, cl_context context, int num_
                 queue, buffers[0], buffers[1], srcStart * sizeof(cl_int),
                 dstStart * sizeof(cl_int), sizeof(cl_int) * size, 0, nullptr,
                 nullptr);
-            if ( err != CL_SUCCESS){
-                print_error(err, "clEnqueueCopyBuffer failed\n");
+            if ((flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
+            {
+                if (err != CL_INVALID_OPERATION)
+                {
+                    test_failure_error_ret(err, CL_INVALID_OPERATION,
+                                           "clEnqueueCopyBuffer should return "
+                                           "CL_INVALID_OPERATION when: "
+                                           "\"dst_buffer is created with "
+                                           "CL_MEM_IMMUTABLE_EXT flag\"",
+                                           TEST_FAIL);
+                }
+            }
+            else if (err != CL_SUCCESS)
+            {
+                print_error(err, "clCopyArray failed\n");
                 return TEST_FAIL;
             }
 
+            err = clEnqueueReadBuffer(queue, buffers[0], true, 0,
+                                      sizeof(int) * num_elements, out_ptr.get(),
+                                      0, nullptr, nullptr);
+            if (err != CL_SUCCESS)
+            {
+                print_error(err, "clEnqueueReadBuffer failed\n");
+                return TEST_FAIL;
+            }
+            if (verify_copy_buffer(reference_ptr.get(), out_ptr.get(),
+                                   num_elements))
+            {
+                log_error("test failed\n");
+                return TEST_FAIL;
+            }
+            else
+            {
+                log_info("test passed\n");
+            }
+
+            // Reset out_ptr
+            for (int i = 0; i < num_elements; i++)
+            {
+                out_ptr[i] = (int)0xdeadbeef; // seed with incorrect data
+            }
             err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
                                       sizeof(int) * num_elements, out_ptr.get(),
                                       0, nullptr, nullptr);
-            if ( err != CL_SUCCESS){
+            if (err != CL_SUCCESS)
+            {
                 print_error(err, "clEnqueueReadBuffer failed\n");
                 return TEST_FAIL;
             }
 
-            if (verify_copy_buffer(reference_ptr.get() + srcStart,
-                                   out_ptr.get() + dstStart, size))
+            cl_int *target_buffer = reference_ptr.get() + srcStart;
+            if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
             {
-                log_error("buffer_COPY test failed\n");
+                target_buffer = invalid_ptr.get();
+            }
+
+            if (verify_copy_buffer(target_buffer, out_ptr.get() + dstStart,
+                                   size))
+            {
+                log_error("test failed\n");
                 return TEST_FAIL;
             }
-            else{
-                log_info("buffer_COPY test passed\n");
+            else
+            {
+                log_info("test passed\n");
             }
         } // dst mem flags
     } // src mem flags
@@ -289,7 +398,7 @@ REGISTER_TEST(buffer_copy)
 
     // test the preset size
     log_info( "set size: %d: ", num_elements );
-    if (test_copy(queue, context, num_elements, d) != TEST_PASS)
+    if (test_copy(device, queue, context, num_elements, d) != TEST_PASS)
     {
         err++;
     }
@@ -298,7 +407,7 @@ REGISTER_TEST(buffer_copy)
     for ( i = 0; i < 8; i++ ){
         size = (int)get_random_float(2.f,131072.f, d);
         log_info( "random size: %d: ", size );
-        if (test_copy(queue, context, size, d) != TEST_PASS)
+        if (test_copy(device, queue, context, size, d) != TEST_PASS)
         {
             err++;
         }
@@ -324,8 +433,8 @@ REGISTER_TEST(buffer_partial_copy)
         size = (int)get_random_float( 8.f, (float)(num_elements - srcStart), d );
         dstStart = (cl_uint)get_random_float( 0.f, (float)(num_elements - size), d );
         log_info( "random partial copy from %d to %d, size: %d: ", (int)srcStart, (int)dstStart, size );
-        if (testPartialCopy(queue, context, num_elements, srcStart, dstStart,
-                            size, d)
+        if (testPartialCopy(device, queue, context, num_elements, srcStart,
+                            dstStart, size, d)
             != TEST_PASS)
         {
             err++;
diff --git a/test_conformance/buffers/test_buffer_fill.cpp b/test_conformance/buffers/test_buffer_fill.cpp
index 2e7a22de..d8fa7654 100644
--- a/test_conformance/buffers/test_buffer_fill.cpp
+++ b/test_conformance/buffers/test_buffer_fill.cpp
@@ -598,6 +598,12 @@ static int test_buffer_fill(cl_device_id deviceID, cl_context context,
 
         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
         {
+            // Skip immutable memory flags
+            if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
+
             clEventWrapper event[2];
             clMemWrapper buffers[2];
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
@@ -721,6 +727,12 @@ REGISTER_TEST(buffer_fill_struct)
 
     for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
     {
+        // Skip immutable memory flags
+        if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+        {
+            continue;
+        }
+
         clProgramWrapper program;
         clKernelWrapper kernel;
         log_info("Testing with cl_mem_flags: %s\n",
diff --git a/test_conformance/buffers/test_buffer_map.cpp b/test_conformance/buffers/test_buffer_map.cpp
index 5cac90ab..3299902c 100644
--- a/test_conformance/buffers/test_buffer_map.cpp
+++ b/test_conformance/buffers/test_buffer_map.cpp
@@ -592,6 +592,12 @@ static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_c
 
         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
         {
+            // Skip immutable memory flags
+            if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
+
             clMemWrapper buffer;
             outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
             if ( ! outptr[i] ){
@@ -671,6 +677,101 @@ static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_c
 
 }   // end test_buffer_map_read()
 
+/* Maps immutable buffers for reading and validates their contents.
+ * For each CL_MEM_IMMUTABLE_EXT entry of flag_set a buffer is created from
+ * random host data, mapped with CL_MAP_READ and compared with that data.
+ * `loops` selects how many element widths (size, 2 * size, ...) are tested
+ * and is clamped to 5, the length of the local arrays. */
+int test_immutable_buffer_map(cl_device_id device, cl_context context,
+                              cl_command_queue queue, int num_elements,
+                              size_t size, const char *type, int loops)
+{
+    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
+
+    void *outptr[5];
+    cl_int err;
+    size_t ptrSizes[5];
+    int total_errors = 0;
+    MTdataHolder mtdata(gRandomSeed);
+
+    size_t min_alignment = get_min_alignment(context);
+
+    ptrSizes[0] = size;
+    for (int k = 1; k < 5; k++) ptrSizes[k] = ptrSizes[k - 1] << 1;
+    loops = (loops < 5 ? loops : 5);
+
+    // embedded devices don't support long/ulong so skip over
+    if (!gHasLong && strstr(type, "long")) return TEST_SKIPPED_ITSELF;
+
+    for (int i = 0; i < loops; i++)
+    {
+        for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
+        {
+            // Only immutable flag combinations are tested here
+            if (!(flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT))
+            {
+                continue;
+            }
+
+            clMemWrapper buffer;
+            outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
+            if (!outptr[i])
+            {
+                log_error(" unable to allocate %d bytes of memory\n",
+                          (int)ptrSizes[i] * num_elements);
+                return TEST_FAIL;
+            }
+            generate_random_data(kUChar, ptrSizes[i] * num_elements, mtdata,
+                                 outptr[i]);
+
+            buffer =
+                clCreateBuffer(context, flag_set[src_flag_id],
+                               ptrSizes[i] * num_elements, outptr[i], &err);
+
+            if (nullptr == buffer || CL_SUCCESS != err)
+            {
+                print_error(err, "clCreateBuffer failed\n");
+                align_free(outptr[i]);
+                return TEST_FAIL;
+            }
+
+            void *mappedPtr = clEnqueueMapBuffer(
+                queue, buffer, CL_TRUE, CL_MAP_READ, 0,
+                ptrSizes[i] * num_elements, 0, nullptr, nullptr, &err);
+            if (err != CL_SUCCESS)
+            {
+                print_error(err, "clEnqueueMapBuffer failed");
+                align_free(outptr[i]);
+                return TEST_FAIL;
+            }
+
+            if (memcmp(mappedPtr, outptr[i], ptrSizes[i] * num_elements) != 0)
+            {
+                log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
+                          1 << i, flag_set_names[src_flag_id]);
+                total_errors++;
+            }
+            else
+            {
+                log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
+                         1 << i, flag_set_names[src_flag_id]);
+            }
+
+            err = clEnqueueUnmapMemObject(queue, buffer, mappedPtr, 0, nullptr,
+                                          nullptr);
+            // With CL_MEM_USE_HOST_PTR outptr[i] backs the buffer, so wait for
+            // the queue first; then free it before any early error return.
+            cl_int finish_err = CL_SUCCESS;
+            if (flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR)
+                finish_err = clFinish(queue);
+            align_free(outptr[i]);
+            test_error(err, "clEnqueueUnmapMemObject failed");
+            test_error(finish_err, "clFinish failed");
+        } // cl_mem_flags
+    } // loops
+
+    return total_errors > 0 ? TEST_FAIL : TEST_PASS;
+}
 
 #define DECLARE_LOCK_TEST(type, realType)                                      \
     REGISTER_TEST(buffer_map_read_##type)                                      \
@@ -691,6 +792,28 @@ DECLARE_LOCK_TEST(char, cl_char)
 DECLARE_LOCK_TEST(uchar, cl_uchar)
 DECLARE_LOCK_TEST(float, cl_float)
 
+#undef DECLARE_LOCK_TEST
+
+#define DECLARE_LOCK_TEST(type, realType)                                      \
+    REGISTER_TEST(immutable_buffer_map_##type)                                 \
+    {                                                                          \
+        return test_immutable_buffer_map(device, context, queue, num_elements, \
+                                         sizeof(realType), #type, 5);          \
+    }
+
+DECLARE_LOCK_TEST(int, cl_int)
+DECLARE_LOCK_TEST(uint, cl_uint)
+DECLARE_LOCK_TEST(long, cl_long)
+DECLARE_LOCK_TEST(ulong, cl_ulong)
+DECLARE_LOCK_TEST(short, cl_short)
+DECLARE_LOCK_TEST(ushort, cl_ushort)
+DECLARE_LOCK_TEST(char, cl_char)
+DECLARE_LOCK_TEST(uchar, cl_uchar)
+DECLARE_LOCK_TEST(float, cl_float)
+
+#undef DECLARE_LOCK_TEST
+
+
 REGISTER_TEST(buffer_map_read_struct)
 {
     int (*foo)(void *,int);
diff --git a/test_conformance/buffers/test_buffer_read.cpp b/test_conformance/buffers/test_buffer_read.cpp
index dbf39ab4..814dee45 100644
--- a/test_conformance/buffers/test_buffer_read.cpp
+++ b/test_conformance/buffers/test_buffer_read.cpp
@@ -666,6 +666,12 @@ static int test_buffer_read(cl_device_id deviceID, cl_context context,
 
         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
         {
+            // Skip immutable memory flags
+            if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
+
             clMemWrapper buffer;
             outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
             if ( ! outptr[i] ){
@@ -809,6 +815,12 @@ static int test_buffer_read_async(cl_device_id deviceID, cl_context context,
 
         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
         {
+            // Skip immutable memory flags
+            if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
+
             clMemWrapper buffer;
             clEventWrapper event;
             outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
@@ -946,6 +958,12 @@ static int test_buffer_read_array_barrier(
 
         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
         {
+            // Skip immutable memory flags
+            if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
+
             clMemWrapper buffer;
             clEventWrapper event;
             outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
diff --git a/test_conformance/buffers/test_buffer_write.cpp b/test_conformance/buffers/test_buffer_write.cpp
index 36dcc963..7c92dfd9 100644
--- a/test_conformance/buffers/test_buffer_write.cpp
+++ b/test_conformance/buffers/test_buffer_write.cpp
@@ -660,8 +660,18 @@ static int test_buffer_write(cl_device_id deviceID, cl_context context,
 
         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
         {
+            // Skip immutable memory flags
+            if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
             for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
             {
+                // Skip immutable memory flags
+                if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
+                {
+                    continue;
+                }
                 clMemWrapper buffers[2];
 
                 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
@@ -834,8 +844,19 @@ REGISTER_TEST(buffer_write_struct)
 
         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
         {
+            // Skip immutable memory flags
+            if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
             for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
             {
+                // Skip immutable memory flags
+                if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
+                {
+                    continue;
+                }
+
                 clMemWrapper buffers[2];
 
                 inptr[i] = (TestStruct *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
@@ -996,7 +1017,17 @@ static int test_buffer_write_array_async(
     ptrSizes[4] = ptrSizes[3] << 1;
 
     for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        // Skip immutable memory flags
+        if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+        {
+            continue;
+        }
         for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
+            // Skip immutable memory flags
+            if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
             log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
 
             loops = ( loops < 5 ? loops : 5 );
@@ -1974,3 +2005,256 @@ REGISTER_TEST(buffer_write_async_ulong)
 
 }   // end test_buffer_ulong_write_array_async()
 
+
+/* For up to 5 element widths, builds kernelCode[i] and runs it with an
+ * immutable source buffer (arg 0, created from inptr[i]) and a non-immutable
+ * destination buffer (arg 1), then reads the destination back and checks it
+ * with fn(). Sources use each CL_MEM_IMMUTABLE_EXT entry of flag_set and
+ * destinations each other entry. Returns the error count or TEST_FAIL. */
+int immutable_test_buffer_write(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int num_elements,
+                                size_t size, const char *type, int loops,
+                                void *inptr[5], const char *kernelCode[],
+                                const char *kernelName[],
+                                int (*fn)(void *, void *, int), MTdataHolder &d)
+{
+    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
+
+    void *outptr[5];
+    clProgramWrapper program[5];
+    clKernelWrapper kernel[5];
+    size_t ptrSizes[5];
+    size_t global_work_size[3];
+    cl_int err;
+    int total_errors = 0;
+
+    size_t min_alignment = get_min_alignment(context);
+
+    global_work_size[0] = (size_t)num_elements;
+
+    ptrSizes[0] = size;
+    for (int k = 1; k < 5; k++) ptrSizes[k] = ptrSizes[k - 1] << 1;
+
+    loops = (loops < 5 ? loops : 5);
+    for (int i = 0; i < loops; i++)
+    {
+        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
+                                          &kernelCode[i], kernelName[i]);
+        if (err)
+        {
+            log_error(" Error creating program for %s\n", type);
+            return TEST_FAIL;
+        }
+
+        for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
+        {
+            // Testing writing from immutable flags
+            if (!(flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT))
+            {
+                continue;
+            }
+            for (int dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
+            {
+                // Skip immutable memory flags
+                if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
+                {
+                    continue;
+                }
+
+                cl_mem_flags src_mem_flags = flag_set[src_flag_id];
+                cl_mem_flags dst_mem_flags = flag_set[dst_flag_id];
+                clMemWrapper buffers[2];
+
+                buffers[0] =
+                    clCreateBuffer(context, src_mem_flags,
+                                   ptrSizes[i] * num_elements, inptr[i], &err);
+
+                if (nullptr == buffers[0] || CL_SUCCESS != err)
+                {
+                    // outptr[i] is not allocated at this point: nothing to free
+                    print_error(err, " clCreateBuffer failed\n");
+                    return TEST_FAIL;
+                }
+                if (!strcmp(type, "half"))
+                {
+                    outptr[i] = align_malloc(ptrSizes[i] * (num_elements * 2),
+                                             min_alignment);
+                    buffers[1] = clCreateBuffer(context, dst_mem_flags,
+                                                ptrSizes[i] * 2 * num_elements,
+                                                outptr[i], &err);
+                }
+                else
+                {
+                    outptr[i] =
+                        align_malloc(ptrSizes[i] * num_elements, min_alignment);
+                    if ((dst_mem_flags & CL_MEM_USE_HOST_PTR)
+                        || (dst_mem_flags & CL_MEM_COPY_HOST_PTR))
+                        buffers[1] = clCreateBuffer(context, dst_mem_flags,
+                                                    ptrSizes[i] * num_elements,
+                                                    outptr[i], &err);
+                    else
+                        buffers[1] = clCreateBuffer(context, dst_mem_flags,
+                                                    ptrSizes[i] * num_elements,
+                                                    nullptr, &err);
+                }
+                if (err)
+                {
+                    align_free(outptr[i]);
+                    print_error(err, " clCreateBuffer failed\n");
+                    return TEST_FAIL;
+                }
+
+                err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem),
+                                     (void *)&buffers[0]);
+                err |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem),
+                                      (void *)&buffers[1]);
+                if (err != CL_SUCCESS)
+                {
+                    align_free(outptr[i]);
+                    print_error(err, " clSetKernelArg failed");
+                    return TEST_FAIL;
+                }
+
+                err = clEnqueueNDRangeKernel(queue, kernel[i], 1, nullptr,
+                                             global_work_size, nullptr, 0,
+                                             nullptr, nullptr);
+                if (err != CL_SUCCESS)
+                {
+                    print_error(err, " clEnqueueNDRangeKernel failed");
+                    align_free(outptr[i]);
+                    return TEST_FAIL;
+                }
+
+                err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
+                                          ptrSizes[i] * num_elements, outptr[i],
+                                          0, nullptr, nullptr);
+
+                if (err != CL_SUCCESS)
+                {
+                    align_free(outptr[i]);
+                    print_error(err, " clEnqueueReadBuffer failed");
+                    return TEST_FAIL;
+                }
+
+                if (fn(inptr[i], outptr[i],
+                       (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0])))
+                {
+                    log_error(
+                        " %s%d test failed. cl_mem_flags src: %s, dst: %s\n",
+                        type, 1 << i, flag_set_names[src_flag_id],
+                        flag_set_names[dst_flag_id]);
+                    total_errors++;
+                }
+                else
+                {
+                    log_info(
+                        " %s%d test passed. cl_mem_flags src: %s, dst: %s\n",
+                        type, 1 << i, flag_set_names[src_flag_id],
+                        flag_set_names[dst_flag_id]);
+                }
+                // cleanup
+                align_free(outptr[i]);
+            } // dst cl_mem_flag
+        } // src cl_mem_flag
+    } // loops
+
+    return total_errors;
+
+} // end immutable_test_buffer_write()
+
+// Copies int, int2, int4, int8 and int16 data from immutable buffers into
+// regular buffers with a kernel and verifies the result on the host.
+REGISTER_TEST(write_from_immutable_buffer_to_buffer)
+{
+    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
+
+    static const char *immutable_buffer_write_int_kernel_code[] = {
+        R"(
+      __kernel void test_buffer_write_int(constant int *src, __global int *dst)
+      {
+        int  tid = get_global_id(0);
+
+        dst[tid] = src[tid];
+      })",
+
+        R"(
+      __kernel void test_buffer_write_int2(constant int2 *src, __global int2 *dst)
+      {
+        int  tid = get_global_id(0);
+
+        dst[tid] = src[tid];
+      })",
+
+        R"(
+      __kernel void test_buffer_write_int4(constant int4 *src, __global int4 *dst)
+      {
+        int  tid = get_global_id(0);
+
+        dst[tid] = src[tid];
+      })",
+
+        R"(
+      __kernel void test_buffer_write_int8(constant int8 *src, __global int8 *dst)
+      {
+        int  tid = get_global_id(0);
+
+        dst[tid] = src[tid];
+      })",
+
+        R"(
+      __kernel void test_buffer_write_int16(constant int16 *src, __global int16 *dst)
+      {
+        int  tid = get_global_id(0);
+
+        dst[tid] = src[tid];
+      })"
+    };
+
+    static const char *immutable_int_kernel_name[] = {
+        "test_buffer_write_int", "test_buffer_write_int2",
+        "test_buffer_write_int4", "test_buffer_write_int8",
+        "test_buffer_write_int16"
+    };
+
+    if (gTestMap)
+    {
+        log_error("Immutable buffers cannot be mapped with CL_MAP_WRITE\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    int *inptr[5];
+    size_t ptrSizes[5];
+    MTdataHolder d(gRandomSeed);
+
+    size_t min_alignment = get_min_alignment(context);
+
+    ptrSizes[0] = sizeof(cl_int);
+    for (int k = 1; k < 5; k++) ptrSizes[k] = ptrSizes[k - 1] << 1;
+
+    for (int i = 0; i < 5; i++)
+    {
+        inptr[i] =
+            (int *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
+        if (!inptr[i])
+        {
+            log_error(" unable to allocate host memory for the input data\n");
+            while (i-- > 0) align_free((void *)inptr[i]);
+            return TEST_FAIL;
+        }
+
+        for (cl_uint j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++)
+            inptr[i][j] = (int)genrand_int32(d);
+    }
+
+    int err = immutable_test_buffer_write(
+        device, context, queue, num_elements, sizeof(cl_int), "int", 5,
+        (void **)inptr, immutable_buffer_write_int_kernel_code,
+        immutable_int_kernel_name, verify_write_int, d);
+
+    for (int i = 0; i < 5; i++)
+    {
+        align_free((void *)inptr[i]);
+    }
+
+    return err;
+}

From e076b4a9e9ecc52c54fb84fe80853db63fbafcb4 Mon Sep 17 00:00:00 2001
From: Grzegorz Wawiorko <grzegorz.wawiorko@intel.com>
Date: Tue, 13 Jan 2026 18:51:17 +0100
Subject: [PATCH 34/54] Fix test_atomics - wrong HostDataType taken for
 comparing (#2601)

We see compiler errors on Intel GPU without this fix.
---
 test_conformance/c11_atomics/test_atomics.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index b1326e28..18c86a3b 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -1371,7 +1371,7 @@ public:
     int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                           cl_command_queue queue) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
         {
             if (LocalMemory()
                 && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)

From 5af0e74ef49714679850a2b4f31e458439d7bf67 Mon Sep 17 00:00:00 2001
From: Krzysztof Gibala <krzysztof.gibala@intel.com>
Date: Tue, 20 Jan 2026 18:59:56 +0100
Subject: [PATCH 35/54] Add cl_khr_icd_unloadable to the list of
 known_extensions (#2605)

Signed-off-by: Krzysztof Gibala <krzysztof.gibala@intel.com>

Signed-off-by: Krzysztof Gibala <krzysztof.gibala@intel.com>
---
 .../compiler/test_compiler_defines_for_extensions.cpp            | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
index b9cfb608..04abde9a 100644
--- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
+++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
@@ -56,6 +56,7 @@ const char *known_extensions[] = {
     "cl_khr_integer_dot_product",
     "cl_khr_subgroup_rotate",
     "cl_khr_kernel_clock",
+    "cl_khr_icd_unloadable",
     // API-only extensions after this point.  If you add above here, modify
     // first_API_extension below.
     "cl_khr_icd",

From 584f27afd479e41d233d57e273f430a8ef35dc17 Mon Sep 17 00:00:00 2001
From: Jose Lopez <joselope@qti.qualcomm.com>
Date: Tue, 20 Jan 2026 18:02:26 +0000
Subject: [PATCH 36/54] Clean up cl_khr_external_semaphore_dx_fence test
 (#2580)

Removes the duplicated code from the tests. Improves the Base test
class.
---
 test_common/harness/extensionHelpers.h        |  10 +
 .../directx_wrapper/directx_wrapper.hpp       |   8 +-
 .../main.cpp                                  |   5 +-
 .../semaphore_dx_fence_base.h                 | 196 ++++---
 .../test_external_semaphore_dx_fence.cpp      | 526 +++++++++---------
 ...est_external_semaphore_dx_fence_export.cpp | 382 ++++++-------
 ...emaphore_dx_fence_negative_wait_signal.cpp |  98 ++--
 ...st_external_semaphore_dx_fence_queries.cpp |  81 ++-
 .../cl_khr_semaphore/semaphore_base.h         |  29 +-
 9 files changed, 662 insertions(+), 673 deletions(-)

diff --git a/test_common/harness/extensionHelpers.h b/test_common/harness/extensionHelpers.h
index e98f67c2..abaa0ba6 100644
--- a/test_common/harness/extensionHelpers.h
+++ b/test_common/harness/extensionHelpers.h
@@ -32,5 +32,15 @@
         }                                                                      \
     } while (false)
 
+#define GET_FUNCTION_EXTENSION_ADDRESS(device, FUNC)                           \
+    FUNC =                                                                     \
+        reinterpret_cast<FUNC##_fn>(clGetExtensionFunctionAddressForPlatform(  \
+            getPlatformFromDevice(device), #FUNC));                            \
+    if (FUNC == nullptr)                                                       \
+    {                                                                          \
+        log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed"     \
+                  " with " #FUNC "\n");                                        \
+        return TEST_FAIL;                                                      \
+    }
 
 #endif // _extensionHelpers_h
diff --git a/test_conformance/common/directx_wrapper/directx_wrapper.hpp b/test_conformance/common/directx_wrapper/directx_wrapper.hpp
index dec85b78..fb758880 100644
--- a/test_conformance/common/directx_wrapper/directx_wrapper.hpp
+++ b/test_conformance/common/directx_wrapper/directx_wrapper.hpp
@@ -26,9 +26,9 @@ class DirectXWrapper {
 public:
     DirectXWrapper();
 
-    ID3D12Device* getDXDevice() const;
-    ID3D12CommandQueue* getDXCommandQueue() const;
-    ID3D12CommandAllocator* getDXCommandAllocator() const;
+    [[nodiscard]] ID3D12Device* getDXDevice() const;
+    [[nodiscard]] ID3D12CommandQueue* getDXCommandQueue() const;
+    [[nodiscard]] ID3D12CommandAllocator* getDXCommandAllocator() const;
 
 protected:
     ComPtr<ID3D12Device> dx_device = nullptr;
@@ -39,7 +39,7 @@ protected:
 class DirectXFenceWrapper {
 public:
     DirectXFenceWrapper(ID3D12Device* dx_device);
-    ID3D12Fence* operator*() const { return dx_fence.Get(); }
+    [[nodiscard]] ID3D12Fence* get() const { return dx_fence.Get(); }
 
 private:
     ComPtr<ID3D12Fence> dx_fence = nullptr;
diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/main.cpp b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/main.cpp
index 85c8fc7f..8a0de351 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/main.cpp
+++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/main.cpp
@@ -17,6 +17,7 @@
 
 int main(int argc, const char *argv[])
 {
-    return runTestHarness(argc, argv, test_registry::getInstance().num_tests(),
-                          test_registry::getInstance().definitions(), false, 0);
+    return runTestHarness(
+        argc, argv, static_cast<int>(test_registry::getInstance().num_tests()),
+        test_registry::getInstance().definitions(), false, 0);
 }
\ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/semaphore_dx_fence_base.h b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/semaphore_dx_fence_base.h
index f8ccb570..31488d9d 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/semaphore_dx_fence_base.h
+++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/semaphore_dx_fence_base.h
@@ -21,23 +21,118 @@
 #include "harness/errorHelpers.h"
 #include "directx_wrapper.hpp"
 
-class CLDXSemaphoreWrapper {
-public:
-    CLDXSemaphoreWrapper(cl_device_id device, cl_context context,
-                         ID3D12Device* dx_device)
-        : device(device), context(context), dx_device(dx_device){};
-
-    int createSemaphoreFromFence(ID3D12Fence* fence)
+struct DXFenceTestBase
+{
+    DXFenceTestBase(cl_device_id device, cl_context context,
+                    cl_command_queue queue, cl_int num_elems)
+        : device(device), context(context), queue(queue), num_elems(num_elems)
+    {}
+    virtual ~DXFenceTestBase()
     {
-        cl_int errcode = CL_SUCCESS;
+        if (fence_handle)
+        {
+            CloseHandle(fence_handle);
+            fence_handle = nullptr;
+        }
+        if (fence_wrapper)
+        {
+            delete fence_wrapper;
+            fence_wrapper = nullptr;
+        }
+        if (semaphore)
+        {
+            clReleaseSemaphoreKHR(semaphore);
+            semaphore = nullptr;
+        }
+    };
 
-        GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
+    virtual int SetUp()
+    {
+        REQUIRE_EXTENSION("cl_khr_external_semaphore");
+        REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
 
-        const HRESULT hr = dx_device->CreateSharedHandle(
-            fence, nullptr, GENERIC_ALL, nullptr, &fence_handle);
-        test_error(FAILED(hr), "Failed to get shared handle from D3D12 fence");
+        // Obtain pointers to semaphore's API
+        GET_FUNCTION_EXTENSION_ADDRESS(device,
+                                       clCreateSemaphoreWithPropertiesKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clReleaseSemaphoreKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueSignalSemaphoresKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueWaitSemaphoresKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreHandleForTypeKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clRetainSemaphoreKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreInfoKHR);
 
-        cl_semaphore_properties_khr sem_props[] = {
+        test_error(
+            !is_import_handle_available(CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
+            "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
+            "supported import types");
+
+        // Import D3D12 fence into OpenCL
+        fence_wrapper = new DirectXFenceWrapper(dx_wrapper.getDXDevice());
+        semaphore = createSemaphoreFromFence(fence_wrapper->get());
+        test_assert_error(!!semaphore, "Could not create semaphore");
+
+        return TEST_PASS;
+    }
+
+    virtual cl_int Run() = 0;
+
+protected:
+    int errcode = CL_SUCCESS;
+
+    cl_device_id device = nullptr;
+    cl_context context = nullptr;
+    cl_command_queue queue = nullptr;
+    cl_int num_elems = 0;
+    DirectXWrapper dx_wrapper;
+
+    cl_semaphore_payload_khr semaphore_payload = 1;
+    cl_semaphore_khr semaphore = nullptr;
+    HANDLE fence_handle = nullptr;
+    DirectXFenceWrapper *fence_wrapper = nullptr;
+
+    clCreateSemaphoreWithPropertiesKHR_fn clCreateSemaphoreWithPropertiesKHR =
+        nullptr;
+    clEnqueueSignalSemaphoresKHR_fn clEnqueueSignalSemaphoresKHR = nullptr;
+    clEnqueueWaitSemaphoresKHR_fn clEnqueueWaitSemaphoresKHR = nullptr;
+    clReleaseSemaphoreKHR_fn clReleaseSemaphoreKHR = nullptr;
+    clGetSemaphoreInfoKHR_fn clGetSemaphoreInfoKHR = nullptr;
+    clRetainSemaphoreKHR_fn clRetainSemaphoreKHR = nullptr;
+    clGetSemaphoreHandleForTypeKHR_fn clGetSemaphoreHandleForTypeKHR = nullptr;
+
+    // True if the device lists handle_type as an importable semaphore handle.
+    [[nodiscard]] bool is_import_handle_available(
+        const cl_external_semaphore_handle_type_khr handle_type)
+    {
+        size_t import_types_size = 0; // size of the list, in bytes
+        errcode =
+            clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
+                            0, nullptr, &import_types_size);
+        if (errcode != CL_SUCCESS)
+        {
+            log_error("Could not query import semaphore handle types\n");
+            return false;
+        }
+        std::vector<cl_external_semaphore_handle_type_khr> import_types(
+            import_types_size / sizeof(cl_external_semaphore_handle_type_khr));
+        errcode =
+            clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
+                            import_types_size, import_types.data(), nullptr);
+        if (errcode != CL_SUCCESS)
+        {
+            log_error("Could not query import semaphore handle types\n");
+            return false;
+        }
+        return std::find(import_types.begin(), import_types.end(), handle_type)
+            != import_types.end();
+    }
+
+    cl_semaphore_khr createSemaphoreFromFence(ID3D12Fence *src_fence)
+    {
+        const HRESULT hr = dx_wrapper.getDXDevice()->CreateSharedHandle(
+            src_fence, nullptr, GENERIC_ALL, nullptr, &fence_handle);
+        if (FAILED(hr)) return nullptr;
+
+        const cl_semaphore_properties_khr sem_props[] = {
             static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
             static_cast<cl_semaphore_properties_khr>(
                 CL_SEMAPHORE_TYPE_BINARY_KHR),
@@ -45,73 +140,30 @@ public:
                 CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
             reinterpret_cast<cl_semaphore_properties_khr>(fence_handle), 0
         };
-        semaphore =
+        cl_semaphore_khr tmp_semaphore =
             clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
-        test_error(errcode, "Could not create semaphore");
+        if (errcode != CL_SUCCESS) return nullptr;
 
-        return CL_SUCCESS;
-    }
-
-    ~CLDXSemaphoreWrapper()
-    {
-        releaseSemaphore();
-        if (fence_handle)
-        {
-            CloseHandle(fence_handle);
-        }
-    };
-
-    const cl_semaphore_khr* operator&() const { return &semaphore; };
-    cl_semaphore_khr operator*() const { return semaphore; };
-
-    HANDLE getHandle() const { return fence_handle; };
-
-private:
-    cl_semaphore_khr semaphore;
-    ComPtr<ID3D12Fence> fence;
-    HANDLE fence_handle;
-    cl_device_id device;
-    cl_context context;
-    ComPtr<ID3D12Device> dx_device;
-
-    int releaseSemaphore() const
-    {
-        GET_PFN(device, clReleaseSemaphoreKHR);
-
-        if (semaphore)
-        {
-            clReleaseSemaphoreKHR(semaphore);
-        }
-
-        return CL_SUCCESS;
+        return tmp_semaphore;
     }
 };
 
-static bool
-is_import_handle_available(cl_device_id device,
-                           const cl_external_memory_handle_type_khr handle_type)
+template <class T>
+int MakeAndRunTest(cl_device_id device, cl_context context,
+                   cl_command_queue queue, cl_int nelems)
 {
-    int errcode = CL_SUCCESS;
-    size_t import_types_size = 0;
-    errcode =
-        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, 0,
-                        nullptr, &import_types_size);
-    if (errcode != CL_SUCCESS)
+    cl_int status = TEST_PASS;
+    try
     {
-        log_error("Could not query import semaphore handle types");
-        return false;
-    }
-    std::vector<cl_external_semaphore_handle_type_khr> import_types(
-        import_types_size / sizeof(cl_external_semaphore_handle_type_khr));
-    errcode =
-        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
-                        import_types_size, import_types.data(), nullptr);
-    if (errcode != CL_SUCCESS)
+        auto test_fixture = T(device, context, queue, nelems);
+        status = test_fixture.SetUp();
+        if (status != TEST_PASS) return status;
+        status = test_fixture.Run();
+    } catch (const std::runtime_error &e)
     {
-        log_error("Could not query import semaphore handle types");
-        return false;
+        log_error("%s", e.what());
+        return TEST_FAIL;
     }
 
-    return std::find(import_types.begin(), import_types.end(), handle_type)
-        != import_types.end();
+    return status;
 }
\ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence.cpp b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence.cpp
index 569fb204..db303bf6 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence.cpp
+++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence.cpp
@@ -16,309 +16,279 @@
 
 #include "semaphore_dx_fence_base.h"
 
+struct SignalWait final : DXFenceTestBase // OpenCL signal, then OpenCL wait
+{
+    using DXFenceTestBase::DXFenceTestBase;
+    // Enqueue a signal and then a wait on the same semaphore and queue.
+    cl_int Run() override
+    {
+        log_info("Calling clEnqueueSignalSemaphoresKHR\n");
+        clEventWrapper signal_event;
+        errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore,
+                                               &semaphore_payload, 0, nullptr,
+                                               &signal_event);
+        test_error(errcode, "Failed to signal semaphore");
+
+        log_info("Calling clEnqueueWaitSemaphoresKHR\n");
+        clEventWrapper wait_event;
+        errcode = clEnqueueWaitSemaphoresKHR(
+            queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
+        test_error(errcode, "Failed to wait semaphore");
+        // clFinish blocks until every command queued above has completed.
+        errcode = clFinish(queue);
+        test_error(errcode, "Could not finish queue");
+
+        // Verify that the events completed.
+        test_assert_event_complete(signal_event);
+        test_assert_event_complete(wait_event);
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that a signal followed by a wait in OpenCL will complete successfully
 REGISTER_TEST(test_external_semaphores_signal_wait)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore.createSemaphoreFromFence(*fence),
-               "Could not create semaphore");
-
-    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
-    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
-    clEventWrapper signal_event;
-    errcode = clEnqueueSignalSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
-    test_error(errcode, "Failed to signal semaphore");
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
-    clEventWrapper wait_event;
-    errcode = clEnqueueWaitSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
-    test_error(errcode, "Failed to wait semaphore");
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    // Verify that the events completed.
-    test_assert_event_complete(signal_event);
-    test_assert_event_complete(wait_event);
-
-    return TEST_PASS;
+    return MakeAndRunTest<SignalWait>(device, context, queue, num_elements);
 }
 
+struct SignalDXCPU final : DXFenceTestBase // OpenCL wait, fence signal
+{
+    using DXFenceTestBase::DXFenceTestBase;
+    // Enqueue the OpenCL wait first, then satisfy it from the D3D12 side.
+    cl_int Run() override
+    {
+        log_info("Calling clEnqueueWaitSemaphoresKHR\n");
+        clEventWrapper wait_event;
+        errcode = clEnqueueWaitSemaphoresKHR(
+            queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
+        test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
+        // Signal through ID3D12Fence itself; no D3D12 command queue involved.
+        log_info("Calling d3d12_fence->Signal()\n");
+        const HRESULT hr = fence_wrapper->get()->Signal(semaphore_payload);
+        test_error(FAILED(hr), "Failed to signal D3D12 fence");
+
+        errcode = clFinish(queue);
+        test_error(errcode, "Could not finish queue");
+
+        test_assert_event_complete(wait_event);
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that a wait in OpenCL followed by a CPU signal in DX12 will complete
 // successfully
 REGISTER_TEST(test_external_semaphores_signal_dx_cpu)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore.createSemaphoreFromFence(*fence),
-               "Could not create semaphore");
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
-    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
-    clEventWrapper wait_event;
-    errcode = clEnqueueWaitSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
-    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
-
-    log_info("Calling d3d12_fence->Signal()\n");
-    const HRESULT hr = (*fence)->Signal(semaphore_payload);
-    test_error(FAILED(hr), "Failed to signal D3D12 fence");
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    test_assert_event_complete(wait_event);
-
-    return TEST_PASS;
+    return MakeAndRunTest<SignalDXCPU>(device, context, queue, num_elements);
 }
 
+struct SignalDXGPU final : DXFenceTestBase // OpenCL wait, D3D12 queue signal
+{
+    using DXFenceTestBase::DXFenceTestBase;
+    // Enqueue the OpenCL wait first, then satisfy it from the D3D12 side.
+    cl_int Run() override
+    {
+        log_info("Calling clEnqueueWaitSemaphoresKHR\n");
+        clEventWrapper wait_event;
+        errcode = clEnqueueWaitSemaphoresKHR(
+            queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
+        test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
+        // Signal via the D3D12 command queue, not via the fence object.
+        log_info("Calling d3d12_command_queue->Signal()\n");
+        const HRESULT hr = dx_wrapper.getDXCommandQueue()->Signal(
+            fence_wrapper->get(), semaphore_payload);
+        test_error(FAILED(hr), "Failed to signal D3D12 fence");
+
+        errcode = clFinish(queue);
+        test_error(errcode, "Could not finish queue");
+
+        test_assert_event_complete(wait_event);
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that a wait in OpenCL followed by a GPU signal in DX12 will complete
 // successfully
 REGISTER_TEST(test_external_semaphores_signal_dx_gpu)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore.createSemaphoreFromFence(*fence),
-               "Could not create semaphore");
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
-    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
-    clEventWrapper wait_event;
-    errcode = clEnqueueWaitSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
-    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
-
-    log_info("Calling d3d12_command_queue->Signal()\n");
-    const HRESULT hr =
-        dx_wrapper.getDXCommandQueue()->Signal(*fence, semaphore_payload);
-    test_error(FAILED(hr), "Failed to signal D3D12 fence");
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    test_assert_event_complete(wait_event);
-
-    return TEST_PASS;
+    return MakeAndRunTest<SignalDXGPU>(device, context, queue, num_elements);
 }
 
+struct CLDXInterlock final : DXFenceTestBase
+{
+    using DXFenceTestBase::DXFenceTestBase;
+    // D3D12 waits for 1 then signals 2; OpenCL signals 1 then waits for 2.
+    cl_int Run() override
+    {
+        log_info("Calling d3d12_command_queue->Wait(1)\n");
+        HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(fence_wrapper->get(),
+                                                          semaphore_payload);
+        test_error(FAILED(hr), "Failed to wait on D3D12 fence");
+
+        log_info("Calling d3d12_command_queue->Signal(2)\n");
+        hr = dx_wrapper.getDXCommandQueue()->Signal(fence_wrapper->get(),
+                                                    semaphore_payload + 1);
+        test_error(FAILED(hr), "Failed to signal D3D12 fence");
+        // OpenCL side: supply the value the D3D12 queue is waiting for.
+        log_info("Calling clEnqueueSignalSemaphoresKHR(1)\n");
+        clEventWrapper signal_event;
+        errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore,
+                                               &semaphore_payload, 0, nullptr,
+                                               &signal_event);
+        test_error(errcode, "Failed to call clEnqueueSignalSemaphoresKHR");
+
+        log_info("Calling clEnqueueWaitSemaphoresKHR(2)\n");
+        semaphore_payload += 1; // member now matches the D3D12 Signal value
+        clEventWrapper wait_event;
+        errcode = clEnqueueWaitSemaphoresKHR(
+            queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
+        test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
+
+        errcode = clFinish(queue);
+        test_error(errcode, "Could not finish queue");
+
+        test_assert_event_complete(wait_event);
+        test_assert_event_complete(signal_event);
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that interlocking waits between OpenCL and DX12 will complete
 // successfully
 REGISTER_TEST(test_external_semaphores_cl_dx_interlock)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore.createSemaphoreFromFence(*fence),
-               "Could not create semaphore");
-
-    log_info("Calling d3d12_command_queue->Wait(1)\n");
-    cl_semaphore_payload_khr semaphore_payload = 1;
-    HRESULT hr =
-        dx_wrapper.getDXCommandQueue()->Wait(*fence, semaphore_payload);
-    test_error(FAILED(hr), "Failed to wait on D3D12 fence");
-
-    log_info("Calling d3d12_command_queue->Signal(2)\n");
-    hr = dx_wrapper.getDXCommandQueue()->Signal(*fence, semaphore_payload + 1);
-    test_error(FAILED(hr), "Failed to signal D3D12 fence");
-
-    log_info("Calling clEnqueueSignalSemaphoresKHR(1)\n");
-    clEventWrapper signal_event;
-    errcode = clEnqueueSignalSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
-    test_error(errcode, "Failed to call clEnqueueSignalSemaphoresKHR");
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR(2)\n");
-    semaphore_payload += 1;
-    clEventWrapper wait_event;
-    errcode = clEnqueueWaitSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
-    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    test_assert_event_complete(wait_event);
-    test_assert_event_complete(signal_event);
-
-    return TEST_PASS;
+    return MakeAndRunTest<CLDXInterlock>(device, context, queue, num_elements);
 }
 
+struct MultipleWaitSignal final : DXFenceTestBase
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    ~MultipleWaitSignal() override // frees only the second fence/semaphore
+    {
+        if (fence_handle_2)
+        {
+            CloseHandle(fence_handle_2);
+            fence_handle_2 = nullptr;
+        }
+        if (fence_wrapper_2)
+        {
+            delete fence_wrapper_2;
+            fence_wrapper_2 = nullptr;
+        }
+        if (semaphore_2)
+        {
+            clReleaseSemaphoreKHR(semaphore_2);
+            semaphore_2 = nullptr;
+        }
+        // ~DXFenceTestBase() runs automatically next; never call it by hand.
+    };
+
+    int SetUp() override
+    {
+        if (int s = DXFenceTestBase::SetUp(); s != TEST_PASS) return s;
+        fence_wrapper_2 = new DirectXFenceWrapper(dx_wrapper.getDXDevice());
+        // NOTE(review): next call overwrites fence_handle, leaking fence 1's.
+        semaphore_2 = createSemaphoreFromFence(fence_wrapper_2->get());
+        test_assert_error(!!semaphore_2, "Could not create semaphore");
+        return TEST_PASS;
+    }
+
+    cl_int Run() override
+    {
+        const cl_semaphore_khr semaphore_list[] = { semaphore, semaphore_2 };
+        cl_semaphore_payload_khr semaphore_payload_list[] = {
+            semaphore_payload, semaphore_payload + 1
+        };
+        // Phase 1: OpenCL waits on both semaphores, D3D12 queue signals them.
+        log_info("Calling clEnqueueWaitSemaphoresKHR\n");
+        clEventWrapper wait_event;
+        errcode = clEnqueueWaitSemaphoresKHR(queue, 2, semaphore_list,
+                                             semaphore_payload_list, 0, nullptr,
+                                             &wait_event);
+        test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
+        // Signals are issued in the opposite order to the wait list.
+        log_info("Calling d3d12_command_queue->Signal()\n");
+        HRESULT hr = dx_wrapper.getDXCommandQueue()->Signal(
+            fence_wrapper_2->get(), semaphore_payload + 1);
+        test_error(FAILED(hr), "Failed to signal D3D12 fence 2");
+        hr = dx_wrapper.getDXCommandQueue()->Signal(fence_wrapper->get(),
+                                                    semaphore_payload);
+        test_error(FAILED(hr), "Failed to signal D3D12 fence 1");
+        // Phase 2: D3D12 queue waits for new values that OpenCL signals below.
+        log_info(
+            "Calling d3d12_command_queue->Wait() with different payloads\n");
+        hr = dx_wrapper.getDXCommandQueue()->Wait(fence_wrapper->get(),
+                                                  semaphore_payload + 3);
+        test_error(FAILED(hr), "Failed to wait on D3D12 fence 1");
+        hr = dx_wrapper.getDXCommandQueue()->Wait(fence_wrapper_2->get(),
+                                                  semaphore_payload + 2);
+        test_error(FAILED(hr), "Failed to wait on D3D12 fence 2");
+
+        errcode = clFinish(queue);
+        test_error(errcode, "Could not finish queue");
+
+        test_assert_event_complete(wait_event);
+        // Reuse the payload list for the values the D3D12 waits above expect.
+        semaphore_payload_list[0] = semaphore_payload + 3;
+        semaphore_payload_list[1] = semaphore_payload + 2;
+
+        log_info("Calling clEnqueueSignalSemaphoresKHR\n");
+        clEventWrapper signal_event;
+        errcode = clEnqueueSignalSemaphoresKHR(queue, 2, semaphore_list,
+                                               semaphore_payload_list, 0,
+                                               nullptr, &signal_event);
+        test_error(errcode, "Could not call clEnqueueSignalSemaphoresKHR");
+
+        // Wait until the GPU has completed commands up to this fence point.
+        log_info("Waiting for D3D12 command queue completion\n");
+        if (fence_wrapper->get()->GetCompletedValue()
+            < semaphore_payload_list[0])
+        {
+            const HANDLE event_handle =
+                CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS);
+            hr = fence_wrapper->get()->SetEventOnCompletion(
+                semaphore_payload_list[0], event_handle);
+            test_error(FAILED(hr),
+                       "Failed to set D3D12 fence 1 event on completion");
+            WaitForSingleObject(event_handle, INFINITE);
+            CloseHandle(event_handle); // NOTE(review): skipped on early exit?
+        }
+        if (fence_wrapper_2->get()->GetCompletedValue()
+            < semaphore_payload_list[1])
+        {
+            const HANDLE event_handle =
+                CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS);
+            hr = fence_wrapper_2->get()->SetEventOnCompletion(
+                semaphore_payload_list[1], event_handle);
+            test_error(FAILED(hr),
+                       "Failed to set D3D12 fence 2 event on completion");
+            WaitForSingleObject(event_handle, INFINITE);
+            CloseHandle(event_handle); // NOTE(review): skipped on early exit?
+        }
+
+        errcode = clFinish(queue);
+        test_error(errcode, "Could not finish queue");
+
+        test_assert_event_complete(signal_event);
+
+        return TEST_PASS;
+    }
+
+protected:
+    cl_semaphore_khr semaphore_2 = nullptr;
+    HANDLE fence_handle_2 = nullptr;
+    DirectXFenceWrapper *fence_wrapper_2 = nullptr;
+};
+
 // Confirm that multiple waits in OpenCL followed by signals in DX12 and waits
 // in DX12 followed by signals in OpenCL complete successfully
 REGISTER_TEST(test_external_semaphores_multiple_wait_signal)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence_1(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore_1(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore_1.createSemaphoreFromFence(*fence_1),
-               "Could not create semaphore");
-
-    const DirectXFenceWrapper fence_2(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore_2(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore_2.createSemaphoreFromFence(*fence_2),
-               "Could not create semaphore");
-
-    const cl_semaphore_khr semaphore_list[] = { *semaphore_1, *semaphore_2 };
-    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
-    cl_semaphore_payload_khr semaphore_payload_list[] = {
-        semaphore_payload, semaphore_payload + 1
-    };
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
-    clEventWrapper wait_event;
-    errcode = clEnqueueWaitSemaphoresKHR(queue, 2, semaphore_list,
-                                         semaphore_payload_list, 0, nullptr,
-                                         &wait_event);
-    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
-
-    log_info("Calling d3d12_command_queue->Signal()\n");
-    HRESULT hr =
-        dx_wrapper.getDXCommandQueue()->Signal(*fence_2, semaphore_payload + 1);
-    test_error(FAILED(hr), "Failed to signal D3D12 fence 2");
-    hr = dx_wrapper.getDXCommandQueue()->Signal(*fence_1, semaphore_payload);
-    test_error(FAILED(hr), "Failed to signal D3D12 fence 1");
-
-    log_info("Calling d3d12_command_queue->Wait() with different payloads\n");
-    hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_1, semaphore_payload + 3);
-    test_error(FAILED(hr), "Failed to wait on D3D12 fence 1");
-    hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_2, semaphore_payload + 2);
-    test_error(FAILED(hr), "Failed to wait on D3D12 fence 2");
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    test_assert_event_complete(wait_event);
-
-    semaphore_payload_list[0] = semaphore_payload + 3;
-    semaphore_payload_list[1] = semaphore_payload + 2;
-
-    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
-    clEventWrapper signal_event;
-    errcode = clEnqueueSignalSemaphoresKHR(queue, 2, semaphore_list,
-                                           semaphore_payload_list, 0, nullptr,
-                                           &signal_event);
-    test_error(errcode, "Could not call clEnqueueSignalSemaphoresKHR");
-
-    // Wait until the GPU has completed commands up to this fence point.
-    log_info("Waiting for D3D12 command queue completion\n");
-    if ((*fence_1)->GetCompletedValue() < semaphore_payload_list[0])
-    {
-        const HANDLE event_handle =
-            CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
-        hr = (*fence_1)->SetEventOnCompletion(semaphore_payload_list[0],
-                                              event_handle);
-        test_error(FAILED(hr),
-                   "Failed to set D3D12 fence 1 event on completion");
-        WaitForSingleObject(event_handle, INFINITE);
-        CloseHandle(event_handle);
-    }
-    if ((*fence_2)->GetCompletedValue() < semaphore_payload_list[1])
-    {
-        const HANDLE event_handle =
-            CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
-        hr = (*fence_2)->SetEventOnCompletion(semaphore_payload_list[1],
-                                              event_handle);
-        test_error(FAILED(hr),
-                   "Failed to set D3D12 fence 2 event on completion");
-        WaitForSingleObject(event_handle, INFINITE);
-        CloseHandle(event_handle);
-    }
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    test_assert_event_complete(signal_event);
-
-    return TEST_PASS;
+    return MakeAndRunTest<MultipleWaitSignal>(device, context, queue,
+                                              num_elements);
 }
\ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_export.cpp b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_export.cpp
index c54cf61b..ce7db56f 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_export.cpp
+++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_export.cpp
@@ -16,205 +16,205 @@
 
 #include "semaphore_dx_fence_base.h"
 
+struct ExportDXSignal final : DXFenceTestBase // CL waits, D3D12 signals
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    int Run() override
+    {
+        size_t export_types_size = 0; // byte count from the size-only query
+        errcode =
+            clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
+                            0, nullptr, &export_types_size);
+        test_error(errcode, "Could not query export semaphore handle types");
+        std::vector<cl_external_semaphore_handle_type_khr> export_types(
+            export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
+        errcode =
+            clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
+                            export_types_size, export_types.data(), nullptr);
+        test_error(errcode, "Could not query export semaphore handle types");
+
+        if (std::find(export_types.begin(), export_types.end(),
+                      CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
+            == export_types.end())
+        {
+            log_info(
+                "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
+                "the supported export types\n");
+            return TEST_FAIL;
+        }
+
+        constexpr cl_semaphore_properties_khr sem_props[] = {
+            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_TYPE_BINARY_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
+            0 // trailing 0 closes the property list
+        };
+        cl_semaphore_khr exportable_semaphore =
+            clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
+        test_error(errcode, "Could not create semaphore");
+
+        cl_bool is_exportable = CL_FALSE;
+        errcode = clGetSemaphoreInfoKHR(
+            exportable_semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
+            sizeof(is_exportable), &is_exportable, nullptr);
+        test_error(errcode, "Could not get semaphore info");
+        test_error(!is_exportable, "Semaphore is not exportable");
+
+        log_info("Calling clEnqueueWaitSemaphoresKHR\n");
+        clEventWrapper wait_event;
+        errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &exportable_semaphore,
+                                             &semaphore_payload, 0, nullptr,
+                                             &wait_event);
+        test_error(errcode, "Failed to wait semaphore");
+
+        HANDLE semaphore_handle = nullptr; // closed below via CloseHandle
+        errcode = clGetSemaphoreHandleForTypeKHR(
+            exportable_semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
+            sizeof(semaphore_handle), &semaphore_handle, nullptr);
+        test_error(errcode, "Could not get semaphore handle");
+
+        ID3D12Fence *exported_fence = nullptr; // Release()d at the end
+        errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(
+            semaphore_handle, IID_PPV_ARGS(&exported_fence));
+        test_error(errcode, "Could not open semaphore handle");
+
+        log_info("Calling fence->Signal()\n");
+        const HRESULT hr = exported_fence->Signal(semaphore_payload);
+        test_error(FAILED(hr), "Failed to signal D3D12 fence");
+
+        errcode = clFinish(queue); // drain queue before checking wait_event
+        test_error(errcode, "Could not finish queue");
+
+        test_assert_event_complete(wait_event);
+
+        // Release resources. NOTE(review): skipped on any early return
+        CloseHandle(semaphore_handle);
+        test_error(clReleaseSemaphoreKHR(exportable_semaphore),
+                   "Could not release semaphore");
+        exported_fence->Release();
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that a wait followed by a signal in DirectX 12 using an exported
 // semaphore will complete successfully
 REGISTER_TEST(test_external_semaphores_export_dx_signal)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-    GET_PFN(device, clGetSemaphoreInfoKHR);
-    GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
-
-    size_t export_types_size = 0;
-    errcode =
-        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0,
-                        nullptr, &export_types_size);
-    test_error(errcode, "Could not query export semaphore handle types");
-    std::vector<cl_external_semaphore_handle_type_khr> export_types(
-        export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
-    errcode =
-        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
-                        export_types_size, export_types.data(), nullptr);
-    test_error(errcode, "Could not query export semaphore handle types");
-
-    if (std::find(export_types.begin(), export_types.end(),
-                  CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
-        == export_types.end())
-    {
-        log_info("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
-                 "the supported export types\n");
-        return TEST_FAIL;
-    }
-
-    constexpr cl_semaphore_properties_khr sem_props[] = {
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
-        0
-    };
-    cl_semaphore_khr semaphore =
-        clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
-    test_error(errcode, "Could not create semaphore");
-
-    cl_bool is_exportable = CL_FALSE;
-    errcode =
-        clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
-                              sizeof(is_exportable), &is_exportable, nullptr);
-    test_error(errcode, "Could not get semaphore info");
-    test_error(!is_exportable, "Semaphore is not exportable");
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
-    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
-    clEventWrapper wait_event;
-    errcode = clEnqueueWaitSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
-    test_error(errcode, "Failed to wait semaphore");
-
-    HANDLE semaphore_handle = nullptr;
-    errcode = clGetSemaphoreHandleForTypeKHR(
-        semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
-        sizeof(semaphore_handle), &semaphore_handle, nullptr);
-    test_error(errcode, "Could not get semaphore handle");
-
-    ID3D12Fence *fence = nullptr;
-    errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(semaphore_handle,
-                                                         IID_PPV_ARGS(&fence));
-    test_error(errcode, "Could not open semaphore handle");
-
-    log_info("Calling fence->Signal()\n");
-    const HRESULT hr = fence->Signal(semaphore_payload);
-    test_error(FAILED(hr), "Failed to signal D3D12 fence");
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    test_assert_event_complete(wait_event);
-
-    // Release resources
-    CloseHandle(semaphore_handle);
-    test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore");
-    fence->Release();
-
-    return TEST_PASS;
+    return MakeAndRunTest<ExportDXSignal>(device, context, queue, num_elements);
 }
 
+struct ExportDXWait final : DXFenceTestBase // CL signals, D3D12 waits
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    int Run() override
+    {
+        size_t export_types_size = 0; // byte count from the size-only query
+        errcode =
+            clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
+                            0, nullptr, &export_types_size);
+        test_error(errcode, "Could not query export semaphore handle types");
+        std::vector<cl_external_semaphore_handle_type_khr> export_types(
+            export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
+        errcode =
+            clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
+                            export_types_size, export_types.data(), nullptr);
+        test_error(errcode, "Could not query export semaphore handle types");
+
+        if (std::find(export_types.begin(), export_types.end(),
+                      CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
+            == export_types.end())
+        {
+            log_info(
+                "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
+                "the supported export types\n");
+            return TEST_FAIL;
+        }
+
+        constexpr cl_semaphore_properties_khr sem_props[] = {
+            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_TYPE_BINARY_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
+            0 // trailing 0 closes the property list
+        };
+        cl_semaphore_khr exportable_semaphore =
+            clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
+        test_error(errcode, "Could not create semaphore");
+
+        cl_bool is_exportable = CL_FALSE;
+        errcode = clGetSemaphoreInfoKHR(
+            exportable_semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
+            sizeof(is_exportable), &is_exportable, nullptr);
+        test_error(errcode, "Could not get semaphore info");
+        test_error(!is_exportable, "Semaphore is not exportable");
+
+        log_info("Calling clEnqueueSignalSemaphoresKHR\n");
+        constexpr cl_semaphore_payload_khr semaphore_payload = 1;
+        clEventWrapper signal_event;
+        errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &exportable_semaphore,
+                                               &semaphore_payload, 0, nullptr,
+                                               &signal_event);
+        test_error(errcode, "Failed to signal semaphore");
+
+        HANDLE semaphore_handle = nullptr; // closed below via CloseHandle
+        errcode = clGetSemaphoreHandleForTypeKHR(
+            exportable_semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
+            sizeof(semaphore_handle), &semaphore_handle, nullptr);
+        test_error(errcode, "Could not get semaphore handle");
+
+        ID3D12Fence *exported_fence = nullptr; // Release()d at the end
+        errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(
+            semaphore_handle, IID_PPV_ARGS(&exported_fence));
+        test_error(errcode, "Could not open semaphore handle");
+
+        log_info("Calling dx_wrapper.getDXCommandQueue()->Wait()\n");
+        HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(exported_fence,
+                                                          semaphore_payload);
+        test_error(FAILED(hr), "Failed to wait on D3D12 fence");
+
+        log_info("Calling WaitForSingleObject\n");
+        if (exported_fence->GetCompletedValue() < semaphore_payload)
+        {
+            const HANDLE event =
+                CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS);
+            hr = exported_fence->SetEventOnCompletion(semaphore_payload, event);
+            test_error(FAILED(hr), "Failed to set event on completion");
+            WaitForSingleObject(event, INFINITE); // returns once event is set
+            CloseHandle(event);
+        }
+
+        errcode = clFinish(queue); // drain queue before checking signal_event
+        test_error(errcode, "Could not finish queue");
+
+        test_assert_event_complete(signal_event);
+
+        // Release resources. NOTE(review): skipped on any early return
+        CloseHandle(semaphore_handle);
+        test_error(clReleaseSemaphoreKHR(exportable_semaphore),
+                   "Could not release semaphore");
+        exported_fence->Release();
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that a signal in OpenCL followed by a wait in DirectX 12 using an
 // exported semaphore will complete successfully
 REGISTER_TEST(test_external_semaphores_export_dx_wait)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-    GET_PFN(device, clGetSemaphoreInfoKHR);
-    GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
-
-    size_t export_types_size = 0;
-    errcode =
-        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0,
-                        nullptr, &export_types_size);
-    test_error(errcode, "Could not query export semaphore handle types");
-    std::vector<cl_external_semaphore_handle_type_khr> export_types(
-        export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
-    errcode =
-        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
-                        export_types_size, export_types.data(), nullptr);
-    test_error(errcode, "Could not query export semaphore handle types");
-
-    if (std::find(export_types.begin(), export_types.end(),
-                  CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
-        == export_types.end())
-    {
-        log_info("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
-                 "the supported export types\n");
-        return TEST_FAIL;
-    }
-
-    constexpr cl_semaphore_properties_khr sem_props[] = {
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
-        0
-    };
-    cl_semaphore_khr semaphore =
-        clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
-    test_error(errcode, "Could not create semaphore");
-
-    cl_bool is_exportable = CL_FALSE;
-    errcode =
-        clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
-                              sizeof(is_exportable), &is_exportable, nullptr);
-    test_error(errcode, "Could not get semaphore info");
-    test_error(!is_exportable, "Semaphore is not exportable");
-
-    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
-    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
-    clEventWrapper signal_event;
-    errcode = clEnqueueSignalSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
-    test_error(errcode, "Failed to signal semaphore");
-
-    HANDLE semaphore_handle = nullptr;
-    errcode = clGetSemaphoreHandleForTypeKHR(
-        semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
-        sizeof(semaphore_handle), &semaphore_handle, nullptr);
-    test_error(errcode, "Could not get semaphore handle");
-
-    ID3D12Fence *fence = nullptr;
-    errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(semaphore_handle,
-                                                         IID_PPV_ARGS(&fence));
-    test_error(errcode, "Could not open semaphore handle");
-
-    log_info("Calling dx_wrapper.get_d3d12_command_queue()->Wait()\n");
-    HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(fence, semaphore_payload);
-    test_error(FAILED(hr), "Failed to wait on D3D12 fence");
-
-    log_info("Calling WaitForSingleObject\n");
-    if (fence->GetCompletedValue() < semaphore_payload)
-    {
-        const HANDLE event =
-            CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
-        hr = fence->SetEventOnCompletion(semaphore_payload, event);
-        test_error(FAILED(hr), "Failed to set event on completion");
-        WaitForSingleObject(event, INFINITE);
-        CloseHandle(event);
-    }
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    test_assert_event_complete(signal_event);
-
-    // Release resources
-    CloseHandle(semaphore_handle);
-    test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore");
-    fence->Release();
-
-    return TEST_PASS;
+    return MakeAndRunTest<ExportDXWait>(device, context, queue, num_elements);
 }
\ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_negative_wait_signal.cpp b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_negative_wait_signal.cpp
index 6c032c56..398eb2b1 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_negative_wait_signal.cpp
+++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_negative_wait_signal.cpp
@@ -16,74 +16,52 @@
 
 #include "semaphore_dx_fence_base.h"
 
+struct DXFenceNegativeWait final : DXFenceTestBase
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    int Run() override // passes a null payload list, expects CL_INVALID_VALUE
+    {
+        log_info("Calling clEnqueueWaitSemaphoresKHR\n");
+        errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
+                                             nullptr, nullptr);
+        test_assert_error(
+            errcode == CL_INVALID_VALUE,
+            "Unexpected error code returned from clEnqueueWaitSemaphores");
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that a wait without a semaphore payload list will return
 // CL_INVALID_VALUE
 REGISTER_TEST(test_external_semaphores_dx_fence_negative_wait)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore.createSemaphoreFromFence(*fence),
-               "Could not create semaphore");
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
-    errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
-                                         nullptr, nullptr);
-    test_assert_error(
-        errcode == CL_INVALID_VALUE,
-        "Unexpected error code returned from clEnqueueWaitSemaphores");
-
-    return TEST_PASS;
+    return MakeAndRunTest<DXFenceNegativeWait>(device, context, queue,
+                                               num_elements);
 }
 
+struct DXFenceNegativeSignal final : DXFenceTestBase
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    int Run() override // passes a null payload list, expects CL_INVALID_VALUE
+    {
+        log_info("Calling clEnqueueSignalSemaphoresKHR\n");
+        errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
+                                               nullptr, nullptr);
+        test_assert_error(
+            errcode == CL_INVALID_VALUE,
+            "Unexpected error code returned from clEnqueueSignalSemaphores");
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that a signal without a semaphore payload list will return
 // CL_INVALID_VALUE
 REGISTER_TEST(test_external_semaphores_dx_fence_negative_signal)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore.createSemaphoreFromFence(*fence),
-               "Could not create semaphore");
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
-    errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
-                                           nullptr, nullptr);
-    test_assert_error(
-        errcode == CL_INVALID_VALUE,
-        "Unexpected error code returned from clEnqueueSignalSemaphores");
-
-    return TEST_PASS;
+    return MakeAndRunTest<DXFenceNegativeSignal>(device, context, queue,
+                                                 num_elements);
 }
\ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_queries.cpp b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_queries.cpp
index 03aa2b15..87cb0caa 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_queries.cpp
+++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_queries.cpp
@@ -16,54 +16,43 @@
 
 #include "semaphore_dx_fence_base.h"
 
+struct DXFenceQueryProperties final : DXFenceTestBase
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    int Run() override // looks for the (D3D12 fence type, handle) pair
+    {
+        size_t properties_size_bytes = 0; // set by the size-only query below
+        errcode = clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_PROPERTIES_KHR,
+                                        0, nullptr, &properties_size_bytes);
+        test_error(errcode, "Could not get semaphore info");
+        std::vector<cl_semaphore_properties_khr> semaphore_properties(
+            properties_size_bytes / sizeof(cl_semaphore_properties_khr));
+        errcode = clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_PROPERTIES_KHR,
+                                        properties_size_bytes,
+                                        semaphore_properties.data(), nullptr);
+        test_error(errcode, "Could not get semaphore info");
+
+        for (size_t i = 0; i + 1 < semaphore_properties.size(); i++)
+        { // i + 1 < size() also covers an empty list (no size_t wrap-around)
+            if (semaphore_properties[i] == CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR
+                && semaphore_properties[i + 1]
+                    == reinterpret_cast<cl_semaphore_properties_khr>(
+                        fence_handle))
+            {
+                return TEST_PASS;
+            }
+        }
+        log_error("Failed to find the dx fence handle type in the semaphore "
+                  "properties\n");
+        return TEST_FAIL;
+    }
+};
+
 // Confirm that the CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR property is in the
 // properties returned by clGetSemaphoreInfo
 REGISTER_TEST(test_external_semaphores_dx_fence_query_properties)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clGetSemaphoreInfoKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore.createSemaphoreFromFence(*fence),
-               "Could not create semaphore");
-
-    size_t properties_size_bytes = 0;
-    errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR, 0,
-                                    nullptr, &properties_size_bytes);
-    test_error(errcode, "Could not get semaphore info");
-    std::vector<cl_semaphore_properties_khr> semaphore_properties(
-        properties_size_bytes / sizeof(cl_semaphore_properties_khr));
-    errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR,
-                                    properties_size_bytes,
-                                    semaphore_properties.data(), nullptr);
-    test_error(errcode, "Could not get semaphore info");
-
-    for (unsigned i = 0; i < semaphore_properties.size() - 1; i++)
-    {
-        if (semaphore_properties[i] == CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR
-            && semaphore_properties[i + 1]
-                == reinterpret_cast<cl_semaphore_properties_khr>(
-                    semaphore.getHandle()))
-        {
-            return TEST_PASS;
-        }
-    }
-    log_error(
-        "Failed to find the dx fence handle type in the semaphore properties");
-    return TEST_FAIL;
+    return MakeAndRunTest<DXFenceQueryProperties>(device, context, queue,
+                                                  num_elements);
 }
\ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_semaphore/semaphore_base.h b/test_conformance/extensions/cl_khr_semaphore/semaphore_base.h
index 372bdd1e..828a5e46 100644
--- a/test_conformance/extensions/cl_khr_semaphore/semaphore_base.h
+++ b/test_conformance/extensions/cl_khr_semaphore/semaphore_base.h
@@ -23,6 +23,7 @@
 #include "harness/deviceInfo.h"
 #include "harness/testHarness.h"
 #include "harness/typeWrappers.h"
+#include "harness/extensionHelpers.h"
 
 struct SemaphoreBase
 {
@@ -37,27 +38,15 @@ struct SemaphoreBase
         test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
 
         // If it is supported get the addresses of all the APIs here.
-        // clang-format off
-#define GET_EXTENSION_ADDRESS(FUNC)                                            \
-        FUNC = reinterpret_cast<FUNC##_fn>(                                    \
-            clGetExtensionFunctionAddressForPlatform(platform, #FUNC));        \
-        if (FUNC == nullptr)                                                   \
-        {                                                                      \
-            log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \
-                      " with " #FUNC "\n");                                    \
-            return TEST_FAIL;                                                  \
-        }
-        // clang-format on
+        GET_FUNCTION_EXTENSION_ADDRESS(device,
+                                       clCreateSemaphoreWithPropertiesKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueSignalSemaphoresKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueWaitSemaphoresKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clReleaseSemaphoreKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreInfoKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clRetainSemaphoreKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreHandleForTypeKHR);
 
-        GET_EXTENSION_ADDRESS(clCreateSemaphoreWithPropertiesKHR);
-        GET_EXTENSION_ADDRESS(clEnqueueSignalSemaphoresKHR);
-        GET_EXTENSION_ADDRESS(clEnqueueWaitSemaphoresKHR);
-        GET_EXTENSION_ADDRESS(clReleaseSemaphoreKHR);
-        GET_EXTENSION_ADDRESS(clGetSemaphoreInfoKHR);
-        GET_EXTENSION_ADDRESS(clRetainSemaphoreKHR);
-        GET_EXTENSION_ADDRESS(clGetSemaphoreHandleForTypeKHR);
-
-#undef GET_EXTENSION_ADDRESS
         return CL_SUCCESS;
     }
 

From 6f38c799c15852feaaae879c1113083dc767025f Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 27 Jan 2026 17:46:57 +0100
Subject: [PATCH 37/54] Added support for cl_ext_float_atomics in
 CBasicTestFetchAddSpecialFloats with atomic_half (#2386)

Related to #2142, according to the work plan, extending
CBasicTestFetchAddSpecialFloats with support for atomic_half.
---
 test_conformance/c11_atomics/common.h         |  54 +++--
 test_conformance/c11_atomics/main.cpp         |   6 +
 test_conformance/c11_atomics/test_atomics.cpp | 224 ++++++++++++++++++
 3 files changed, 266 insertions(+), 18 deletions(-)

diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h
index c9494c6c..5bf4cd0b 100644
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h
@@ -25,6 +25,7 @@
 #include "CL/cl_half.h"
 
 #include <iomanip>
+#include <limits>
 #include <sstream>
 #include <vector>
 
@@ -75,6 +76,10 @@ extern int
     gMaxDeviceThreads; // maximum number of threads executed on OCL device
 extern cl_device_atomic_capabilities gAtomicMemCap,
     gAtomicFenceCap; // atomic memory and fence capabilities for this device
+extern cl_half_rounding_mode gHalfRoundingMode;
+extern bool gFloatAtomicsSupported;
+extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
+extern cl_device_fp_config gHalfFPConfig;
 
 extern cl_half_rounding_mode gHalfRoundingMode;
 extern bool gFloatAtomicsSupported;
@@ -154,12 +159,12 @@ public:
         return 0;
     }
     CBasicTest(TExplicitAtomicType dataType, bool useSVM)
-        : CTest(), _maxDeviceThreads(MAX_DEVICE_THREADS), _dataType(dataType),
-          _useSVM(useSVM), _startValue(255), _localMemory(false),
-          _declaredInProgram(false), _usedInFunction(false),
-          _genericAddrSpace(false), _oldValueCheck(true),
-          _localRefValues(false), _maxGroupSize(0), _passCount(0),
-          _iterations(gInternalIterations)
+        : CTest(), _dataType(dataType), _useSVM(useSVM), _startValue(255),
+          _localMemory(false), _declaredInProgram(false),
+          _usedInFunction(false), _genericAddrSpace(false),
+          _oldValueCheck(true), _localRefValues(false), _maxGroupSize(0),
+          _passCount(0), _iterations(gInternalIterations),
+          _maxDeviceThreads(MAX_DEVICE_THREADS), _deviceThreads(0)
     {}
     virtual ~CBasicTest()
     {
@@ -240,12 +245,12 @@ public:
                                       cl_command_queue queue)
     {
         int error = 0;
-        DeclaredInProgram(false);
+        SetDeclaredInProgram(false);
         EXECUTE_TEST(error,
                      ExecuteForEachPointerType(deviceID, context, queue));
         if (!UseSVM())
         {
-            DeclaredInProgram(true);
+            SetDeclaredInProgram(true);
             EXECUTE_TEST(error,
                          ExecuteForEachPointerType(deviceID, context, queue));
         }
@@ -256,13 +261,13 @@ public:
                                            cl_command_queue queue)
     {
         int error = 0;
-        if (_maxDeviceThreads > 0 && !UseSVM())
+        if (_deviceThreads > 0 && !UseSVM())
         {
             SetLocalMemory(true);
             EXECUTE_TEST(
                 error, ExecuteForEachDeclarationType(deviceID, context, queue));
         }
-        if (_maxDeviceThreads + MaxHostThreads() > 0)
+        if (_deviceThreads + MaxHostThreads() > 0)
         {
             SetLocalMemory(false);
             EXECUTE_TEST(
@@ -271,7 +276,7 @@ public:
         return error;
     }
     virtual int Execute(cl_device_id deviceID, cl_context context,
-                        cl_command_queue queue, int num_elements)
+                        cl_command_queue queue, int num_elements) override
     {
         if (sizeof(HostAtomicType) != DataType().Size(deviceID))
         {
@@ -311,7 +316,12 @@ public:
             if (UseSVM()) return 0;
             _maxDeviceThreads = 0;
         }
-        if (_maxDeviceThreads + MaxHostThreads() == 0) return 0;
+
+        _deviceThreads = (num_elements > 0)
+            ? std::min(cl_uint(num_elements), _maxDeviceThreads)
+            : _maxDeviceThreads;
+
+        if (_deviceThreads + MaxHostThreads() == 0) return 0;
         return ExecuteForEachParameterSet(deviceID, context, queue);
     }
     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
@@ -324,7 +334,7 @@ public:
     {
         return AtomicTypeExtendedInfo<HostDataType>(_dataType);
     }
-    cl_uint _maxDeviceThreads;
+
     virtual cl_uint MaxHostThreads()
     {
         if (UseSVM() || gHost)
@@ -421,7 +431,7 @@ public:
     HostDataType StartValue() { return _startValue; }
     void SetLocalMemory(bool local) { _localMemory = local; }
     bool LocalMemory() { return _localMemory; }
-    void DeclaredInProgram(bool declaredInProgram)
+    void SetDeclaredInProgram(bool declaredInProgram)
     {
         _declaredInProgram = declaredInProgram;
     }
@@ -478,6 +488,8 @@ private:
     cl_uint _currentGroupSize;
     cl_uint _passCount;
     const cl_int _iterations;
+    cl_uint _maxDeviceThreads;
+    cl_uint _deviceThreads;
 };
 
 template <typename HostAtomicType, typename HostDataType>
@@ -912,9 +924,15 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
             + ss.str() + "] = {\n";
         ss.str("");
 
-        if constexpr (is_host_fp_v<HostDataType>)
-            ss << std::hexfloat
-               << _startValue; // use hex format for accurate representation
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            ss << std::setprecision(10) << _startValue;
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            ss << std::setprecision(std::numeric_limits<float>::max_digits10)
+               << cl_half_to_float(_startValue);
+        }
         else
             ss << _startValue;
 
@@ -1151,7 +1169,7 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
     MTdata d;
     size_t typeSize = DataType().Size(deviceID);
 
-    deviceThreadCount = _maxDeviceThreads;
+    deviceThreadCount = _deviceThreads;
     hostThreadCount = MaxHostThreads();
     threadCount = deviceThreadCount + hostThreadCount;
 
diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp
index 78291f06..1d20bb47 100644
--- a/test_conformance/c11_atomics/main.cpp
+++ b/test_conformance/c11_atomics/main.cpp
@@ -36,6 +36,7 @@ bool gFloatAtomicsSupported = false;
 cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
 cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps = 0;
 cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0;
+cl_device_fp_config gHalfFPConfig = 0;
 
 test_status InitCL(cl_device_id device) {
     auto version = get_device_cl_version(device);
@@ -169,6 +170,11 @@ test_status InitCL(cl_device_id device) {
                 log_error("Error while acquiring half rounding mode\n");
                 return TEST_FAIL;
             }
+
+            error =
+                clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG,
+                                sizeof(gHalfFPConfig), &gHalfFPConfig, NULL);
+            test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
         }
     }
 
diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index 18c86a3b..135574d8 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -1418,6 +1418,219 @@ public:
     }
 };
 
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestFetchAddSpecialFloats
+    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
+
+    std::vector<HostDataType> ref_vals;
+
+public:
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+    using CBasicTestMemOrderScope<HostAtomicType,
+                                  HostDataType>::MemoryOrderScopeStr;
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
+    using CBasicTestMemOrderScope<HostAtomicType,
+                                  HostDataType>::DeclaredInProgram;
+    CBasicTestFetchAddSpecialFloats(TExplicitAtomicType dataType, bool useSVM)
+        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+                                                                useSVM)
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            // StartValue is used as an index divisor in the following test
+            // logic. It is set to the number of special values, which allows
+            // threads to be mapped deterministically onto the input data array.
+            // This enables repeated add operations arranged so that every
+            // special value is added to every other one (“all-to-all”).
+
+            auto spec_vals = GetSpecialValues();
+            StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode));
+            CBasicTestMemOrderScope<HostAtomicType,
+                                    HostDataType>::OldValueCheck(false);
+        }
+    }
+
+    static std::vector<HostDataType> &GetSpecialValues()
+    {
+        static std::vector<HostDataType> special_values;
+        if (special_values.empty())
+        {
+            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+            {
+                special_values = {
+                    0xffff, 0x0000, 0x7c00, /*INFINITY*/
+                    0xfc00, /*-INFINITY*/
+                    0x8000, /*-0*/
+                    0x7bff, /*HALF_MAX*/
+                    0x0400, /*HALF_MIN*/
+                    0x3c00, /* 1 */
+                    0xbc00, /* -1 */
+                    0x3555, /*nearest value to 1/3*/
+                    0x3bff, /*largest number less than one*/
+                    0xc000, /* -2 */
+                    0xfbff, /* -HALF_MAX */
+                    0x8400, /* -HALF_MIN */
+                    0x4248, /* M_PI_H */
+                    0xc248, /* -M_PI_H */
+                    0xbbff, /* Largest negative fraction */
+                };
+
+                if (0 != (CL_FP_DENORM & gHalfFPConfig))
+                {
+                    special_values.push_back(0x0001 /* Smallest denormal */);
+                    special_values.push_back(0x03ff /* Largest denormal */);
+                }
+            }
+        }
+
+        return special_values;
+    }
+
+    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
+                      MTdata d) override
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            if (threadCount > ref_vals.size())
+            {
+                ref_vals.assign(threadCount, 0);
+                auto spec_vals = GetSpecialValues();
+
+                cl_uint total_cnt = 0;
+                while (total_cnt < threadCount)
+                {
+                    cl_uint block_cnt =
+                        std::min((cl_int)(threadCount - total_cnt),
+                                 (cl_int)spec_vals.size());
+                    memcpy(&ref_vals.at(total_cnt), spec_vals.data(),
+                           sizeof(HostDataType) * block_cnt);
+                    total_cnt += block_cnt;
+                }
+            }
+
+            memcpy(startRefValues, ref_vals.data(),
+                   sizeof(HostDataType) * threadCount);
+
+            return true;
+        }
+        return false;
+    }
+    std::string ProgramCore() override
+    {
+        std::string memoryOrderScope = MemoryOrderScopeStr();
+        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            // The start_value variable (set by StartValue) is used
+            // as a divisor of the thread index when selecting the operand for
+            // atomic_fetch_add. This groups threads into blocks corresponding
+            // to the number of special values and implements an “all-to-all”
+            // addition pattern. As a result, each destination element is
+            // updated using different combinations of input values, enabling
+            // consistent comparison between host and device execution.
+
+            return std::string(DataType().AddSubOperandTypeName())
+                + " start_value = atomic_load_explicit(destMemory+tid, "
+                  "memory_order_relaxed, memory_scope_work_group);\n"
+                  "  atomic_store_explicit(destMemory+tid, oldValues[tid], "
+                  "memory_order_relaxed, memory_scope_work_group);\n"
+                  "  atomic_fetch_add"
+                + postfix + "(&destMemory[tid], ("
+                + DataType().AddSubOperandTypeName()
+                + ")oldValues[tid/(int)start_value]" + memoryOrderScope
+                + ");\n";
+        }
+    }
+    void HostFunction(cl_uint tid, cl_uint threadCount,
+                      volatile HostAtomicType *destMemory,
+                      HostDataType *oldValues) override
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            auto spec_vals = GetSpecialValues();
+            host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid],
+                              MEMORY_ORDER_SEQ_CST);
+            host_atomic_fetch_add(
+                &destMemory[tid],
+                (HostDataType)oldValues[tid / spec_vals.size()], MemoryOrder());
+        }
+    }
+    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+                       HostDataType *startRefValues,
+                       cl_uint whichDestValue) override
+    {
+        expected = StartValue();
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            auto spec_vals = GetSpecialValues();
+            expected = cl_half_from_float(
+                cl_half_to_float(startRefValues[whichDestValue])
+                    + cl_half_to_float(
+                        startRefValues[whichDestValue / spec_vals.size()]),
+                gHalfRoundingMode);
+        }
+
+        return true;
+    }
+
+    bool IsTestNotAsExpected(const HostDataType &expected,
+                             const std::vector<HostAtomicType> &testValues,
+                             cl_uint whichDestValue) override
+    {
+
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            return static_cast<cl_half>(expected) != testValues[whichDestValue];
+        }
+
+        return CBasicTestMemOrderScope<
+            HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
+                                                               testValues,
+                                                               whichDestValue);
+    }
+
+    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
+                          cl_command_queue queue) override
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            if (DeclaredInProgram()) return 0; // skip test - not applicable
+
+            if (LocalMemory()
+                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0; // skip test - not applicable
+
+            if (!LocalMemory()
+                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0;
+
+            if (!CBasicTestMemOrderScope<HostAtomicType,
+                                         HostDataType>::LocalMemory()
+                && CBasicTestMemOrderScope<HostAtomicType,
+                                           HostDataType>::DeclaredInProgram())
+            {
+                if ((gHalfFPConfig & CL_FP_INF_NAN) == 0) return 0;
+            }
+        }
+        return CBasicTestMemOrderScope<
+            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
+                                                             queue);
+    }
+    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            return threadCount;
+        }
+        return CBasicTestMemOrderScope<HostAtomicType,
+                                       HostDataType>::NumResults(threadCount,
+                                                                 deviceID);
+    }
+};
+
 static int test_atomic_fetch_add_generic(cl_device_id deviceID,
                                          cl_context context,
                                          cl_command_queue queue,
@@ -1443,6 +1656,17 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
 
     if (gFloatAtomicsSupported)
     {
+        auto spec_vals_halfs =
+            CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF,
+                                            HOST_HALF>::GetSpecialValues();
+
+        CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF, HOST_HALF>
+            test_spec_half(TYPE_ATOMIC_HALF, useSVM);
+        EXECUTE_TEST(error,
+                     test_spec_half.Execute(deviceID, context, queue,
+                                            spec_vals_halfs.size()
+                                                * spec_vals_halfs.size()));
+
         CBasicTestFetchAdd<HOST_ATOMIC_HALF, HOST_HALF> test_half(
             TYPE_ATOMIC_HALF, useSVM);
         EXECUTE_TEST(error,

From 591a736918bdd9fd63ffe025155dcce4e1aba501 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Wed, 28 Jan 2026 20:14:45 +0100
Subject: [PATCH 38/54] vectors: improve error messages (#2609)

Clarify that the first number in the error message is the work-item.

Print hexadecimal numbers with a `0x` prefix.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 test_conformance/vectors/structs.cpp | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/test_conformance/vectors/structs.cpp b/test_conformance/vectors/structs.cpp
index c0757017..48a6473b 100644
--- a/test_conformance/vectors/structs.cpp
+++ b/test_conformance/vectors/structs.cpp
@@ -341,9 +341,9 @@ int checkCorrectnessAlign(bufferStruct *pBufferStruct, clState *pClState,
     {
         if ((targetArr[i]) % minAlign != (cl_uint)0)
         {
-            vlog_error(
-                "Error %zu (of %zu).  Expected a multiple of %zx, got %x\n", i,
-                pClState->m_numThreads, minAlign, targetArr[i]);
+            vlog_error("Error in work-item %zu (of %zu).  Expected a multiple "
+                       "of 0x%zx, got 0x%x\n",
+                       i, pClState->m_numThreads, minAlign, targetArr[i]);
             return -1;
         }
     }
@@ -371,8 +371,9 @@ int checkCorrectnessStep(bufferStruct *pBufferStruct, clState *pClState,
     {
         if (targetArr[i] != targetSize)
         {
-            vlog_error("Error %zu (of %zu).  Expected %d, got %d\n", i,
-                       pClState->m_numThreads, targetSize, targetArr[i]);
+            vlog_error(
+                "Error in work-item %zu (of %zu).  Expected %d, got %d\n", i,
+                pClState->m_numThreads, targetSize, targetArr[i]);
             return -1;
         }
     }
@@ -390,10 +391,11 @@ int checkPackedCorrectness(bufferStruct *pBufferStruct, clState *pClState,
     {
         if ((targetArr[i] - beforeSize) % totSize != (cl_uint)0)
         {
-            vlog_error(
-                "Error %zu (of %zu).  Expected %zu more than a multiple of "
-                "%zu, got %d \n",
-                i, pClState->m_numThreads, beforeSize, totSize, targetArr[i]);
+            vlog_error("Error in work-item %zu (of %zu).  Expected %zu more "
+                       "than a multiple of "
+                       "%zu, got %d \n",
+                       i, pClState->m_numThreads, beforeSize, totSize,
+                       targetArr[i]);
             return -1;
         }
     }

From 2d9e1cb92ec51bbb7ac4b20af234d8430c1c2885 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 3 Feb 2026 17:42:28 +0100
Subject: [PATCH 39/54] Add missing CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
 check in negative_set_default_device_command_queue (#2602)

Fixes #2566
---
 test_conformance/api/negative_queue.cpp | 44 +++++++++++++++----------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/test_conformance/api/negative_queue.cpp b/test_conformance/api/negative_queue.cpp
index 7da68b32..8959ca0b 100644
--- a/test_conformance/api/negative_queue.cpp
+++ b/test_conformance/api/negative_queue.cpp
@@ -349,24 +349,34 @@ REGISTER_TEST_VERSION(negative_set_default_device_command_queue, Version(2, 1))
     }
 
     {
-        constexpr cl_queue_properties props[] = {
-            CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
-        };
-        clCommandQueueWrapper not_on_device_queue =
-            clCreateCommandQueueWithProperties(context, device, props, &err);
-        test_error_fail(err, "clCreateCommandQueueWithProperties failed");
-        err = clSetDefaultDeviceCommandQueue(context, device,
-                                             not_on_device_queue);
-        if (err != CL_INVALID_OPERATION && err != CL_INVALID_COMMAND_QUEUE)
+        cl_command_queue_properties queue_properties;
+        err =
+            clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
+                            sizeof(queue_properties), &queue_properties, NULL);
+        test_error(err, "Unable to query CL_DEVICE_QUEUE_PROPERTIES");
+        if (queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
         {
-            log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
-                      "clSetDefaultDeviceCommandQueue should return "
-                      "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE when: "
-                      "\"command_queue is not a valid command-queue for "
-                      "device\" using a command queue that is not on device",
-                      IGetErrorString(err),
-                      "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE",
-                      __FILE__, __LINE__);
+            constexpr cl_queue_properties props[] = {
+                CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
+            };
+            clCommandQueueWrapper not_on_device_queue =
+                clCreateCommandQueueWithProperties(context, device, props,
+                                                   &err);
+            test_error_fail(err, "clCreateCommandQueueWithProperties failed");
+            err = clSetDefaultDeviceCommandQueue(context, device,
+                                                 not_on_device_queue);
+            if (err != CL_INVALID_OPERATION && err != CL_INVALID_COMMAND_QUEUE)
+            {
+                log_error(
+                    "ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
+                    "clSetDefaultDeviceCommandQueue should return "
+                    "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE when: "
+                    "\"command_queue is not a valid command-queue for "
+                    "device\" using a command queue that is not on device",
+                    IGetErrorString(err),
+                    "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE",
+                    __FILE__, __LINE__);
+            }
         }
     }
 

From 50ba8c8c1cc68111eade24a12f4af2a646d371cb Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjodin@google.com>
Date: Wed, 4 Feb 2026 08:11:24 -0800
Subject: [PATCH 40/54] fix typo in negative_context.cpp (#2611)

The message does not correspond to what is actually tested.
---
 test_conformance/api/negative_context.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_conformance/api/negative_context.cpp b/test_conformance/api/negative_context.cpp
index a19f1977..23ace048 100644
--- a/test_conformance/api/negative_context.cpp
+++ b/test_conformance/api/negative_context.cpp
@@ -198,7 +198,7 @@ REGISTER_TEST(negative_create_context_from_type)
         {
             test_object_failure_ret(
                 tmp_context, err, CL_DEVICE_NOT_FOUND,
-                "clCreateContextFromType should return CL_DEVICE_NOT_AVAILABLE "
+                "clCreateContextFromType should return CL_DEVICE_NOT_FOUND "
                 "when: \"no devices that match device_type and property values "
                 "specified in properties are currently available\"",
                 TEST_FAIL);

From 9d3ae4c734382016dc0cd789f399b27519ad99dc Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 17 Feb 2026 17:41:46 +0100
Subject: [PATCH 41/54] Added support for cl_ext_float_atomics in
 CBasicTestFetchAddSpecialFloats with atomic_float (#2371)

Related to #2142. Following the work plan, this extends
CBasicTestFetchAddSpecialFloats with support for atomic_float.
---
 test_conformance/c11_atomics/common.h         |  19 ++-
 test_conformance/c11_atomics/main.cpp         |   9 ++
 test_conformance/c11_atomics/test_atomics.cpp | 128 +++++++++++++++---
 3 files changed, 132 insertions(+), 24 deletions(-)

diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h
index 5bf4cd0b..5f917949 100644
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h
@@ -22,13 +22,13 @@
 
 #include "host_atomics.h"
 
-#include "CL/cl_half.h"
-
 #include <iomanip>
 #include <limits>
 #include <sstream>
 #include <vector>
 
+#include "CL/cl_half.h"
+
 #define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads)
 #define MAX_HOST_THREADS GetThreadCount()
 
@@ -76,9 +76,8 @@ extern int
     gMaxDeviceThreads; // maximum number of threads executed on OCL device
 extern cl_device_atomic_capabilities gAtomicMemCap,
     gAtomicFenceCap; // atomic memory and fence capabilities for this device
-extern cl_half_rounding_mode gHalfRoundingMode;
-extern bool gFloatAtomicsSupported;
-extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
+
+extern cl_device_fp_config gFloatFPConfig;
 extern cl_device_fp_config gHalfFPConfig;
 
 extern cl_half_rounding_mode gHalfRoundingMode;
@@ -183,6 +182,7 @@ public:
     {
         return false;
     }
+
     virtual bool
     IsTestNotAsExpected(const HostDataType &expected,
                         const std::vector<HostAtomicType> &testValues,
@@ -926,7 +926,14 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
 
         if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
         {
-            ss << std::setprecision(10) << _startValue;
+            if (std::isinf(_startValue))
+                ss << (_startValue < 0 ? "-" : "") << "INFINITY";
+            else if (std::isnan(_startValue))
+                ss << "0.0f / 0.0f";
+            else
+                ss << std::setprecision(
+                    std::numeric_limits<HostDataType>::max_digits10)
+                   << _startValue;
         }
         else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
         {
diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp
index 1d20bb47..7d636151 100644
--- a/test_conformance/c11_atomics/main.cpp
+++ b/test_conformance/c11_atomics/main.cpp
@@ -31,6 +31,8 @@ int gInternalIterations = 10000; // internal test iterations for atomic operatio
 int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device
 cl_device_atomic_capabilities gAtomicMemCap,
     gAtomicFenceCap; // atomic memory and fence capabilities for this device
+
+cl_device_fp_config gFloatFPConfig = 0;
 cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
 bool gFloatAtomicsSupported = false;
 cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
@@ -148,6 +150,13 @@ test_status InitCL(cl_device_id device) {
             sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
         test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
 
+        error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
+                                sizeof(gFloatFPConfig), &gFloatFPConfig, NULL);
+        test_error_ret(
+            error,
+            "Unable to run INFINITY/NAN tests (unable to get FP_CONFIG bits)",
+            TEST_FAIL);
+
         if (is_extension_available(device, "cl_khr_fp16"))
         {
             cl_int error = clGetDeviceInfo(
diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index 135574d8..df6d1e58 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -1437,13 +1437,21 @@ public:
         : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                 useSVM)
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        // StartValue is used as an index divisor in the following test
+        // logic. It is set to the number of special values, which allows
+        // threads to be mapped deterministically onto the input data array.
+        // This enables repeated add operations arranged so that every
+        // special value is added to every other one (“all-to-all”).
+
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            auto spec_vals = GetSpecialValues();
+            StartValue(spec_vals.size());
+            CBasicTestMemOrderScope<HostAtomicType,
+                                    HostDataType>::OldValueCheck(false);
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
         {
-            // StartValue is used as an index divisor in the following test
-            // logic. It is set to the number of special values, which allows
-            // threads to be mapped deterministically onto the input data array.
-            // This enables repeated add operations arranged so that every
-            // special value is added to every other one (“all-to-all”).
 
             auto spec_vals = GetSpecialValues();
             StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode));
@@ -1454,10 +1462,42 @@ public:
 
     static std::vector<HostDataType> &GetSpecialValues()
     {
+        const float test_value_zero = 0.0f;
+        const float test_value_minus_zero = -0.0f;
+        const float test_value_without_fraction = 2.0f;
+        const float test_value_with_fraction = 2.2f;
+
         static std::vector<HostDataType> special_values;
-        if (special_values.empty())
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
         {
-            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+            if (special_values.empty())
+            {
+                special_values = {
+                    static_cast<HostDataType>(test_value_minus_zero),
+                    static_cast<HostDataType>(test_value_zero),
+                    static_cast<HostDataType>(test_value_without_fraction),
+                    static_cast<HostDataType>(test_value_with_fraction),
+                    std::numeric_limits<HostDataType>::infinity(),
+                    std::numeric_limits<HostDataType>::quiet_NaN(),
+                    std::numeric_limits<HostDataType>::signaling_NaN(),
+                    -std::numeric_limits<HostDataType>::infinity(),
+                    -std::numeric_limits<HostDataType>::quiet_NaN(),
+                    -std::numeric_limits<HostDataType>::signaling_NaN(),
+                    std::numeric_limits<HostDataType>::lowest(),
+                    std::numeric_limits<HostDataType>::min(),
+                    std::numeric_limits<HostDataType>::max(),
+                };
+
+                if (0 != (CL_FP_DENORM & gFloatFPConfig))
+                {
+                    special_values.push_back(
+                        std::numeric_limits<HostDataType>::denorm_min());
+                }
+            }
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            if (special_values.empty())
             {
                 special_values = {
                     0xffff, 0x0000, 0x7c00, /*INFINITY*/
@@ -1491,7 +1531,10 @@ public:
     bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                       MTdata d) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             if (threadCount > ref_vals.size())
             {
@@ -1522,7 +1565,10 @@ public:
         std::string memoryOrderScope = MemoryOrderScopeStr();
         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
 
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             // The start_value variable (set by StartValue) is used
             // as a divisor of the thread index when selecting the operand for
@@ -1548,7 +1594,10 @@ public:
                       volatile HostAtomicType *destMemory,
                       HostDataType *oldValues) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             auto spec_vals = GetSpecialValues();
             host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid],
@@ -1563,7 +1612,14 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            auto spec_vals = GetSpecialValues();
+            expected = startRefValues[whichDestValue]
+                + startRefValues[whichDestValue / spec_vals.size()];
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
         {
             auto spec_vals = GetSpecialValues();
             expected = cl_half_from_float(
@@ -1575,13 +1631,18 @@ public:
 
         return true;
     }
-
     bool IsTestNotAsExpected(const HostDataType &expected,
                              const std::vector<HostAtomicType> &testValues,
                              cl_uint whichDestValue) override
     {
-
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            if (std::isnan(testValues[whichDestValue]) && std::isnan(expected))
+                return false;
+            else
+                return expected != testValues[whichDestValue];
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
         {
             return static_cast<cl_half>(expected) != testValues[whichDestValue];
         }
@@ -1591,11 +1652,28 @@ public:
                                                                testValues,
                                                                whichDestValue);
     }
-
     int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                           cl_command_queue queue) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            if (LocalMemory()
+                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0; // skip test - not applicable
+
+            if (!LocalMemory()
+                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0;
+
+            if (!CBasicTestMemOrderScope<HostAtomicType,
+                                         HostDataType>::LocalMemory()
+                && CBasicTestMemOrderScope<HostAtomicType,
+                                           HostDataType>::DeclaredInProgram())
+            {
+                if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0;
+            }
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
         {
             if (DeclaredInProgram()) return 0; // skip test - not applicable
 
@@ -1621,7 +1699,10 @@ public:
     }
     cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             return threadCount;
         }
@@ -1656,6 +1737,17 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
 
     if (gFloatAtomicsSupported)
     {
+        auto spec_vals_fp32 =
+            CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_FLOAT,
+                                            HOST_FLOAT>::GetSpecialValues();
+
+        CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_FLOAT, HOST_FLOAT>
+            test_spec_float(TYPE_ATOMIC_FLOAT, useSVM);
+        EXECUTE_TEST(error,
+                     test_spec_float.Execute(deviceID, context, queue,
+                                             spec_vals_fp32.size()
+                                                 * spec_vals_fp32.size()));
+
         auto spec_vals_halfs =
             CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF,
                                             HOST_HALF>::GetSpecialValues();

From 3262ea3f45e8fe3800403959dd4b32c949fb272e Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 17 Feb 2026 17:43:46 +0100
Subject: [PATCH 42/54] Fix test_vulkan_api_consistency validation errors for
 missing vkFreeMemory (#2604)

Fixes vulkan validation layer error:

Vulkan validation layer: Validation Error: [
VUID-vkDestroyDevice-device-05137 ] Object 0: handle =
0xf56c9b0000000004, type = VK_OBJECT_TYPE_DEVICE_MEMORY; | MessageID =
0x4872eaa0 | vkCreateDevice(): OBJ ERROR : For VkDevice
0x5555598c2400[], VkDeviceMemory 0xf56c9b0000000004[] has not been
destroyed. The Vulkan spec states: All child objects created on device
must have been destroyed prior to destroying device
(https://vulkan.lunarg.com/doc/view/1.3.275.0/linux/1.3-extensions/vkspec.html#VUID-vkDestroyDevice-device-05137)
---
 test_conformance/vulkan/test_vulkan_api_consistency.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
index bd19987b..0d09a748 100644
--- a/test_conformance/vulkan/test_vulkan_api_consistency.cpp
+++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
@@ -26,6 +26,7 @@
 #endif
 
 #include <assert.h>
+#include <memory>
 #include <vector>
 #include <iostream>
 #include <string.h>
@@ -79,9 +80,9 @@ struct ConsistencyExternalBufferTest : public VulkanTestBase
 
         VulkanBufferList vkBufferList(1, *vkDevice, bufferSize,
                                       vkExternalMemoryHandleType);
-        VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory(
+        std::unique_ptr<VulkanDeviceMemory> vkDeviceMem(new VulkanDeviceMemory(
             *vkDevice, vkBufferList[0], memoryTypeList[0],
-            vkExternalMemoryHandleType);
+            vkExternalMemoryHandleType));
 
         vkDeviceMem->bindBuffer(vkBufferList[0], 0);
 

From 764b77ad83ba1828eee2ea0b2f3d3ab2a8eb237f Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 17 Feb 2026 17:45:07 +0100
Subject: [PATCH 43/54] Fix test_vulkan_interop_buffer validation errors for
 Int8 storage shader (#2603)

Fixes vulkan validation layer error:

Vulkan validation layer: Validation Error: [
VUID-VkShaderModuleCreateInfo-pCode-08740 ] | MessageID = 0x6e224e9 |
vkCreateComputePipelines(): pCreateInfos[0].stage SPIR-V Capability Int8
was declared, but one of the following requirements is required
(VkPhysicalDeviceVulkan12Features::shaderInt8). The Vulkan spec states:
If pCode is a pointer to SPIR-V code, and pCode declares any of the
capabilities listed in the SPIR-V Environment appendix, one of the
corresponding requirements must be satisfied
(https://vulkan.lunarg.com/doc/view/1.3.275.0/linux/1.3-extensions/vkspec.html#VUID-VkShaderModuleCreateInfo-pCode-08740)
---
 .../common/vulkan_wrapper/vulkan_api_list.hpp |  4 +-
 .../common/vulkan_wrapper/vulkan_wrapper.cpp  | 54 ++++++++++++++++++-
 .../common/vulkan_wrapper/vulkan_wrapper.hpp  |  3 +-
 .../vulkan/test_vulkan_interop_buffer.cpp     |  2 +-
 test_conformance/vulkan/vulkan_test_base.h    |  6 ++-
 5 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp
index a474102d..b1f1b8c9 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp
@@ -104,7 +104,8 @@
     VK_FUNC_DECL(vkGetImageSubresourceLayout)                                  \
     VK_FUNC_DECL(vkCreateDebugUtilsMessengerEXT)                               \
     VK_FUNC_DECL(vkDestroyDebugUtilsMessengerEXT)                              \
-    VK_FUNC_DECL(vkGetPhysicalDeviceExternalBufferProperties)
+    VK_FUNC_DECL(vkGetPhysicalDeviceExternalBufferProperties)                  \
+    VK_FUNC_DECL(vkGetPhysicalDeviceFeatures2)
 #define VK_WINDOWS_FUNC_LIST                                                   \
     VK_FUNC_DECL(vkGetMemoryWin32HandleKHR)                                    \
     VK_FUNC_DECL(vkGetSemaphoreWin32HandleKHR)                                 \
@@ -209,5 +210,6 @@
 #define vkDestroyDebugUtilsMessengerEXT _vkDestroyDebugUtilsMessengerEXT
 #define vkGetPhysicalDeviceExternalBufferProperties                            \
     _vkGetPhysicalDeviceExternalBufferProperties
+#define vkGetPhysicalDeviceFeatures2 _vkGetPhysicalDeviceFeatures2
 
 #endif //_vulkan_api_list_hpp_
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
index 7254742d..e1c81086 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
@@ -147,6 +147,7 @@ VulkanInstance::VulkanInstance(bool useValidationLayers)
         // return WAIVED;
     }
 
+    VK_GET_NULL_INSTANCE_PROC_ADDR(vkGetPhysicalDeviceFeatures2);
     VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceVersion);
     VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceLayerProperties);
     VK_GET_NULL_INSTANCE_PROC_ADDR(vkCreateInstance);
@@ -612,7 +613,8 @@ VulkanDevice::VulkanDevice(const VulkanDevice &device)
 
 VulkanDevice::VulkanDevice(
     const VulkanPhysicalDevice &physicalDevice,
-    const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap)
+    const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap,
+    bool useShaderInt8)
     : m_physicalDevice(physicalDevice), m_vkDevice(NULL)
 {
     uint32_t maxQueueCount = 0;
@@ -676,7 +678,55 @@ VulkanDevice::VulkanDevice(
         enabledExtensionNameList.data();
     vkDeviceCreateInfo.pEnabledFeatures = NULL;
 
-    vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice);
+    if (useShaderInt8)
+    {
+        VkPhysicalDeviceShaderFloat16Int8Features int8Features{};
+        int8Features.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
+
+        VkPhysicalDevice8BitStorageFeatures storage8Features{};
+        storage8Features.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES;
+
+        int8Features.pNext = &storage8Features;
+
+        VkPhysicalDeviceFeatures2 features2{};
+        features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+        features2.pNext = &int8Features;
+
+        vkGetPhysicalDeviceFeatures2(physicalDevice, &features2);
+
+        if (!int8Features.shaderInt8
+            || !storage8Features.storageBuffer8BitAccess)
+        {
+            throw std::runtime_error("shaderInt8 not supported!\n");
+        }
+
+        VkPhysicalDevice8BitStorageFeatures storage8Enable{};
+        storage8Enable.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES;
+        storage8Enable.storageBuffer8BitAccess = VK_TRUE;
+
+        VkPhysicalDeviceShaderFloat16Int8Features int8Enable{};
+        int8Enable.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
+        int8Enable.shaderInt8 = VK_TRUE;
+        int8Enable.pNext = &storage8Enable;
+
+        vkDeviceCreateInfo.pNext = &int8Enable;
+
+        enabledExtensionNameList.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
+        vkDeviceCreateInfo.ppEnabledExtensionNames =
+            enabledExtensionNameList.data();
+        vkDeviceCreateInfo.enabledExtensionCount =
+            (uint32_t)enabledExtensionNameList.size();
+
+        vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice);
+    }
+    else
+    {
+        vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice);
+    }
 
     for (uint32_t qfIdx = 0;
          qfIdx < (uint32_t)m_physicalDevice.getQueueFamilyList().size();
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
index 9cb031f5..fb707963 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
@@ -148,7 +148,8 @@ public:
     VulkanDevice(
         const VulkanPhysicalDevice &physicalDevice = getVulkanPhysicalDevice(),
         const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap =
-            getDefaultVulkanQueueFamilyToQueueCountMap());
+            getDefaultVulkanQueueFamilyToQueueCountMap(),
+        bool useShaderInt8 = false);
     virtual ~VulkanDevice();
     const VulkanPhysicalDevice &getPhysicalDevice() const;
     VulkanQueue &
diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
index e39641f1..23578e06 100644
--- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
+++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
@@ -1586,7 +1586,7 @@ struct BufferTestBase : public VulkanTestBase
 {
     BufferTestBase(cl_device_id device, cl_context context,
                    cl_command_queue queue, cl_int nelems)
-        : VulkanTestBase(device, context, queue, nelems)
+        : VulkanTestBase(device, context, queue, nelems, true)
     {}
 
     int test_buffer_common(bool use_fence)
diff --git a/test_conformance/vulkan/vulkan_test_base.h b/test_conformance/vulkan/vulkan_test_base.h
index 8c7b07cc..1e3f8e2f 100644
--- a/test_conformance/vulkan/vulkan_test_base.h
+++ b/test_conformance/vulkan/vulkan_test_base.h
@@ -37,11 +37,13 @@ inline void params_reset()
 struct VulkanTestBase
 {
     VulkanTestBase(cl_device_id device, cl_context context,
-                   cl_command_queue queue, cl_int nelems)
+                   cl_command_queue queue, cl_int nelems,
+                   bool useShaderInt8 = false)
         : device(device), context(context), num_elems(nelems)
     {
         vkDevice.reset(new VulkanDevice(
-            getAssociatedVulkanPhysicalDevice(device, useValidationLayers)));
+            getAssociatedVulkanPhysicalDevice(device, useValidationLayers),
+            getDefaultVulkanQueueFamilyToQueueCountMap(), useShaderInt8));
 
         cl_platform_id platform;
         cl_int error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,

From be530df8a8c79cc7b38e50b6537457d0530f418f Mon Sep 17 00:00:00 2001
From: gorazd-sumkovski-arm
 <161028652+gorazd-sumkovski-arm@users.noreply.github.com>
Date: Tue, 17 Feb 2026 18:26:49 +0000
Subject: [PATCH 44/54] Fix size parameters in `cl_ext_buffer_device_address`
 tests (#2585)

The `DeviceAddrFromAPI` variable is of type `cl_mem_device_address_ext`.
But on 32-bit systems `sizeof(void*) = 4 < 8 =
sizeof(cl_mem_device_address_ext)`. Pass `sizeof(DeviceAddrFromAPI)`
instead.
---
 .../buffer_device_address.cpp                          | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp b/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp
index 50eb5011..8e8ced30 100644
--- a/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp
+++ b/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp
@@ -313,8 +313,8 @@ private:
             // A basic buffer used to pass the other buffer's address.
             error = clEnqueueWriteBuffer(queue, buffer_in_long,
                                          CL_TRUE, // block
-                                         0, sizeof(cl_long), &DeviceAddrFromAPI,
-                                         0, NULL, NULL);
+                                         0, sizeof(DeviceAddrFromAPI),
+                                         &DeviceAddrFromAPI, 0, NULL, NULL);
         test_error_fail(error,
                         "clEnqueueWriteBuffer of dev_addr_buffer failed\n");
 
@@ -325,9 +325,9 @@ private:
                                &buffer_out_int);
         test_error_fail(error, "clSetKernelArg 1 failed\n");
 
-        error = clSetKernelExecInfo(ind_access_kernel,
-                                    CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT,
-                                    sizeof(void *), &DeviceAddrFromAPI);
+        error = clSetKernelExecInfo(
+            ind_access_kernel, CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT,
+            sizeof(DeviceAddrFromAPI), &DeviceAddrFromAPI);
         test_error_fail(error,
                         "Setting indirect access for "
                         "device ptrs failed!\n");

From ddc163a03ae010eb3f3eb6162f7b70132d104544 Mon Sep 17 00:00:00 2001
From: Grzegorz Wawiorko <grzegorz.wawiorko@intel.com>
Date: Tue, 17 Feb 2026 19:27:36 +0100
Subject: [PATCH 45/54] Fix conversion data loss in test api min max constant
 buffer size (#1359)

This fixes an issue that occurs when sizes above UINT_MAX are reported.
In that case the test behaves and displays its results incorrectly.
---
 test_conformance/api/test_api_min_max.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp
index 29677623..fd8c3ee5 100644
--- a/test_conformance/api/test_api_min_max.cpp
+++ b/test_conformance/api/test_api_min_max.cpp
@@ -1526,7 +1526,7 @@ REGISTER_TEST(min_max_constant_buffer_size)
     size_t threads[1], localThreads[1];
     cl_int *constantData, *resultData;
     cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize;
-    int i;
+    size_t i;
     cl_event event;
     cl_int event_status;
     MTdata d;
@@ -1556,6 +1556,8 @@ REGISTER_TEST(min_max_constant_buffer_size)
 
     maxAllocSize = get_device_info_max_mem_alloc_size(
         device, MAX_DEVICE_MEMORY_SIZE_DIVISOR);
+    log_info("Reported max alloc size of %" PRIu64 " bytes.\n",
+             (uint64_t)maxAllocSize);
 
     if (maxSize > maxAllocSize) maxSize = maxAllocSize;
 
@@ -1590,7 +1592,7 @@ REGISTER_TEST(min_max_constant_buffer_size)
             return EXIT_FAILURE;
         }
 
-        for (i = 0; i < (int)(numberOfInts); i++)
+        for (i = 0; i < numberOfInts; i++)
             constantData[i] = (int)genrand_int32(d);
 
         clMemWrapper streams[3];
@@ -1678,11 +1680,11 @@ REGISTER_TEST(min_max_constant_buffer_size)
                                     sizeToAllocate, resultData, 0, NULL, NULL);
         test_error(error, "clEnqueueReadBuffer failed");
 
-        for (i = 0; i < (int)(numberOfInts); i++)
+        for (i = 0; i < numberOfInts; i++)
             if (constantData[i] != resultData[i])
             {
-                log_error("Data failed to verify: constantData[%d]=%d != "
-                          "resultData[%d]=%d\n",
+                log_error("Data failed to verify: constantData[%zu]=%d != "
+                          "resultData[%zu]=%d\n",
                           i, constantData[i], i, resultData[i]);
                 free(constantData);
                 free(resultData);

From 662e53b60d10bb795f527487e19db708cfffb8b0 Mon Sep 17 00:00:00 2001
From: Ole Strohm <ole.strohm@arm.com>
Date: Thu, 19 Feb 2026 00:04:47 +0000
Subject: [PATCH 46/54] Fix new lines in error logging (#2617)

This is a small patch to fix a malformed newline (`.n` instead of `.\n`)
in an error log message.
---
 test_conformance/api/test_spirv_queries.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_conformance/api/test_spirv_queries.cpp b/test_conformance/api/test_spirv_queries.cpp
index 720f73b1..d1536e0b 100644
--- a/test_conformance/api/test_spirv_queries.cpp
+++ b/test_conformance/api/test_spirv_queries.cpp
@@ -757,7 +757,7 @@ REGISTER_TEST(spirv_query_dependencies)
         }
         for (const auto& extension_dep : it->second.extensions)
         {
-            log_error("Checked for SPIR-V extension %s.n",
+            log_error("Checked for SPIR-V extension %s.\n",
                       extension_dep.c_str());
         }
         return TEST_FAIL;

From 5673883005aad3f8bec6fa64e1bfb663e85ee1af Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 24 Feb 2026 17:37:29 +0100
Subject: [PATCH 47/54] Added support for cl_ext_float_atomics in
 CBasicTestFetchAddSpecialFloats with atomic_double (#2388)

Related to #2142. According to the work plan, this extends
CBasicTestFetchAddSpecialFloats with support for atomic_double.
---
 test_conformance/c11_atomics/common.h         |  8 +-
 test_conformance/c11_atomics/main.cpp         |  7 +-
 test_conformance/c11_atomics/test_atomics.cpp | 98 ++++++++++++++-----
 3 files changed, 86 insertions(+), 27 deletions(-)

diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h
index 5f917949..6a8a0082 100644
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h
@@ -77,6 +77,7 @@ extern int
 extern cl_device_atomic_capabilities gAtomicMemCap,
     gAtomicFenceCap; // atomic memory and fence capabilities for this device
 
+extern cl_device_fp_config gDoubleFPConfig;
 extern cl_device_fp_config gFloatFPConfig;
 extern cl_device_fp_config gHalfFPConfig;
 
@@ -924,12 +925,15 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
             + ss.str() + "] = {\n";
         ss.str("");
 
-        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_ATOMIC_DOUBLE> || std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
         {
             if (std::isinf(_startValue))
                 ss << (_startValue < 0 ? "-" : "") << "INFINITY";
             else if (std::isnan(_startValue))
-                ss << "0.0f / 0.0f";
+                ss << "0.0 / 0.0";
             else
                 ss << std::setprecision(
                     std::numeric_limits<HostDataType>::max_digits10)
diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp
index 7d636151..ee83dfdb 100644
--- a/test_conformance/c11_atomics/main.cpp
+++ b/test_conformance/c11_atomics/main.cpp
@@ -31,7 +31,7 @@ int gInternalIterations = 10000; // internal test iterations for atomic operatio
 int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device
 cl_device_atomic_capabilities gAtomicMemCap,
     gAtomicFenceCap; // atomic memory and fence capabilities for this device
-
+cl_device_fp_config gDoubleFPConfig = 0;
 cl_device_fp_config gFloatFPConfig = 0;
 cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
 bool gFloatAtomicsSupported = false;
@@ -143,6 +143,11 @@ test_status InitCL(cl_device_id device) {
                 device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT,
                 sizeof(gDoubleAtomicCaps), &gDoubleAtomicCaps, nullptr);
             test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
+
+            error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG,
+                                    sizeof(gDoubleFPConfig), &gDoubleFPConfig,
+                                    NULL);
+            test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
         }
 
         cl_int error = clGetDeviceInfo(
diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index df6d1e58..06f2fd9e 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -1442,8 +1442,10 @@ public:
         // threads to be mapped deterministically onto the input data array.
         // This enables repeated add operations arranged so that every
         // special value is added to every other one (“all-to-all”).
-
-        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             auto spec_vals = GetSpecialValues();
             StartValue(spec_vals.size());
@@ -1452,7 +1454,6 @@ public:
         }
         else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
         {
-
             auto spec_vals = GetSpecialValues();
             StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode));
             CBasicTestMemOrderScope<HostAtomicType,
@@ -1462,14 +1463,21 @@ public:
 
     static std::vector<HostDataType> &GetSpecialValues()
     {
-        const float test_value_zero = 0.0f;
-        const float test_value_minus_zero = -0.0f;
-        const float test_value_without_fraction = 2.0f;
-        const float test_value_with_fraction = 2.2f;
-
         static std::vector<HostDataType> special_values;
-        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
+            const HostDataType test_value_zero =
+                static_cast<HostDataType>(0.0f);
+            const HostDataType test_value_minus_zero =
+                static_cast<HostDataType>(-0.0f);
+            const HostDataType test_value_without_fraction =
+                static_cast<HostDataType>(2.0f);
+            const HostDataType test_value_with_fraction =
+                static_cast<HostDataType>(2.2f);
+
             if (special_values.empty())
             {
                 special_values = {
@@ -1488,10 +1496,21 @@ public:
                     std::numeric_limits<HostDataType>::max(),
                 };
 
-                if (0 != (CL_FP_DENORM & gFloatFPConfig))
+                if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
                 {
-                    special_values.push_back(
-                        std::numeric_limits<HostDataType>::denorm_min());
+                    if (0 != (CL_FP_DENORM & gDoubleFPConfig))
+                    {
+                        special_values.push_back(
+                            std::numeric_limits<HostDataType>::denorm_min());
+                    }
+                }
+                else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+                {
+                    if (0 != (CL_FP_DENORM & gFloatFPConfig))
+                    {
+                        special_values.push_back(
+                            std::numeric_limits<HostDataType>::denorm_min());
+                    }
                 }
             }
         }
@@ -1524,7 +1543,6 @@ public:
                 }
             }
         }
-
         return special_values;
     }
 
@@ -1534,7 +1552,7 @@ public:
         if constexpr (
             std::is_same_v<
                 HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             if (threadCount > ref_vals.size())
             {
@@ -1568,7 +1586,7 @@ public:
         if constexpr (
             std::is_same_v<
                 HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             // The start_value variable (set by StartValue) is used
             // as a divisor of the thread index when selecting the operand for
@@ -1597,7 +1615,7 @@ public:
         if constexpr (
             std::is_same_v<
                 HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             auto spec_vals = GetSpecialValues();
             host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid],
@@ -1612,8 +1630,10 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-
-        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             auto spec_vals = GetSpecialValues();
             expected = startRefValues[whichDestValue]
@@ -1635,17 +1655,17 @@ public:
                              const std::vector<HostAtomicType> &testValues,
                              cl_uint whichDestValue) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            return static_cast<cl_half>(expected) != testValues[whichDestValue];
+        }
+        else
         {
             if (std::isnan(testValues[whichDestValue]) && std::isnan(expected))
                 return false;
             else
                 return expected != testValues[whichDestValue];
         }
-        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            return static_cast<cl_half>(expected) != testValues[whichDestValue];
-        }
 
         return CBasicTestMemOrderScope<
             HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
@@ -1655,6 +1675,25 @@ public:
     int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                           cl_command_queue queue) override
     {
+        if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
+        {
+            if (LocalMemory()
+                && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0; // skip test - not applicable
+
+            if (!LocalMemory()
+                && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
+                    == 0)
+                return 0;
+
+            if (!CBasicTestMemOrderScope<HostAtomicType,
+                                         HostDataType>::LocalMemory()
+                && CBasicTestMemOrderScope<HostAtomicType,
+                                           HostDataType>::DeclaredInProgram())
+            {
+                if ((gDoubleFPConfig & CL_FP_INF_NAN) == 0) return 0;
+            }
+        }
         if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             if (LocalMemory()
@@ -1702,7 +1741,7 @@ public:
         if constexpr (
             std::is_same_v<
                 HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             return threadCount;
         }
@@ -1737,6 +1776,17 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
 
     if (gFloatAtomicsSupported)
     {
+        auto spec_vals_fp64 =
+            CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_DOUBLE,
+                                            HOST_DOUBLE>::GetSpecialValues();
+
+        CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_DOUBLE, HOST_DOUBLE>
+            test_spec_double(TYPE_ATOMIC_DOUBLE, useSVM);
+        EXECUTE_TEST(error,
+                     test_spec_double.Execute(deviceID, context, queue,
+                                              spec_vals_fp64.size()
+                                                  * spec_vals_fp64.size()));
+
         auto spec_vals_fp32 =
             CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_FLOAT,
                                             HOST_FLOAT>::GetSpecialValues();

From 1e9f2f6aa2dfb976e9919ed94d9ad2d1f3208c16 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 3 Mar 2026 17:39:56 +0100
Subject: [PATCH 48/54] Fix test_vulkan_interop_buffer validation errors for
 missing vkUpdateDescriptorSets (#2606)

Fixes vulkan validation layer error:

Vulkan validation layer: Validation Error: [
VUID-vkCmdDispatch-None-08114 ] Object 0: handle = 0xb9181f0000000029,
type = VK_OBJECT_TYPE_DESCRIPTOR_SET; | MessageID = 0x30b6e267 |
vkCmdDispatch(): the descriptor VkDescriptorSet 0xb9181f0000000029[]
[Set 0, Binding 1, Index 1, variable "bufferPtrList"] is being used in
dispatch but has never been updated via vkUpdateDescriptorSets() or a
similar call. The Vulkan spec states: Descriptors in each bound
descriptor set, specified via vkCmdBindDescriptorSets, must be valid as
described by descriptor validity if they are statically used by the
VkPipeline bound to the pipeline bind point used by this command and the
bound VkPipeline was not created with
VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT
(https://vulkan.lunarg.com/doc/view/1.4.304.0/windows/1.4-extensions/vkspec.html#VUID-vkCmdDispatch-None-08114)
---
 .../common/vulkan_wrapper/vulkan_wrapper.cpp  |  5 +-
 .../common/vulkan_wrapper/vulkan_wrapper.hpp  |  3 +-
 test_conformance/vulkan/shaders/buffer.comp   | 54 +++++++-------
 .../vulkan/test_vulkan_interop_buffer.cpp     | 72 +++++++++++++++----
 4 files changed, 93 insertions(+), 41 deletions(-)

diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
index e1c81086..08506d1c 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
@@ -1121,7 +1121,8 @@ VulkanComputePipeline::VulkanComputePipeline(
 
 VulkanComputePipeline::VulkanComputePipeline(
     const VulkanDevice &device, const VulkanPipelineLayout &pipelineLayout,
-    const VulkanShaderModule &shaderModule, const std::string &entryFuncName)
+    const VulkanShaderModule &shaderModule, const std::string &entryFuncName,
+    const VkSpecializationInfo *spec)
     : VulkanPipeline(device)
 {
     VkPipelineShaderStageCreateInfo vkPipelineShaderStageCreateInfo = {};
@@ -1134,6 +1135,8 @@ VulkanComputePipeline::VulkanComputePipeline(
     vkPipelineShaderStageCreateInfo.pName = entryFuncName.c_str();
     vkPipelineShaderStageCreateInfo.pSpecializationInfo = NULL;
 
+    if (spec) vkPipelineShaderStageCreateInfo.pSpecializationInfo = spec;
+
     VkComputePipelineCreateInfo vkComputePipelineCreateInfo = {};
     vkComputePipelineCreateInfo.sType =
         VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
index fb707963..25e1d409 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
@@ -297,7 +297,8 @@ public:
     VulkanComputePipeline(const VulkanDevice &device,
                           const VulkanPipelineLayout &pipelineLayout,
                           const VulkanShaderModule &shaderModule,
-                          const std::string &entryFuncName = "main");
+                          const std::string &entryFuncName = "main",
+                          const VkSpecializationInfo *spec = nullptr);
     virtual ~VulkanComputePipeline();
     VulkanPipelineBindPoint getPipelineBindPoint() const;
 };
diff --git a/test_conformance/vulkan/shaders/buffer.comp b/test_conformance/vulkan/shaders/buffer.comp
index 3e4eae55..3d059ce5 100644
--- a/test_conformance/vulkan/shaders/buffer.comp
+++ b/test_conformance/vulkan/shaders/buffer.comp
@@ -1,28 +1,28 @@
-#version 450
-#extension GL_ARB_separate_shader_objects : enable
-#extension GL_EXT_shader_explicit_arithmetic_types_int8    : enable
-#extension GL_EXT_shader_explicit_arithmetic_types_int32   : enable
-
-#define MAX_BUFFERS 5
-
-layout(binding = 0) buffer Params
-{
-  uint32_t numBuffers;
-  uint32_t bufferSize;
-  uint32_t interBufferOffset;
-};
-layout(binding = 1) buffer Buffer
-{
-  uint8_t ptr[];
-} bufferPtrList[MAX_BUFFERS];
-layout(local_size_x = 128) in;
-void main() {
-    for (uint32_t bufIdx = 0; bufIdx < numBuffers; bufIdx++) {
-        uint32_t ptrIdx = gl_GlobalInvocationID.x;
-        uint32_t limit = bufferSize;
-        while (ptrIdx < limit) {
-            bufferPtrList[bufIdx].ptr[ptrIdx]++;
-            ptrIdx += (gl_NumWorkGroups.x * gl_WorkGroupSize.x);
-        }
-    }
+#version 450
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int8    : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int32   : enable
+
+layout(constant_id = 0) const uint MAX_BUFFERS = 5;
+
+layout(binding = 0) buffer Params
+{
+  uint32_t numBuffers;
+  uint32_t bufferSize;
+  uint32_t interBufferOffset;
+};
+layout(binding = 1) buffer Buffer
+{
+  uint8_t ptr[];
+} bufferPtrList[MAX_BUFFERS];
+layout(local_size_x = 128) in;
+void main() {
+    for (uint32_t bufIdx = 0; bufIdx < numBuffers; bufIdx++) {
+        uint32_t ptrIdx = gl_GlobalInvocationID.x;
+        uint32_t limit = bufferSize;
+        while (ptrIdx < limit) {
+            bufferPtrList[bufIdx].ptr[ptrIdx]++;
+            ptrIdx += (gl_NumWorkGroups.x * gl_WorkGroupSize.x);
+        }
+    }
 }
\ No newline at end of file
diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
index 23578e06..9e9a7fcb 100644
--- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
+++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
@@ -128,12 +128,24 @@ int run_test_with_two_queue(
     vkDescriptorSetLayoutBindingList.addBinding(
         0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
     vkDescriptorSetLayoutBindingList.addBinding(
-        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
+        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
     VulkanDescriptorSetLayout vkDescriptorSetLayout(
         vkDevice, vkDescriptorSetLayoutBindingList);
     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
-    VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
-                                            vkBufferShaderModule);
+
+    VkSpecializationMapEntry entry;
+    entry.constantID = 0;
+    entry.offset = 0;
+    entry.size = sizeof(uint32_t);
+
+    VkSpecializationInfo spec;
+    spec.mapEntryCount = 1;
+    spec.pMapEntries = &entry;
+    spec.dataSize = sizeof(uint32_t);
+    spec.pData = &numBuffers;
+
+    VulkanComputePipeline vkComputePipeline(
+        vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
 
     VulkanDescriptorPool vkDescriptorPool(vkDevice,
                                           vkDescriptorSetLayoutBindingList);
@@ -461,12 +473,24 @@ int run_test_with_one_queue(
     vkDescriptorSetLayoutBindingList.addBinding(
         0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
     vkDescriptorSetLayoutBindingList.addBinding(
-        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
+        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
     VulkanDescriptorSetLayout vkDescriptorSetLayout(
         vkDevice, vkDescriptorSetLayoutBindingList);
     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
-    VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
-                                            vkBufferShaderModule);
+
+    VkSpecializationMapEntry entry;
+    entry.constantID = 0;
+    entry.offset = 0;
+    entry.size = sizeof(uint32_t);
+
+    VkSpecializationInfo spec;
+    spec.mapEntryCount = 1;
+    spec.pMapEntries = &entry;
+    spec.dataSize = sizeof(uint32_t);
+    spec.pData = &numBuffers;
+
+    VulkanComputePipeline vkComputePipeline(
+        vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
 
     VulkanDescriptorPool vkDescriptorPool(vkDevice,
                                           vkDescriptorSetLayoutBindingList);
@@ -764,12 +788,24 @@ int run_test_with_multi_import_same_ctx(
     vkDescriptorSetLayoutBindingList.addBinding(
         0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
     vkDescriptorSetLayoutBindingList.addBinding(
-        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
+        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
     VulkanDescriptorSetLayout vkDescriptorSetLayout(
         vkDevice, vkDescriptorSetLayoutBindingList);
     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
-    VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
-                                            vkBufferShaderModule);
+
+    VkSpecializationMapEntry entry;
+    entry.constantID = 0;
+    entry.offset = 0;
+    entry.size = sizeof(uint32_t);
+
+    VkSpecializationInfo spec;
+    spec.mapEntryCount = 1;
+    spec.pMapEntries = &entry;
+    spec.dataSize = sizeof(uint32_t);
+    spec.pData = &numBuffers;
+
+    VulkanComputePipeline vkComputePipeline(
+        vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
 
     VulkanDescriptorPool vkDescriptorPool(vkDevice,
                                           vkDescriptorSetLayoutBindingList);
@@ -1103,12 +1139,24 @@ int run_test_with_multi_import_diff_ctx(
     vkDescriptorSetLayoutBindingList.addBinding(
         0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
     vkDescriptorSetLayoutBindingList.addBinding(
-        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
+        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
     VulkanDescriptorSetLayout vkDescriptorSetLayout(
         vkDevice, vkDescriptorSetLayoutBindingList);
     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
-    VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
-                                            vkBufferShaderModule);
+
+    VkSpecializationMapEntry entry;
+    entry.constantID = 0;
+    entry.offset = 0;
+    entry.size = sizeof(uint32_t);
+
+    VkSpecializationInfo spec;
+    spec.mapEntryCount = 1;
+    spec.pMapEntries = &entry;
+    spec.dataSize = sizeof(uint32_t);
+    spec.pData = &numBuffers;
+
+    VulkanComputePipeline vkComputePipeline(
+        vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
 
     VulkanDescriptorPool vkDescriptorPool(vkDevice,
                                           vkDescriptorSetLayoutBindingList);

From c3d9c85743f29e71b0333fd1b88870da0ba49e2f Mon Sep 17 00:00:00 2001
From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com>
Date: Tue, 3 Mar 2026 17:55:32 +0000
Subject: [PATCH 49/54] Fix sync_fd imported semaphore undefined behaviour
 (#2616)

The tests listed below create an OpenCL semaphore using (fd == -1) and then
call `clEnqueueSignalSemaphoresKHR` on that semaphore. (fd == -1) refers
to an object that has already signaled, so enqueueing a signal command
on it leads to undefined behavior.

Quoting OpenCL specification:
```
The special value -1 for fd is treated like a valid sync file descriptor
referring to an object that has already signaled.
```
And
```
Signaling the same binary semaphore twice without an interleaving wait
may lead to undefined behavior.
```

- external_semaphores_simple_1
- external_semaphores_reuse
- external_semaphores_cross_queues_ooo
- external_semaphores_cross_queues_io
- external_semaphores_cross_queues_io2

This commit changes the tests to avoid signaling an already signaled
semaphore and to re-import the semaphore's fd correctly after a wait has
been enqueued successfully.

Signed-off-by: Ahmed Hesham <ahmed.hesham@arm.com>
Co-authored-by: Michael Rizkalla <michael.rizkalla@arm.com>
---
 .../test_external_semaphore.cpp               | 150 +++++++++++++-----
 1 file changed, 112 insertions(+), 38 deletions(-)

diff --git a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp
index b125864f..1e9fd74a 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp
+++ b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp
@@ -425,9 +425,14 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
 
         // Signal semaphore
         clEventWrapper signal_event;
-        err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
-                                           nullptr, 0, nullptr, &signal_event);
-        test_error(err, "Could not signal semaphore");
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            err = clEnqueueSignalSemaphoresKHR(
+                queue, 1, &sema_ext.getCLSemaphore(), nullptr, 0, nullptr,
+                &signal_event);
+            test_error(err, "Could not signal semaphore");
+        }
 
         // Wait semaphore
         clEventWrapper wait_event;
@@ -440,7 +445,11 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
         test_error(err, "Could not finish queue");
 
         // Ensure all events are completed
-        test_assert_event_complete(signal_event);
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            test_assert_event_complete(signal_event);
+        }
         test_assert_event_complete(wait_event);
     }
 
@@ -464,6 +473,7 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
     // Obtain pointers to semaphore's API
     GET_PFN(device, clEnqueueSignalSemaphoresKHR);
     GET_PFN(device, clEnqueueWaitSemaphoresKHR);
+    GET_PFN(device, clReImportSemaphoreSyncFdKHR);
 
     std::vector<VulkanExternalSemaphoreHandleType>
         vkExternalSemaphoreHandleTypeList =
@@ -507,11 +517,15 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
         err = clEnqueueTask(queue, kernel, 0, nullptr, &task_events[0]);
         test_error(err, "Unable to enqueue task_1");
 
-        // Signal semaphore (dependency on task_1)
-        err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
-                                           nullptr, 1, &task_events[0],
-                                           &signal_events[0]);
-        test_error(err, "Could not signal semaphore");
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            // Signal semaphore (dependency on task_1)
+            err = clEnqueueSignalSemaphoresKHR(
+                queue, 1, &sema_ext.getCLSemaphore(), nullptr, 1,
+                &task_events[0], &signal_events[0]);
+            test_error(err, "Could not signal semaphore");
+        }
 
         // In a loop
         size_t loop;
@@ -532,11 +546,21 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
             err = clWaitForEvents(1, &wait_events[loop - 1]);
             test_error(err, "Unable to wait for wait semaphore to complete");
 
-            // Signal semaphore (dependency on task_loop)
-            err = clEnqueueSignalSemaphoresKHR(
-                queue, 1, &sema_ext.getCLSemaphore(), nullptr, 1,
-                &task_events[loop], &signal_events[loop]);
-            test_error(err, "Could not signal semaphore");
+            if (vkExternalSemaphoreHandleType
+                == VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+            {
+                err = clReImportSemaphoreSyncFdKHR(sema_ext.getCLSemaphore(),
+                                                   nullptr, -1);
+                test_error(err, "Could not reimport semaphore sync fd");
+            }
+            else
+            {
+                // Signal semaphore (dependency on task_loop)
+                err = clEnqueueSignalSemaphoresKHR(
+                    queue, 1, &sema_ext.getCLSemaphore(), nullptr, 1,
+                    &task_events[loop], &signal_events[loop]);
+                test_error(err, "Could not signal semaphore");
+            }
         }
 
         // Wait semaphore
@@ -553,7 +577,11 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
         for (loop = 0; loop < loop_count; ++loop)
         {
             test_assert_event_complete(wait_events[loop]);
-            test_assert_event_complete(signal_events[loop]);
+            if (vkExternalSemaphoreHandleType
+                != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+            {
+                test_assert_event_complete(signal_events[loop]);
+            }
             test_assert_event_complete(task_events[loop]);
         }
     }
@@ -595,6 +623,19 @@ static int external_semaphore_cross_queue_helper(cl_device_id device,
     for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType :
          vkExternalSemaphoreHandleTypeList)
     {
+        if (vkExternalSemaphoreHandleType
+            == VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            std::stringstream log_message;
+            log_message
+                << "Skipping semaphore type: \""
+                << vkExternalSemaphoreHandleType
+                << "\"; it cannot be signaled from OpenCL when imported."
+                << std::endl;
+            log_info("%s", log_message.str().c_str());
+            continue;
+        }
+
         log_info_semaphore_type(vkExternalSemaphoreHandleType);
         VulkanSemaphore vkVk2CLSemaphore(vkDevice,
                                          vkExternalSemaphoreHandleType);
@@ -727,10 +768,14 @@ REGISTER_TEST_VERSION(external_semaphores_cross_queues_io2, Version(1, 2))
 
         // Signal semaphore 1
         clEventWrapper signal_1_event;
-        err = clEnqueueSignalSemaphoresKHR(
-            queue1, 1, &sema_ext_1.getCLSemaphore(), nullptr, 0, nullptr,
-            &signal_1_event);
-        test_error(err, "Could not signal semaphore");
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            err = clEnqueueSignalSemaphoresKHR(
+                queue1, 1, &sema_ext_1.getCLSemaphore(), nullptr, 0, nullptr,
+                &signal_1_event);
+            test_error(err, "Could not signal semaphore");
+        }
 
         // Wait semaphore 1
         clEventWrapper wait_1_event;
@@ -741,10 +786,14 @@ REGISTER_TEST_VERSION(external_semaphores_cross_queues_io2, Version(1, 2))
 
         // Signal semaphore 2
         clEventWrapper signal_2_event;
-        err = clEnqueueSignalSemaphoresKHR(
-            queue2, 1, &sema_ext_2.getCLSemaphore(), nullptr, 0, nullptr,
-            &signal_2_event);
-        test_error(err, "Could not signal semaphore");
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            err = clEnqueueSignalSemaphoresKHR(
+                queue2, 1, &sema_ext_2.getCLSemaphore(), nullptr, 0, nullptr,
+                &signal_2_event);
+            test_error(err, "Could not signal semaphore");
+        }
 
         // Wait semaphore 2
         clEventWrapper wait_2_event;
@@ -761,8 +810,12 @@ REGISTER_TEST_VERSION(external_semaphores_cross_queues_io2, Version(1, 2))
         test_error(err, "Could not finish queue");
 
         // Ensure all events are completed
-        test_assert_event_complete(signal_1_event);
-        test_assert_event_complete(signal_2_event);
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            test_assert_event_complete(signal_1_event);
+            test_assert_event_complete(signal_2_event);
+        }
         test_assert_event_complete(wait_1_event);
         test_assert_event_complete(wait_2_event);
     }
@@ -800,6 +853,19 @@ REGISTER_TEST_VERSION(external_semaphores_multi_signal, Version(1, 2))
     for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType :
          vkExternalSemaphoreHandleTypeList)
     {
+        if (vkExternalSemaphoreHandleType
+            == VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            std::stringstream log_message;
+            log_message
+                << "Skipping semaphore type: \""
+                << vkExternalSemaphoreHandleType
+                << "\"; it cannot be signaled from OpenCL when imported."
+                << std::endl;
+            log_info("%s", log_message.str().c_str());
+            continue;
+        }
+
         log_info_semaphore_type(vkExternalSemaphoreHandleType);
         VulkanSemaphore vkVk2CLSemaphore1(vkDevice,
                                           vkExternalSemaphoreHandleType);
@@ -901,19 +967,23 @@ REGISTER_TEST_VERSION(external_semaphores_multi_wait, Version(1, 2))
             context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
         test_error(err, "Could not create command queue");
 
-        // Signal semaphore 1
         clEventWrapper signal_1_event;
-        err =
-            clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(),
-                                         nullptr, 0, nullptr, &signal_1_event);
-        test_error(err, "Could not signal semaphore");
-
-        // Signal semaphore 2
         clEventWrapper signal_2_event;
-        err =
-            clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_2.getCLSemaphore(),
-                                         nullptr, 0, nullptr, &signal_2_event);
-        test_error(err, "Could not signal semaphore");
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            // Signal semaphore 1
+            err = clEnqueueSignalSemaphoresKHR(
+                queue, 1, &sema_ext_1.getCLSemaphore(), nullptr, 0, nullptr,
+                &signal_1_event);
+            test_error(err, "Could not signal semaphore");
+
+            // Signal semaphore 2
+            err = clEnqueueSignalSemaphoresKHR(
+                queue, 1, &sema_ext_2.getCLSemaphore(), nullptr, 0, nullptr,
+                &signal_2_event);
+            test_error(err, "Could not signal semaphore");
+        }
 
         // Wait semaphore 1 and 2
         clEventWrapper wait_event;
@@ -928,8 +998,12 @@ REGISTER_TEST_VERSION(external_semaphores_multi_wait, Version(1, 2))
         test_error(err, "Could not finish queue");
 
         // Ensure all events are completed
-        test_assert_event_complete(signal_1_event);
-        test_assert_event_complete(signal_2_event);
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            test_assert_event_complete(signal_1_event);
+            test_assert_event_complete(signal_2_event);
+        }
         test_assert_event_complete(wait_event);
     }
 

From a56e8ee92b0cff22a637b5f31d9403ccc66076e0 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 10 Mar 2026 16:40:03 +0100
Subject: [PATCH 50/54] Add corrections following the cleanup of harness
 helper functions for programs and kernels (#2626)

Related to #2597
---
 test_common/harness/kernelHelpers.cpp         | 54 ++++---------------
 test_conformance/api/test_kernel_arg_info.cpp | 26 +++++----
 test_conformance/api/test_queue_hint.cpp      |  3 +-
 .../basic/test_enqueued_local_size.cpp        |  4 +-
 test_conformance/basic/test_sizeof.cpp        |  4 +-
 test_conformance/c11_atomics/common.h         |  5 +-
 .../cxx_for_opencl_ext.cpp                    |  4 +-
 .../gl/test_images_write_common.cpp           |  5 +-
 .../TestNonUniformWorkGroup.cpp               | 10 ++--
 test_conformance/pipes/test_pipe_info.cpp     |  6 +--
 10 files changed, 44 insertions(+), 77 deletions(-)

diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp
index 8fa9c0a4..637bf640 100644
--- a/test_common/harness/kernelHelpers.cpp
+++ b/test_common/harness/kernelHelpers.cpp
@@ -141,7 +141,6 @@ std::string get_kernel_name(const std::string &source)
         {
             kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1);
             kernelsList[kernelsList.size() - 1] = '.';
-            kernelsList[kernelsList.size() - 1] = '.';
         }
         oss << kernelsList;
     }
@@ -678,17 +677,18 @@ static int create_single_kernel_helper_create_program_offline(
     return CL_SUCCESS;
 }
 
-static int create_single_kernel_helper_create_program(
-    cl_context context, cl_device_id device, cl_program *outProgram,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *buildOptions, CompilationMode compilationMode)
+int create_single_kernel_helper_create_program(cl_context context,
+                                               cl_program *outProgram,
+                                               unsigned int numKernelLines,
+                                               const char **kernelProgram,
+                                               const char *buildOptions)
 {
     std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);
 
     std::string filePrefix =
         get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
     bool shouldSaveToDisk = should_save_kernel_source_to_disk(
-        compilationMode, gCompilationCacheMode, gCompilationCachePath,
+        gCompilationMode, gCompilationCacheMode, gCompilationCachePath,
         filePrefix);
 
     if (shouldSaveToDisk)
@@ -701,7 +701,7 @@ static int create_single_kernel_helper_create_program(
             return -1;
         }
     }
-    if (compilationMode == kOnline)
+    if (gCompilationMode == kOnline)
     {
         int error = CL_SUCCESS;
 
@@ -718,42 +718,11 @@ static int create_single_kernel_helper_create_program(
     else
     {
         return create_single_kernel_helper_create_program_offline(
-            context, device, outProgram, numKernelLines, kernelProgram,
-            buildOptions, compilationMode);
+            context, nullptr, outProgram, numKernelLines, kernelProgram,
+            buildOptions, gCompilationMode);
     }
 }
 
-int create_single_kernel_helper_create_program(cl_context context,
-                                               cl_program *outProgram,
-                                               unsigned int numKernelLines,
-                                               const char **kernelProgram,
-                                               const char *buildOptions)
-{
-    return create_single_kernel_helper_create_program(
-        context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions,
-        gCompilationMode);
-}
-
-int create_single_kernel_helper_create_program_for_device(
-    cl_context context, cl_device_id device, cl_program *outProgram,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *buildOptions)
-{
-    return create_single_kernel_helper_create_program(
-        context, device, outProgram, numKernelLines, kernelProgram,
-        buildOptions, gCompilationMode);
-}
-
-int create_single_kernel_helper_with_build_options(
-    cl_context context, cl_program *outProgram, cl_kernel *outKernel,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *kernelName, const char *buildOptions)
-{
-    return create_single_kernel_helper(context, outProgram, outKernel,
-                                       numKernelLines, kernelProgram,
-                                       kernelName, buildOptions);
-}
-
 // Creates and builds OpenCL C/C++ program, and creates a kernel
 int create_single_kernel_helper(cl_context context, cl_program *outProgram,
                                 cl_kernel *outKernel,
@@ -1239,8 +1208,8 @@ int is_image_format_supported(cl_context context, cl_mem_flags flags,
 {
     cl_image_format *list;
     cl_uint count = 0;
-    cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128,
-                                            NULL, &count);
+    cl_int err =
+        clGetSupportedImageFormats(context, flags, image_type, 0, NULL, &count);
     if (count == 0) return 0;
 
     list = (cl_image_format *)malloc(count * sizeof(cl_image_format));
@@ -1276,7 +1245,6 @@ int is_image_format_supported(cl_context context, cl_mem_flags flags,
     return (i < count) ? 1 : 0;
 }
 
-size_t get_pixel_bytes(const cl_image_format *fmt);
 size_t get_pixel_bytes(const cl_image_format *fmt)
 {
     size_t chanCount;
diff --git a/test_conformance/api/test_kernel_arg_info.cpp b/test_conformance/api/test_kernel_arg_info.cpp
index 90b302b2..47b151b0 100644
--- a/test_conformance/api/test_kernel_arg_info.cpp
+++ b/test_conformance/api/test_kernel_arg_info.cpp
@@ -487,10 +487,10 @@ compare_kernel_with_expected(cl_context context, cl_device_id device,
     int failed_tests = 0;
     clKernelWrapper kernel;
     clProgramWrapper program;
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
-        get_build_options(device).c_str());
-    test_error(err, "create_single_kernel_helper_with_build_options");
+    cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                             &kernel_src, "get_kernel_arg_info",
+                                             get_build_options(device).c_str());
+    test_error(err, "create_single_kernel_helper");
     for (size_t i = 0; i < expected_args.size(); ++i)
     {
         KernelArgInfo actual;
@@ -874,11 +874,10 @@ static int test_null_param(cl_context context, cl_device_id device,
 {
     clProgramWrapper program;
     clKernelWrapper kernel;
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
-        get_build_options(device).c_str());
-    test_error_ret(err, "create_single_kernel_helper_with_build_options",
-                   TEST_FAIL);
+    cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                             &kernel_src, "get_kernel_arg_info",
+                                             get_build_options(device).c_str());
+    test_error_ret(err, "create_single_kernel_helper", TEST_FAIL);
 
     err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER,
                              CL_KERNEL_ARG_ADDRESS_QUALIFIER, 0, nullptr,
@@ -916,12 +915,11 @@ static int test_arg_name_size(cl_context context, cl_device_id device,
     char arg_return[sizeof(KERNEL_ARGUMENT_NAME) + 1];
     clProgramWrapper program;
     clKernelWrapper kernel;
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
-        get_build_options(device).c_str());
+    cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                             &kernel_src, "get_kernel_arg_info",
+                                             get_build_options(device).c_str());
 
-    test_error_ret(err, "create_single_kernel_helper_with_build_options",
-                   TEST_FAIL);
+    test_error_ret(err, "create_single_kernel_helper", TEST_FAIL);
 
     err =
         clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, CL_KERNEL_ARG_NAME,
diff --git a/test_conformance/api/test_queue_hint.cpp b/test_conformance/api/test_queue_hint.cpp
index 89769d7e..21df4341 100644
--- a/test_conformance/api/test_queue_hint.cpp
+++ b/test_conformance/api/test_queue_hint.cpp
@@ -86,7 +86,8 @@ REGISTER_TEST(queue_hint)
     clProgramWrapper program;
     clKernelWrapper kernel;
 
-    err = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, queue_hint_test_kernel, "vec_cpy", NULL);
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      queue_hint_test_kernel, "vec_cpy");
     if (err != 0)
     {
         return err;
diff --git a/test_conformance/basic/test_enqueued_local_size.cpp b/test_conformance/basic/test_enqueued_local_size.cpp
index 47123120..685982d1 100644
--- a/test_conformance/basic/test_enqueued_local_size.cpp
+++ b/test_conformance/basic/test_enqueued_local_size.cpp
@@ -103,11 +103,11 @@ REGISTER_TEST_VERSION(enqueued_local_size, Version(2, 0))
 
     std::string cl_std = "-cl-std=CL";
     cl_std += (get_device_cl_version(device) == Version(3, 0)) ? "3.0" : "2.0";
-    err = create_single_kernel_helper_with_build_options(
+    err = create_single_kernel_helper(
         context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code,
         "test_enqueued_local_size_1d", cl_std.c_str());
     test_error(err, "create_single_kernel_helper failed");
-    err = create_single_kernel_helper_with_build_options(
+    err = create_single_kernel_helper(
         context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code,
         "test_enqueued_local_size_2d", cl_std.c_str());
     test_error(err, "create_single_kernel_helper failed");
diff --git a/test_conformance/basic/test_sizeof.cpp b/test_conformance/basic/test_sizeof.cpp
index ac0a84f9..ecd2d9bf 100644
--- a/test_conformance/basic/test_sizeof.cpp
+++ b/test_conformance/basic/test_sizeof.cpp
@@ -50,8 +50,8 @@ cl_int get_type_size( cl_context context, cl_command_queue queue, const char *ty
     {
         sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
     }
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", nullptr);
+    cl_int err = create_single_kernel_helper(context, &p, &k, 4,
+                                             sizeof_kernel_code, "test_sizeof");
     test_error(err, "Failed to build kernel/program.");
 
     m = clCreateBuffer( context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof( cl_ulong ), size, &err );
diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h
index 6a8a0082..5f33441b 100644
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h
@@ -1245,9 +1245,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
             programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems)
                 + FunctionCode() + KernelCode(numDestItems);
             programLine = programSource.c_str();
-            if (create_single_kernel_helper_with_build_options(
-                    context, &program, &kernel, 1, &programLine,
-                    "test_atomic_kernel", gOldAPI ? "" : nullptr))
+            if (create_single_kernel_helper(context, &program, &kernel, 1,
+                                            &programLine, "test_atomic_kernel"))
             {
                 return -1;
             }
diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp b/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp
index aee287fd..56be5c65 100644
--- a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp
+++ b/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp
@@ -42,8 +42,8 @@ int test_cxx_for_opencl(cl_device_id device, cl_context context,
             execute(*p, x);
         })";
 
-    error = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel1, 1, &kernel_sstr, "k1", "-cl-std=CLC++");
+    error = create_single_kernel_helper(context, &program, &kernel1, 1,
+                                        &kernel_sstr, "k1", "-cl-std=CLC++");
     test_error(error, "Failed to create k1 kernel");
 
     kernel2 = clCreateKernel(program, "k2", &error);
diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp
index 6f1f51e7..77c6c2eb 100644
--- a/test_conformance/gl/test_images_write_common.cpp
+++ b/test_conformance/gl/test_images_write_common.cpp
@@ -336,9 +336,8 @@ int test_cl_image_write(cl_context context, cl_command_queue queue,
             get_explicit_type_name(*outType), suffix, convert);
 
     programPtr = kernelSource;
-    if (create_single_kernel_helper_with_build_options(
-            context, &program, &kernel, 1, (const char **)&programPtr,
-            "sample_test", ""))
+    if (create_single_kernel_helper(context, &program, &kernel, 1,
+                                    (const char **)&programPtr, "sample_test"))
     {
         return -1;
     }
diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
index 6106e0ab..78118697 100644
--- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
+++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
@@ -557,8 +557,9 @@ int TestNonUniformWorkGroup::prepareDevice () {
   if (_testRange & Range::BARRIERS)
     buildOptions += " -D TESTBARRIERS";
 
-  err = create_single_kernel_helper_with_build_options (_context, &_program, &_testKernel, 1,
-    &KERNEL_FUNCTION, "testKernel", buildOptions.c_str());
+  err = create_single_kernel_helper(_context, &_program, &_testKernel, 1,
+                                    &KERNEL_FUNCTION, "testKernel",
+                                    buildOptions.c_str());
   if (err)
   {
     log_error("Error %d in line: %d of file %s\n", err, __LINE__, __FILE__);
@@ -842,8 +843,9 @@ int SubTestExecutor::calculateWorkGroupSize(size_t &maxWgSize, int testRange) {
   if (testRange & Range::BARRIERS)
     buildOptions += " -D TESTBARRIERS";
 
-  err = create_single_kernel_helper_with_build_options (_context, &program, &testKernel, 1,
-    &KERNEL_FUNCTION, "testKernel", buildOptions.c_str());
+  err = create_single_kernel_helper(_context, &program, &testKernel, 1,
+                                    &KERNEL_FUNCTION, "testKernel",
+                                    buildOptions.c_str());
   if (err)
   {
     log_error("Error %d in line: %d of file %s\n", err, __LINE__, __FILE__);
diff --git a/test_conformance/pipes/test_pipe_info.cpp b/test_conformance/pipes/test_pipe_info.cpp
index 5525a554..4e1c83d9 100644
--- a/test_conformance/pipes/test_pipe_info.cpp
+++ b/test_conformance/pipes/test_pipe_info.cpp
@@ -63,9 +63,9 @@ REGISTER_TEST(pipe_info)
         log_info( " CL_PIPE_MAX_PACKETS passed.\n" );
     }
 
-    err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &pipe_kernel_code, "pipe_kernel",
-        "-cl-std=CL2.0 -cl-kernel-arg-info");
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      &pipe_kernel_code, "pipe_kernel",
+                                      "-cl-std=CL2.0 -cl-kernel-arg-info");
     test_error_fail(err, "Error creating program");
 
     cl_kernel_arg_type_qualifier arg_type_qualifier = 0;

From 65064216143344acda679c66a6992ddac9aca1ed Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 10 Mar 2026 16:41:40 +0100
Subject: [PATCH 51/54] Added support for cl_ext_float_atomics in
 CBasicTestFetchMinSpecialFloats with atomic_float (#2391)

Related to #2142. Following the work plan, this change adds
CBasicTestFetchMinSpecialFloats, which exercises atomic_fetch_min on
atomic_float with special floating-point values.
---
 test_conformance/c11_atomics/common.h         |  37 ++-
 test_conformance/c11_atomics/host_atomics.h   |   1 +
 test_conformance/c11_atomics/test_atomics.cpp | 300 ++++++++++++++++--
 3 files changed, 310 insertions(+), 28 deletions(-)

diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h
index 5f33441b..5d12b2ab 100644
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h
@@ -22,6 +22,7 @@
 
 #include "host_atomics.h"
 
+#include <algorithm>
 #include <iomanip>
 #include <limits>
 #include <sstream>
@@ -96,6 +97,37 @@ extern cl_int getSupportedMemoryOrdersAndScopes(
     cl_device_id device, std::vector<TExplicitMemoryOrderType> &memoryOrders,
     std::vector<TExplicitMemoryScopeType> &memoryScopes);
 
+union FloatIntUnion {
+    float f;
+    uint32_t i;
+};
+
+template <typename HostDataType> bool is_qnan(const HostDataType &value)
+{
+    if constexpr (std::is_same_v<HostDataType, float>)
+    {
+        FloatIntUnion u;
+        u.f = value;
+        if ((u.i & 0x7F800000) != 0x7F800000) return false;
+        return (u.i & 0x00400000) != 0;
+    }
+    else
+        return std::isnan(value);
+}
+
+template <typename HostDataType> bool is_snan(const HostDataType &value)
+{
+    if constexpr (std::is_same_v<HostDataType, float>)
+    {
+        FloatIntUnion u;
+        u.f = value;
+        if ((u.i & 0x7F800000) != 0x7F800000) return false;
+        return (u.i & 0x00400000) == 0;
+    }
+    else
+        return std::isnan(value);
+}
+
 class AtomicTypeInfo {
 public:
     TExplicitAtomicType _type;
@@ -187,6 +219,7 @@ public:
     virtual bool
     IsTestNotAsExpected(const HostDataType &expected,
                         const std::vector<HostAtomicType> &testValues,
+                        const std::vector<HostDataType> &startRefValues,
                         cl_uint whichDestValue)
     {
         return expected
@@ -928,7 +961,7 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
         if constexpr (
             std::is_same_v<
                 HostDataType,
-                HOST_ATOMIC_DOUBLE> || std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             if (std::isinf(_startValue))
                 ss << (_startValue < 0 ? "-" : "") << "INFINITY";
@@ -1505,7 +1538,7 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
                            startRefValues.size() ? &startRefValues[0] : 0, i))
             break; // no expected value function provided
 
-        if (IsTestNotAsExpected(expected, destItems, i))
+        if (IsTestNotAsExpected(expected, destItems, startRefValues, i))
         {
             std::stringstream logLine;
             logLine << "ERROR: Result " << i
diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h
index 8d875bc9..4a65d8da 100644
--- a/test_conformance/c11_atomics/host_atomics.h
+++ b/test_conformance/c11_atomics/host_atomics.h
@@ -18,6 +18,7 @@
 
 #include "harness/testHarness.h"
 #include <mutex>
+
 #include "CL/cl_half.h"
 
 #ifdef WIN32
diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index 06f2fd9e..d3dd3703 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -1329,6 +1329,7 @@ public:
     }
     bool IsTestNotAsExpected(const HostDataType &expected,
                              const std::vector<HostAtomicType> &testValues,
+                             const std::vector<HostDataType> &startRefValues,
                              cl_uint whichDestValue) override
     {
         if constexpr (is_host_fp_v<HostDataType>)
@@ -1343,6 +1344,7 @@ public:
         return CBasicTestMemOrderScope<
             HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                                testValues,
+                                                               startRefValues,
                                                                whichDestValue);
     }
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
@@ -1653,6 +1655,7 @@ public:
     }
     bool IsTestNotAsExpected(const HostDataType &expected,
                              const std::vector<HostAtomicType> &testValues,
+                             const std::vector<HostDataType> &startRefValues,
                              cl_uint whichDestValue) override
     {
         if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
@@ -1670,6 +1673,7 @@ public:
         return CBasicTestMemOrderScope<
             HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                                testValues,
+                                                               startRefValues,
                                                                whichDestValue);
     }
     int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
@@ -1776,38 +1780,23 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
 
     if (gFloatAtomicsSupported)
     {
-        auto spec_vals_fp64 =
-            CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_DOUBLE,
-                                            HOST_DOUBLE>::GetSpecialValues();
-
         CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_DOUBLE, HOST_DOUBLE>
             test_spec_double(TYPE_ATOMIC_DOUBLE, useSVM);
-        EXECUTE_TEST(error,
-                     test_spec_double.Execute(deviceID, context, queue,
-                                              spec_vals_fp64.size()
-                                                  * spec_vals_fp64.size()));
-
-        auto spec_vals_fp32 =
-            CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_FLOAT,
-                                            HOST_FLOAT>::GetSpecialValues();
+        EXECUTE_TEST(
+            error,
+            test_spec_double.Execute(deviceID, context, queue, num_elements));
 
         CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_FLOAT, HOST_FLOAT>
             test_spec_float(TYPE_ATOMIC_FLOAT, useSVM);
-        EXECUTE_TEST(error,
-                     test_spec_float.Execute(deviceID, context, queue,
-                                             spec_vals_fp32.size()
-                                                 * spec_vals_fp32.size()));
-
-        auto spec_vals_halfs =
-            CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF,
-                                            HOST_HALF>::GetSpecialValues();
+        EXECUTE_TEST(
+            error,
+            test_spec_float.Execute(deviceID, context, queue, num_elements));
 
         CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF, HOST_HALF>
             test_spec_half(TYPE_ATOMIC_HALF, useSVM);
-        EXECUTE_TEST(error,
-                     test_spec_half.Execute(deviceID, context, queue,
-                                            spec_vals_halfs.size()
-                                                * spec_vals_halfs.size()));
+        EXECUTE_TEST(
+            error,
+            test_spec_half.Execute(deviceID, context, queue, num_elements));
 
         CBasicTestFetchAdd<HOST_ATOMIC_HALF, HOST_HALF> test_half(
             TYPE_ATOMIC_HALF, useSVM);
@@ -2070,6 +2059,7 @@ public:
     }
     bool IsTestNotAsExpected(const HostDataType &expected,
                              const std::vector<HostAtomicType> &testValues,
+                             const std::vector<HostDataType> &startRefValues,
                              cl_uint whichDestValue) override
     {
         if constexpr (is_host_fp_v<HostDataType>)
@@ -2084,6 +2074,7 @@ public:
         return CBasicTestMemOrderScope<
             HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                                testValues,
+                                                               startRefValues,
                                                                whichDestValue);
     }
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
@@ -3172,18 +3163,21 @@ public:
     }
     bool IsTestNotAsExpected(const HostDataType &expected,
                              const std::vector<HostAtomicType> &testValues,
+                             const std::vector<HostDataType> &startRefValues,
                              cl_uint whichDestValue) override
     {
         if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
-                    IsTestNotAsExpected(expected, testValues, whichDestValue);
+                    IsTestNotAsExpected(expected, testValues, startRefValues,
+                                        whichDestValue);
             return false; // ignore all but 0 which stores final result
         }
         return CBasicTestMemOrderScope<
             HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                                testValues,
+                                                               startRefValues,
                                                                whichDestValue);
     }
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
@@ -3265,6 +3259,251 @@ public:
     }
 };
 
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestFetchMinSpecialFloats
+    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
+
+    std::vector<HostDataType> ref_vals;
+
+public:
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+    using CBasicTestMemOrderScope<HostAtomicType,
+                                  HostDataType>::MemoryOrderScopeStr;
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
+    CBasicTestFetchMinSpecialFloats(TExplicitAtomicType dataType, bool useSVM)
+        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+                                                                useSVM)
+    {
+        // StartValue is used as an index divisor in the following test
+        // logic. It is set to the number of special values, which allows
+        // threads to be mapped deterministically onto the input data array.
+        // This enables repeated min operations arranged so that every
+        // special value is compared against every other one (“all-to-all”).
+
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            auto spec_vals = GetSpecialValues();
+            StartValue(spec_vals.size());
+            CBasicTestMemOrderScope<HostAtomicType,
+                                    HostDataType>::OldValueCheck(false);
+        }
+    }
+
+    static std::vector<HostDataType> &GetSpecialValues()
+    {
+        static std::vector<HostDataType> special_values;
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            const HostDataType test_value_zero =
+                static_cast<HostDataType>(0.0f);
+            const HostDataType test_value_minus_zero =
+                static_cast<HostDataType>(-0.0f);
+            const HostDataType test_value_without_fraction =
+                static_cast<HostDataType>(2.0f);
+            const HostDataType test_value_with_fraction =
+                static_cast<HostDataType>(2.2f);
+
+            if (special_values.empty())
+            {
+                special_values = {
+                    static_cast<HostDataType>(test_value_minus_zero),
+                    static_cast<HostDataType>(test_value_zero),
+                    static_cast<HostDataType>(test_value_without_fraction),
+                    static_cast<HostDataType>(test_value_with_fraction),
+                    std::numeric_limits<HostDataType>::infinity(),
+                    std::numeric_limits<HostDataType>::quiet_NaN(),
+                    std::numeric_limits<HostDataType>::signaling_NaN(),
+                    -std::numeric_limits<HostDataType>::infinity(),
+                    -std::numeric_limits<HostDataType>::quiet_NaN(),
+                    -std::numeric_limits<HostDataType>::signaling_NaN(),
+                    std::numeric_limits<HostDataType>::lowest(),
+                    std::numeric_limits<HostDataType>::min(),
+                    std::numeric_limits<HostDataType>::max(),
+                };
+
+                if (0 != (CL_FP_DENORM & gFloatFPConfig))
+                {
+                    special_values.push_back(
+                        std::numeric_limits<HostDataType>::denorm_min());
+                }
+            }
+        }
+
+        return special_values;
+    }
+
+    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
+                      MTdata d) override
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            if (threadCount > ref_vals.size())
+            {
+                ref_vals.assign(threadCount, 0);
+                auto spec_vals = GetSpecialValues();
+
+                cl_uint total_cnt = 0;
+                while (total_cnt < threadCount)
+                {
+                    cl_uint block_cnt =
+                        std::min((cl_int)(threadCount - total_cnt),
+                                 (cl_int)spec_vals.size());
+                    memcpy(&ref_vals.at(total_cnt), spec_vals.data(),
+                           sizeof(HostDataType) * block_cnt);
+                    total_cnt += block_cnt;
+                }
+            }
+
+            memcpy(startRefValues, ref_vals.data(),
+                   sizeof(HostDataType) * threadCount);
+
+            return true;
+        }
+        return false;
+    }
+    std::string ProgramCore() override
+    {
+        // The start_value variable (set by StartValue) is used
+        // as a divisor of the thread index when selecting the operand for
+        // atomic_fetch_min. This groups threads into blocks corresponding
+        // to the number of special values and implements an “all-to-all”
+        // min pattern. As a result, each destination element is
+        // updated using different combinations of input values, enabling
+        // consistent comparison between host and device execution.
+
+        std::string memoryOrderScope = MemoryOrderScopeStr();
+        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+        return std::string(DataType().AddSubOperandTypeName())
+            + "  start_value = atomic_load_explicit(destMemory+tid, "
+              "memory_order_relaxed, memory_scope_work_group);\n"
+              "  atomic_store_explicit(destMemory+tid, oldValues[tid], "
+              "memory_order_relaxed, memory_scope_work_group);\n"
+              "  atomic_fetch_min"
+            + postfix + "(&destMemory[tid], ("
+            + DataType().AddSubOperandTypeName()
+            + ")oldValues[tid/(int)start_value]" + memoryOrderScope + ");\n";
+    }
+    void HostFunction(cl_uint tid, cl_uint threadCount,
+                      volatile HostAtomicType *destMemory,
+                      HostDataType *oldValues) override
+    {
+        auto spec_vals = GetSpecialValues();
+        host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid],
+                          MEMORY_ORDER_SEQ_CST);
+        host_atomic_fetch_min(&destMemory[tid],
+                              (HostDataType)oldValues[tid / spec_vals.size()],
+                              MemoryOrder());
+    }
+    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+                       HostDataType *startRefValues,
+                       cl_uint whichDestValue) override
+    {
+        expected = StartValue();
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            auto spec_vals = GetSpecialValues();
+            expected =
+                std::min(startRefValues[whichDestValue],
+                         startRefValues[whichDestValue / spec_vals.size()]);
+        }
+        return true;
+    }
+    bool IsTestNotAsExpected(const HostDataType &expected,
+                             const std::vector<HostAtomicType> &testValues,
+                             const std::vector<HostDataType> &startRefValues,
+                             cl_uint whichDestValue) override
+    {
+        if (testValues[whichDestValue] != expected)
+        {
+            auto spec_vals = GetSpecialValues();
+            // special cases
+            // min(-0, +0) = min(+0, -0) = +0 or -0,
+            if (((startRefValues[whichDestValue] == -0.f)
+                 && (startRefValues[whichDestValue / spec_vals.size()] == 0.f))
+                || ((startRefValues[whichDestValue] == 0.f)
+                    && (startRefValues[whichDestValue / spec_vals.size()]
+                        == -0.f)))
+                return false;
+            else if (is_qnan(startRefValues[whichDestValue / spec_vals.size()])
+                     || is_qnan(startRefValues[whichDestValue]))
+            {
+                // min(x, qNaN) = min(qNaN, x) = x,
+                // min(qNaN, qNaN) = qNaN,
+                if (is_qnan(startRefValues[whichDestValue / spec_vals.size()])
+                    && is_qnan(startRefValues[whichDestValue]))
+                    return !is_qnan(testValues[whichDestValue]);
+                else if (is_qnan(
+                             startRefValues[whichDestValue / spec_vals.size()]))
+                    return !std::isnan(testValues[whichDestValue])
+                        && testValues[whichDestValue]
+                        != startRefValues[whichDestValue]; // NaN != NaN always
+                                                           // true
+                else
+                    return !std::isnan(testValues[whichDestValue])
+                        && testValues[whichDestValue]
+                        != startRefValues[whichDestValue / spec_vals.size()];
+            }
+            else if (is_snan(startRefValues[whichDestValue / spec_vals.size()])
+                     || is_snan(startRefValues[whichDestValue]))
+            {
+                // min(x, sNaN) = min(sNaN, x) = NaN or x, and
+                // min(NaN, sNaN) = min(sNaN, NaN) = NaN
+                if (std::isnan(testValues[whichDestValue])
+                    || testValues[whichDestValue]
+                        == startRefValues[whichDestValue]
+                    || testValues[whichDestValue]
+                        == startRefValues[whichDestValue / spec_vals.size()])
+                    return false;
+            }
+        }
+
+        return CBasicTestMemOrderScope<
+            HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
+                                                               testValues,
+                                                               startRefValues,
+                                                               whichDestValue);
+    }
+    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
+                          cl_command_queue queue) override
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            if (LocalMemory()
+                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
+                    == 0)
+                return 0; // skip test - not applicable
+
+            if (!LocalMemory()
+                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
+                    == 0)
+                return 0;
+
+            if (!CBasicTestMemOrderScope<HostAtomicType,
+                                         HostDataType>::LocalMemory()
+                && CBasicTestMemOrderScope<HostAtomicType,
+                                           HostDataType>::DeclaredInProgram())
+            {
+                if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0;
+            }
+        }
+        return CBasicTestMemOrderScope<
+            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
+                                                             queue);
+    }
+    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            return threadCount;
+        }
+        return CBasicTestMemOrderScope<HostAtomicType,
+                                       HostDataType>::NumResults(threadCount,
+                                                                 deviceID);
+    }
+};
+
 static int test_atomic_fetch_min_generic(cl_device_id deviceID,
                                          cl_context context,
                                          cl_command_queue queue,
@@ -3290,6 +3529,12 @@ static int test_atomic_fetch_min_generic(cl_device_id deviceID,
 
     if (gFloatAtomicsSupported)
     {
+        CBasicTestFetchMinSpecialFloats<HOST_ATOMIC_FLOAT, HOST_FLOAT>
+            test_spec_float(TYPE_ATOMIC_FLOAT, useSVM);
+        EXECUTE_TEST(
+            error,
+            test_spec_float.Execute(deviceID, context, queue, num_elements));
+
         CBasicTestFetchMin<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
             TYPE_ATOMIC_DOUBLE, useSVM);
         EXECUTE_TEST(
@@ -3478,18 +3723,21 @@ public:
     }
     bool IsTestNotAsExpected(const HostDataType &expected,
                              const std::vector<HostAtomicType> &testValues,
+                             const std::vector<HostDataType> &startRefValues,
                              cl_uint whichDestValue) override
     {
         if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
-                    IsTestNotAsExpected(expected, testValues, whichDestValue);
+                    IsTestNotAsExpected(expected, testValues, startRefValues,
+                                        whichDestValue);
             return false; // ignore all but 0 which stores final result
         }
         return CBasicTestMemOrderScope<
             HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                                testValues,
+                                                               startRefValues,
                                                                whichDestValue);
     }
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,

From 4e3f16b2b91defa4b699e2f4e44433af2cc4cf4d Mon Sep 17 00:00:00 2001
From: Michal Babej <90404+franz@users.noreply.github.com>
Date: Tue, 17 Mar 2026 18:25:59 +0200
Subject: [PATCH 52/54] initial RISC-V support (#2614)

Unlike the related PR #2344, which simply warns about unsupported FTZ, this
PR attempts to handle FTZ correctly on RISC-V.
The RISC-V 'f' extension provides no way to enable or disable flushing
subnormals to zero; implementations are required to always support
subnormals. Therefore this PR reuses the FTZ handling code from PPC, where
flushing also has to be performed explicitly.
---
 CMakeLists.txt                                 |  2 ++
 test_common/harness/fpcontrol.h                | 11 +++++++----
 test_common/harness/rounding_mode.cpp          |  5 +++++
 test_common/harness/testHarness.cpp            |  2 ++
 test_conformance/contractions/contractions.cpp | 18 ++++++++++--------
 .../conversions/basic_test_conversions.h       |  2 --
 .../math_brute_force/reference_math.cpp        | 10 ++++++----
 7 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 30a64447..3dbd7944 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -89,6 +89,8 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
     set(CLConform_TARGET_ARCH x86_64)
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*")
     set(CLConform_TARGET_ARCH x86)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv.*")
+    set(CLConform_TARGET_ARCH RISCV)
 endif()
 
 if(NOT DEFINED CLConform_TARGET_ARCH)
diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h
index afb0f5a3..9c1e0db0 100644
--- a/test_common/harness/fpcontrol.h
+++ b/test_common/harness/fpcontrol.h
@@ -45,6 +45,9 @@ typedef int64_t FPU_mode_type;
 #elif defined(__PPC__)
 #include <fpu_control.h>
 extern __thread fpu_control_t fpu_control;
+#elif defined(__riscv)
+#define _FPU_MASK_NI 1
+static FPU_mode_type fpu_control;
 #elif defined(__mips__)
 #include "mips/m32c1.h"
 #endif
@@ -56,7 +59,7 @@ inline void ForceFTZ(FPU_mode_type *oldMode)
     || defined(_M_X64) || defined(__MINGW32__)
     *oldMode = _mm_getcsr();
     _mm_setcsr(*oldMode | 0x8040);
-#elif defined(__PPC__)
+#elif defined(__PPC__) || defined(__riscv)
     *oldMode = fpu_control;
     fpu_control |= _FPU_MASK_NI;
 #elif defined(__arm__)
@@ -89,8 +92,8 @@ inline void DisableFTZ(FPU_mode_type *oldMode)
     || defined(_M_X64) || defined(__MINGW32__)
     *oldMode = _mm_getcsr();
     _mm_setcsr(*oldMode & ~0x8040);
-#elif defined(__PPC__)
-    *mode = fpu_control;
+#elif defined(__PPC__) || defined(__riscv)
+    *oldMode = fpu_control;
     fpu_control &= ~_FPU_MASK_NI;
 #elif defined(__arm__)
     unsigned fpscr;
@@ -121,7 +124,7 @@ inline void RestoreFPState(FPU_mode_type *mode)
 #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)               \
     || defined(_M_X64) || defined(__MINGW32__)
     _mm_setcsr(*mode);
-#elif defined(__PPC__)
+#elif defined(__PPC__) || defined(__riscv)
     fpu_control = *mode;
 #elif defined(__arm__)
     __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
diff --git a/test_common/harness/rounding_mode.cpp b/test_common/harness/rounding_mode.cpp
index 5aeb86f1..31b18f75 100644
--- a/test_common/harness/rounding_mode.cpp
+++ b/test_common/harness/rounding_mode.cpp
@@ -201,6 +201,7 @@ RoundingMode get_round(void)
 #elif defined(__mips__)
 #include "mips/m32c1.h"
 #endif
+
 void *FlushToZero(void)
 {
 #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
@@ -231,6 +232,8 @@ void *FlushToZero(void)
 #elif defined(__mips__)
     fpa_bissr(FPA_CSR_FS);
     return NULL;
+#elif defined(__riscv)
+    return NULL;
 #else
 #error Unknown arch
 #endif
@@ -266,6 +269,8 @@ void UnFlushToZero(void *p)
     _FPU_SETCW(flags);
 #elif defined(__mips__)
     fpa_bicsr(FPA_CSR_FS);
+#elif defined(__riscv)
+    return;
 #else
 #error Unknown arch
 #endif
diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp
index 301b86d0..6f1d1505 100644
--- a/test_common/harness/testHarness.cpp
+++ b/test_common/harness/testHarness.cpp
@@ -1409,6 +1409,8 @@ void PrintArch(void)
     vlog("ARCH:\tWindows\n");
 #elif defined(__mips__)
     vlog("ARCH:\tmips\n");
+#elif defined(__riscv)
+    vlog("ARCH:\tRISC-V\n");
 #else
 #error unknown arch
 #endif
diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp
index 0c868764..b3f1098d 100644
--- a/test_conformance/contractions/contractions.cpp
+++ b/test_conformance/contractions/contractions.cpp
@@ -191,7 +191,7 @@ double sse_mul_sd(double x, double y)
 }
 #endif
 
-#ifdef __PPC__
+#if defined(__PPC__) || defined(__riscv)
 float ppc_mul(float a, float b)
 {
     float p;
@@ -630,9 +630,11 @@ test_status InitCL( cl_device_id device )
             // turn that off
             f3[i] = sse_mul(q, q2);
             f4[i] = sse_mul(-q, q2);
-#elif defined(__PPC__)
-            // None of the current generation PPC processors support HW
-            // FTZ, emulate it in sw.
+#elif (defined(__PPC__) || defined(__riscv))
+            // RISC-V CPUs with default 'f' fp32 extension do not support
+            // enabling/disabling FTZ mode, subnormals are always handled
+            // without FTZ. None of the current generation PPC processors
+            // support HW FTZ, emulate it in sw.
             f3[i] = ppc_mul(q, q2);
             f4[i] = ppc_mul(-q, q2);
 #else
@@ -721,9 +723,10 @@ test_status InitCL( cl_device_id device )
                 skipTest[j][i] = (bufSkip[i] ||
                                   (gSkipNanInf && (FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)))));
 
-#if defined(__PPC__)
-                // Since the current Power processors don't emulate flush to zero in HW,
-                // it must be emulated in SW instead.
+#if defined(__PPC__) || defined(__riscv)
+                // Since the current Power processors don't emulate flush to
+                // zero in HW, it must be emulated in SW instead. (same for
+                // RISC-V CPUs with 'f' extension)
                 if (gForceFTZ)
                 {
                     if ((fabsf(correct[j][i]) < FLT_MIN) && (correct[j][i] != 0.0f))
@@ -760,7 +763,6 @@ test_status InitCL( cl_device_id device )
                 }
             }
 
-
             double *f  = (double*) buf1;
             double *f2 = (double*) buf2;
             double *f3 = (double*) buf3_double;
diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h
index 6846f780..496ea730 100644
--- a/test_conformance/conversions/basic_test_conversions.h
+++ b/test_conformance/conversions/basic_test_conversions.h
@@ -120,8 +120,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p);
 uint64_t GetTime(void);
 
 void WriteInputBufferComplete(void *);
-void *FlushToZero(void);
-void UnFlushToZero(void *);
 }
 
 struct CalcRefValsBase
diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp
index a66e6f7e..183edc74 100644
--- a/test_conformance/math_brute_force/reference_math.cpp
+++ b/test_conformance/math_brute_force/reference_math.cpp
@@ -859,7 +859,9 @@ double reference_add(double x, double y)
     __m128 vb = _mm_set_ss((float)b);
     va = _mm_add_ss(va, vb);
     _mm_store_ss((float *)&a, va);
-#elif defined(__PPC__)
+#elif defined(__PPC__) || defined(__riscv)
+    // RISC-V CPUs with default 'f' fp32 extension do not support any way to
+    // enable/disable FTZ mode, subnormals are always handled without flushing.
     // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
     // denorm's to zero. As such, the reference add with FTZ must be emulated in
     // sw.
@@ -876,7 +878,7 @@ double reference_add(double x, double y)
         } ub;
         ub.d = b;
         cl_uint mantA, mantB;
-        cl_ulong addendA, addendB, sum;
+        cl_ulong addendA, addendB;
         int expA = extractf(a, &mantA);
         int expB = extractf(b, &mantB);
         cl_uint signA = ua.u & 0x80000000U;
@@ -972,7 +974,7 @@ double reference_multiply(double x, double y)
     __m128 vb = _mm_set_ss((float)b);
     va = _mm_mul_ss(va, vb);
     _mm_store_ss((float *)&a, va);
-#elif defined(__PPC__)
+#elif defined(__PPC__) || defined(__riscv)
     // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
     // denorm's to zero. As such, the reference multiply with FTZ must be
     // emulated in sw.
@@ -3351,7 +3353,7 @@ long double reference_cbrtl(long double x)
 
 long double reference_rintl(long double x)
 {
-#if defined(__PPC__)
+#if defined(__PPC__) || defined(__riscv)
     // On PPC, long doubles are maintained as 2 doubles. Therefore, the combined
     // mantissa can represent more than LDBL_MANT_DIG binary digits.
     x = rintl(x);

From 0338fd9d39d58b10129774d35f4453a9a03b6049 Mon Sep 17 00:00:00 2001
From: Jose Lopez <joselope@qti.qualcomm.com>
Date: Tue, 17 Mar 2026 16:27:02 +0000
Subject: [PATCH 53/54] Unblock Visual Studio ARM64EC target builds. Disable
 inclusion of SSE headers for non x86 targets. (#2631)

- Use `CMAKE_VS_PLATFORM_NAME` in tandem with `CMAKE_SYSTEM_PROCESSOR` to
  set `CLConform_TARGET_ARCH`. `CMAKE_VS_PLATFORM_NAME` provides
  cross-compilation information that `CMAKE_SYSTEM_PROCESSOR` lacks, such as
  targeting `ARM64EC`.

- Curb the inclusion of `SSE` headers when targeting non-x86 targets.
---
 CMakeLists.txt | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3dbd7944..09fa4854 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -81,10 +81,11 @@ include(CheckFunctionExists)
 include(CheckIncludeFiles)
 include(CheckCXXCompilerFlag)
 
-if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
-    set(CLConform_TARGET_ARCH ARM)
-elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)")
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)" OR
+   CMAKE_VS_PLATFORM_NAME MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)")
     set(CLConform_TARGET_ARCH ARM64)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
+    set(CLConform_TARGET_ARCH ARM)
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
     set(CLConform_TARGET_ARCH x86_64)
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*")
@@ -141,8 +142,12 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang"
         add_cxx_flag_if_supported(-frounding-math)
     endif()
 else()
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D__SSE__")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__")
+    # Curb the inclusion of SSE headers when compiling for non x86 targets
+    if(${CLConform_TARGET_ARCH} STREQUAL "x86_64" OR ${CLConform_TARGET_ARCH}
+            STREQUAL "x86")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D__SSE__")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__")
+    endif()
 endif()
 
 # Set a module's COMPILE_FLAGS if using gcc or clang.

From 4a923c074b2acbc0f2db18c0360de31acb6fdad6 Mon Sep 17 00:00:00 2001
From: Ole Strohm <ole.strohm@arm.com>
Date: Tue, 17 Mar 2026 16:54:32 +0000
Subject: [PATCH 54/54] Make CL_CONFORMANCE_RESULTS_FILENAME Bazel-aware
 (#2629)

Bazel requires that test outputs are put in a specific directory given
by $TEST_UNDECLARED_OUTPUTS_DIR when running a test through Bazel test.

This patch checks for the environment variable $BAZEL_TEST, which Bazel
sets when running tests, and prepends the specified directory to the
user-provided path.

The behaviour when running outside of a Bazel test environment is
unchanged.
---
 test_common/harness/testHarness.cpp | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp
index 6f1d1505..92729c49 100644
--- a/test_common/harness/testHarness.cpp
+++ b/test_common/harness/testHarness.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 #include "testHarness.h"
+#include "stringHelpers.h"
 #include "compat.h"
 #include <algorithm>
 #include <stdio.h>
@@ -21,6 +22,7 @@
 #include <string.h>
 #include <cassert>
 #include <deque>
+#include <filesystem>
 #include <mutex>
 #include <set>
 #include <stdexcept>
@@ -33,6 +35,8 @@
 #include "imageHelpers.h"
 #include "parseParameters.h"
 
+namespace fs = std::filesystem;
+
 #if !defined(_WIN32)
 #include <sys/utsname.h>
 #include <unistd.h>
@@ -95,11 +99,25 @@ static int saveResultsToJson(const char *suiteName, test_definition testList[],
         return EXIT_SUCCESS;
     }
 
-    FILE *file = fopen(fileName, "w");
+    fs::path file_path(fileName);
+
+    // When running under Bazel test, prepend the Bazel output directory to
+    // the provided path
+    if (nullptr != getenv("BAZEL_TEST"))
+    {
+        char *bazel_output_dir = getenv("TEST_UNDECLARED_OUTPUTS_DIR");
+        if (nullptr != bazel_output_dir)
+        {
+            file_path = fs::path(bazel_output_dir) / file_path;
+        }
+    }
+
+    auto file_path_str = to_string(file_path.u8string());
+    FILE *file = fopen(file_path_str.c_str(), "w");
     if (NULL == file)
     {
         log_error("ERROR: Failed to open '%s' for writing results.\n",
-                  fileName);
+                  file_path_str.c_str());
         return EXIT_FAILURE;
     }
 
@@ -128,7 +146,8 @@ static int saveResultsToJson(const char *suiteName, test_definition testList[],
 
     int ret = fclose(file) ? EXIT_FAILURE : EXIT_SUCCESS;
 
-    log_info("Saving results to %s: %s!\n", fileName, save_map[ret]);
+    log_info("Saving results to %s: %s!\n", file_path_str.c_str(),
+             save_map[ret]);
 
     return ret;
 }
@@ -309,6 +328,8 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
                  "CL_CONFORMANCE_RESULTS_FILENAME (currently '%s')\n",
                  fileName != NULL ? fileName : "<undefined>");
         log_info("\t      to save results to JSON file.\n");
+        log_info("\t      When running in Bazel test this is relative to "
+                 "$TEST_UNDECLARED_OUTPUTS_DIR.\n");
 
         log_info("\n");
         log_info("Test names:\n");