From bcfa1f7c263a3c3d1091697d4bcbd08ddaf021ea Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 4 Feb 2025 17:45:20 +0100
Subject: [PATCH 01/12] Added corrections to re-enable reciprocal test in
 math_brute_force suite for relaxed math mode (#2221)

fixes #2145

As suggested by @svenvh reciprocal has different precision requirements
than divide. This PR introduces special path for reciprocal for
binar_float_operator to test reciprocal with relaxed math. If this PR
will get approvals, invalidate PR #2162
---
 .../binary_operator_double.cpp                | 38 +++++++---
 .../binary_operator_float.cpp                 | 70 ++++++++++++++-----
 .../math_brute_force/binary_operator_half.cpp | 48 +++++++++----
 .../math_brute_force/function_list.cpp        | 20 +++++-
 test_conformance/math_brute_force/main.cpp    |  9 +--
 .../math_brute_force/reference_math.cpp       | 12 ++--
 6 files changed, 146 insertions(+), 51 deletions(-)

diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp
index 7600ab16..43cf7eff 100644
--- a/test_conformance/math_brute_force/binary_operator_double.cpp
+++ b/test_conformance/math_brute_force/binary_operator_double.cpp
@@ -214,6 +214,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_double *s;
     cl_double *s2;
 
+    bool reciprocal = strcmp(name, "reciprocal") == 0;
+    const double reciprocalArrayX[] = { 1.0 };
+    const double *specialValuesX =
+        reciprocal ? reciprocalArrayX : specialValues;
+    size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount;
+
     Force64BitFPUPrecision();
 
     cl_event e[VECTOR_SIZE_COUNT];
@@ -242,7 +248,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
     cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
     cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
+    int totalSpecialValueCount = specialValuesCountX * specialValuesCount;
     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
 
     // Test edge cases
@@ -252,14 +258,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         cl_double *fp2 = (cl_double *)p2;
         uint32_t x, y;
 
-        x = (job_id * buffer_elements) % specialValuesCount;
+        x = (job_id * buffer_elements) % specialValuesCountX;
         y = (job_id * buffer_elements) / specialValuesCount;
 
         for (; idx < buffer_elements; idx++)
         {
-            fp[idx] = specialValues[x];
+            fp[idx] = specialValuesX[x];
             fp2[idx] = specialValues[y];
-            if (++x >= specialValuesCount)
+            ++x;
+            if (x >= specialValuesCountX)
             {
                 x = 0;
                 y++;
@@ -271,7 +278,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     // Init any remaining values
     for (; idx < buffer_elements; idx++)
     {
-        p[idx] = genrand_int64(d);
+        p[idx] =
+            reciprocal ? ((cl_ulong *)specialValuesX)[0] : genrand_int64(d);
         p2[idx] = genrand_int64(d);
     }
 
@@ -375,8 +383,13 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
     s = (cl_double *)gIn + thread_id * buffer_elements;
     s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (cl_double)func.f_ff(s[j], s2[j]);
+
+    if (reciprocal)
+        for (size_t j = 0; j < buffer_elements; j++)
+            r[j] = (float)func.f_f(s2[j]);
+    else
+        for (size_t j = 0; j < buffer_elements; j++)
+            r[j] = (cl_double)func.f_ff(s[j], s2[j]);
 
     // Read the data back -- no need to wait for the first N-1 buffers but wait
     // for the last buffer. This is an in order queue.
@@ -406,7 +419,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (t[j] != q[j])
             {
                 cl_double test = ((cl_double *)q)[j];
-                long double correct = func.f_ff(s[j], s2[j]);
+                long double correct =
+                    reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
+
                 float err = Bruteforce_Ulp_Error_Double(test, correct);
                 int fail = !(fabsf(err) <= ulps);
 
@@ -479,8 +494,11 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                     }
                     else if (IsDoubleSubnormal(s2[j]))
                     {
-                        long double correct2 = func.f_ff(s[j], 0.0);
-                        long double correct3 = func.f_ff(s[j], -0.0);
+                        long double correct2 =
+                            reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
+                        long double correct3 =
+                            reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
+
                         float err2 =
                             Bruteforce_Ulp_Error_Double(test, correct2);
                         float err3 =
diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp
index 6f5a3645..49cfe670 100644
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ b/test_conformance/math_brute_force/binary_operator_float.cpp
@@ -208,6 +208,11 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_float *s2 = 0;
     RoundingMode oldRoundMode;
 
+    bool reciprocal = strcmp(name, "reciprocal") == 0;
+    const float reciprocalArrayX[] = { 1.f };
+    const float *specialValuesX = reciprocal ? reciprocalArrayX : specialValues;
+    size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount;
+
     if (relaxedMode)
     {
         func = job->f->rfunc;
@@ -239,7 +244,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
     cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
     cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
+    int totalSpecialValueCount = specialValuesCountX * specialValuesCount;
     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
 
     if (job_id <= (cl_uint)lastSpecialJobIndex)
@@ -247,15 +252,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         // Insert special values
         uint32_t x, y;
 
-        x = (job_id * buffer_elements) % specialValuesCount;
+        x = (job_id * buffer_elements) % specialValuesCountX;
         y = (job_id * buffer_elements) / specialValuesCount;
 
         for (; idx < buffer_elements; idx++)
         {
-            p[idx] = ((cl_uint *)specialValues)[x];
+            p[idx] = ((cl_uint *)specialValuesX)[x];
             p2[idx] = ((cl_uint *)specialValues)[y];
             ++x;
-            if (x >= specialValuesCount)
+            if (x >= specialValuesCountX)
             {
                 x = 0;
                 y++;
@@ -269,13 +274,19 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                 if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
                 if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
             }
+            else if (relaxedMode && reciprocal)
+            {
+                cl_uint p2j = p2[idx] & 0x7fffffff;
+                // Replace values outside [2^-126, 2^126] with QNaN
+                if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000;
+            }
         }
     }
 
     // Init any remaining values
     for (; idx < buffer_elements; idx++)
     {
-        p[idx] = genrand_int32(d);
+        p[idx] = reciprocal ? ((cl_uint *)specialValuesX)[0] : genrand_int32(d);
         p2[idx] = genrand_int32(d);
 
         if (relaxedMode && strcmp(name, "divide") == 0)
@@ -286,6 +297,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
             if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
         }
+        else if (relaxedMode && reciprocal)
+        {
+            cl_uint p2j = p2[idx] & 0x7fffffff;
+            // Replace values outside [2^-126, 2^126] with QNaN
+            if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000;
+        }
     }
 
     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -402,18 +419,31 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     s2 = (float *)gIn2 + thread_id * buffer_elements;
     if (gInfNanSupport)
     {
-        for (size_t j = 0; j < buffer_elements; j++)
-            r[j] = (float)func.f_ff(s[j], s2[j]);
+        if (reciprocal)
+            for (size_t j = 0; j < buffer_elements; j++)
+                r[j] = (float)func.f_f(s2[j]);
+        else
+            for (size_t j = 0; j < buffer_elements; j++)
+                r[j] = (float)func.f_ff(s[j], s2[j]);
     }
     else
     {
-        for (size_t j = 0; j < buffer_elements; j++)
-        {
-            feclearexcept(FE_OVERFLOW);
-            r[j] = (float)func.f_ff(s[j], s2[j]);
-            overflow[j] =
-                FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
-        }
+        if (reciprocal)
+            for (size_t j = 0; j < buffer_elements; j++)
+            {
+                feclearexcept(FE_OVERFLOW);
+                r[j] = (float)func.f_f(s2[j]);
+                overflow[j] =
+                    FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+            }
+        else
+            for (size_t j = 0; j < buffer_elements; j++)
+            {
+                feclearexcept(FE_OVERFLOW);
+                r[j] = (float)func.f_ff(s[j], s2[j]);
+                overflow[j] =
+                    FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+            }
     }
 
     if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
@@ -448,7 +478,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (t[j] != q[j])
             {
                 float test = ((float *)q)[j];
-                double correct = func.f_ff(s[j], s2[j]);
+                double correct =
+                    reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
 
                 // Per section 10 paragraph 6, accept any result if an input or
                 // output is a infinity or NaN or overflow
@@ -485,7 +516,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                     }
 
                     // retry per section 6.5.3.3
-                    if (IsFloatSubnormal(s[j]))
+                    if (!reciprocal && IsFloatSubnormal(s[j]))
                     {
                         double correct2, correct3;
                         float err2, err3;
@@ -591,8 +622,10 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
                         if (!gInfNanSupport) feclearexcept(FE_OVERFLOW);
 
-                        correct2 = func.f_ff(s[j], 0.0);
-                        correct3 = func.f_ff(s[j], -0.0);
+                        correct2 =
+                            reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
+                        correct3 =
+                            reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
 
                         // Per section 10 paragraph 6, accept any result if an
                         // input or output is a infinity or NaN or overflow
@@ -625,7 +658,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                     }
                 }
 
-
                 if (fabsf(err) > tinfo->maxError)
                 {
                     tinfo->maxError = fabsf(err);
diff --git a/test_conformance/math_brute_force/binary_operator_half.cpp b/test_conformance/math_brute_force/binary_operator_half.cpp
index b4abf490..1a0776e3 100644
--- a/test_conformance/math_brute_force/binary_operator_half.cpp
+++ b/test_conformance/math_brute_force/binary_operator_half.cpp
@@ -120,6 +120,12 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
     std::vector<float> s(0), s2(0);
     RoundingMode oldRoundMode;
 
+    bool reciprocal = strcmp(name, "reciprocal") == 0;
+    const cl_half reciprocalArrayHalfX[] = { 0x3c00 };
+    const cl_half *specialValuesHalfX =
+        reciprocal ? reciprocalArrayHalfX : specialValuesHalf;
+    size_t specialValuesHalfCountX = reciprocal ? 1 : specialValuesHalfCount;
+
     cl_event e[VECTOR_SIZE_COUNT];
     cl_half *out[VECTOR_SIZE_COUNT];
 
@@ -148,7 +154,7 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
     cl_half *p2 = (cl_half *)gIn2 + thread_id * buffer_elements;
     cl_uint idx = 0;
     int totalSpecialValueCount =
-        specialValuesHalfCount * specialValuesHalfCount;
+        specialValuesHalfCountX * specialValuesHalfCount;
     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
 
     if (job_id <= (cl_uint)lastSpecialJobIndex)
@@ -156,14 +162,15 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
         // Insert special values
         uint32_t x, y;
 
-        x = (job_id * buffer_elements) % specialValuesHalfCount;
+        x = (job_id * buffer_elements) % specialValuesHalfCountX;
         y = (job_id * buffer_elements) / specialValuesHalfCount;
 
         for (; idx < buffer_elements; idx++)
         {
-            p[idx] = specialValuesHalf[x];
+            p[idx] = specialValuesHalfX[x];
             p2[idx] = specialValuesHalf[y];
-            if (++x >= specialValuesHalfCount)
+            ++x;
+            if (x >= specialValuesHalfCountX)
             {
                 x = 0;
                 y++;
@@ -175,7 +182,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
     // Init any remaining values
     for (; idx < buffer_elements; idx++)
     {
-        p[idx] = (cl_half)genrand_int32(d);
+        p[idx] = reciprocal ? ((cl_half *)specialValuesHalfX)[0]
+                            : (cl_half)genrand_int32(d);
         p2[idx] = (cl_half)genrand_int32(d);
     }
     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -283,11 +291,23 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
     s.resize(buffer_elements);
     s2.resize(buffer_elements);
 
-    for (size_t j = 0; j < buffer_elements; j++)
+    if (reciprocal)
     {
-        s[j] = HTF(p[j]);
-        s2[j] = HTF(p2[j]);
-        r[j] = HFF(func.f_ff(s[j], s2[j]));
+        for (size_t j = 0; j < buffer_elements; j++)
+        {
+            s[j] = HTF(p[j]);
+            s2[j] = HTF(p2[j]);
+            r[j] = HFF(func.f_f(s2[j]));
+        }
+    }
+    else
+    {
+        for (size_t j = 0; j < buffer_elements; j++)
+        {
+            s[j] = HTF(p[j]);
+            s2[j] = HTF(p2[j]);
+            r[j] = HFF(func.f_ff(s[j], s2[j]));
+        }
     }
 
     if (ftz) RestoreFPState(&oldMode);
@@ -320,7 +340,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             if (r[j] != q[j])
             {
                 float test = HTF(q[j]);
-                float correct = func.f_ff(s[j], s2[j]);
+                float correct =
+                    reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
 
                 // Per section 10 paragraph 6, accept any result if an input or
                 // output is a infinity or NaN or overflow
@@ -446,9 +467,10 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
                         double correct2, correct3;
                         float err2, err3;
 
-                        correct2 = func.f_ff(s[j], 0.0);
-                        correct3 = func.f_ff(s[j], -0.0);
-
+                        correct2 =
+                            reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
+                        correct3 =
+                            reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
 
                         // Per section 10 paragraph 6, accept any result if an
                         // input or output is a infinity or NaN or overflow
diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp
index f06921bd..fcf1ea23 100644
--- a/test_conformance/math_brute_force/function_list.cpp
+++ b/test_conformance/math_brute_force/function_list.cpp
@@ -78,6 +78,10 @@
 #define reference_copysign NULL
 #define reference_sqrt NULL
 #define reference_sqrtl NULL
+#define reference_reciprocal NULL
+#define reference_reciprocall NULL
+#define reference_relaxed_reciprocal NULL
+
 #define reference_divide NULL
 #define reference_dividel NULL
 #define reference_relaxed_divide NULL
@@ -346,7 +350,6 @@ const Func functionList[] = {
 
     ENTRY(pown, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i),
     ENTRY(powr, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF),
-    //ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
     ENTRY(remainder, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
     ENTRY(remquo, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
     ENTRY(rint, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
@@ -418,6 +421,21 @@ const Func functionList[] = {
     // basic operations
     OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
     OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
+    //ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
+    { "reciprocal",
+      "/",
+      { (void*)reference_reciprocal },
+      { (void*)reference_reciprocall },
+      { (void*)reference_relaxed_reciprocal },
+      2.5f,
+      0.0f,
+      0.0f,
+      3.0f,
+      2.5f,
+      INFINITY,
+      FTZ_OFF,
+      RELAXED_ON,
+      binaryOperatorF },
     { "divide",
       "/",
       { (void*)reference_divide },
diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp
index 38954f3f..f919ffa6 100644
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp
@@ -154,7 +154,7 @@ static int doTest(const char *name)
         exit(EXIT_FAILURE);
     }
 
-    if (func_data->func.p == NULL)
+    if (func_data->func.p == NULL && func_data->rfunc.p == NULL)
     {
         vlog("'%s' is missing implementation, skipping function.\n",
              func_data->name);
@@ -308,9 +308,10 @@ static test_definition test_list[] = {
     ADD_TEST(half_log),      ADD_TEST(half_log2),  ADD_TEST(half_log10),
     ADD_TEST(half_powr),     ADD_TEST(half_recip), ADD_TEST(half_rsqrt),
     ADD_TEST(half_sin),      ADD_TEST(half_sqrt),  ADD_TEST(half_tan),
-    ADD_TEST(add),           ADD_TEST(subtract),   ADD_TEST(divide),
-    ADD_TEST(divide_cr),     ADD_TEST(multiply),   ADD_TEST(assignment),
-    ADD_TEST(not ),          ADD_TEST(erf),        ADD_TEST(erfc),
+    ADD_TEST(add),           ADD_TEST(subtract),   ADD_TEST(reciprocal),
+    ADD_TEST(divide),        ADD_TEST(divide_cr),  ADD_TEST(multiply),
+    ADD_TEST(assignment),    ADD_TEST(not ),       ADD_TEST(erf),
+    ADD_TEST(erfc),
 };
 
 #undef ADD_TEST
diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp
index 049f2013..acde1136 100644
--- a/test_conformance/math_brute_force/reference_math.cpp
+++ b/test_conformance/math_brute_force/reference_math.cpp
@@ -1856,6 +1856,13 @@ double reference_logb(double x)
 
 double reference_relaxed_reciprocal(double x) { return 1.0f / ((float)x); }
 
+long double reference_reciprocall(long double y)
+{
+    double dx = 1.0;
+    double dy = y;
+    return dx / dy;
+}
+
 double reference_reciprocal(double x) { return 1.0 / x; }
 
 double reference_remainder(double x, double y)
@@ -3740,9 +3747,6 @@ long double reference_nanl(cl_ulong x)
     return (long double)u.f;
 }
 
-
-long double reference_reciprocall(long double x) { return 1.0L / x; }
-
 long double reference_remainderl(long double x, long double y)
 {
     int i;
@@ -5771,4 +5775,4 @@ long double reference_erfcl(long double x) { return erfc(x); }
 long double reference_erfl(long double x) { return erf(x); }
 
 double reference_erfc(double x) { return erfc(x); }
-double reference_erf(double x) { return erf(x); }
\ No newline at end of file
+double reference_erf(double x) { return erf(x); }

From 2031e21a5869e00bc04425e09b0a2eef9eeb6d96 Mon Sep 17 00:00:00 2001
From: Antonios Christidis <96555013+Antonios-C@users.noreply.github.com>
Date: Wed, 5 Feb 2025 06:58:17 -0600
Subject: [PATCH 02/12] Fix Build Warnings for AArch64 (#2242)

This commit links to issue (#2234).

When cross-compiling for AArch64, using gcc 13.3, you encounter three
warnings types that turn into errors:

- maybe-uninitialized
- stringop-truncation
- strict-aliasing

This commit fixes all the warnings found, in regards to the first two
rules. To resolve the warnigns due to strict-aliasing, I am editing the
CMake build system.

Signed-off-by: Antonios Christidis <a-christidis@ti.com>
---
 CMakeLists.txt                                   |  1 +
 test_common/harness/imageHelpers.cpp             | 16 ++++++++++++++++
 test_common/harness/typeWrappers.cpp             |  2 +-
 test_conformance/SVM/test_enqueue_api.cpp        |  4 ++--
 .../api/test_wg_suggested_local_work_size.cpp    |  2 +-
 test_conformance/basic/test_imagereadwrite.cpp   |  6 ++++++
 test_conformance/basic/test_imagereadwrite3d.cpp |  6 ++++++
 test_conformance/buffers/test_buffer_write.cpp   |  4 ++--
 test_conformance/contractions/contractions.cpp   | 10 ++++++----
 .../conversions/basic_test_conversions.cpp       | 14 ++++++++++----
 .../conversions/test_conversions.cpp             |  7 ++++---
 .../events/test_event_dependencies.cpp           |  2 +-
 test_conformance/half/Test_vStoreHalf.cpp        |  8 ++++----
 test_conformance/half/main.cpp                   |  5 +++--
 .../images/clCopyImage/test_copy_1D.cpp          |  2 +-
 .../images/clCopyImage/test_copy_1D_array.cpp    |  2 +-
 .../images/clCopyImage/test_copy_2D.cpp          |  2 +-
 .../images/clCopyImage/test_copy_2D_2D_array.cpp |  2 +-
 .../images/clCopyImage/test_copy_2D_3D.cpp       |  2 +-
 .../images/clCopyImage/test_loops.cpp            |  4 ++--
 .../images/clFillImage/test_fill_generic.cpp     |  5 +++++
 test_conformance/images/clGetInfo/test_2D.cpp    | 12 ++++++++++++
 .../test_cl_ext_image_buffer.hpp                 |  2 +-
 .../test_cl_ext_image_from_buffer.cpp            |  2 +-
 .../images/kernel_read_write/test_iterations.cpp |  2 +-
 .../kernel_read_write/test_write_image.cpp       |  4 ++--
 test_conformance/integer_ops/test_unary_ops.cpp  |  2 +-
 test_conformance/printf/test_printf.cpp          |  3 ++-
 test_conformance/select/test_select.cpp          | 12 ++++++++----
 test_conformance/subgroups/subhelpers.h          |  2 +-
 30 files changed, 104 insertions(+), 43 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 40deed8c..d353760a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -107,6 +107,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang"
     add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive
     add_cxx_flag_if_supported(-Wno-unknown-pragmas) # Issue #785
     add_cxx_flag_if_supported(-Wno-error=asm-operand-widths) # Issue #784
+    add_cxx_flag_if_supported(-Wno-strict-aliasing)  # Issue 2234
 
     # -msse -mfpmath=sse to force gcc to use sse for float math,
     # avoiding excess precision problems that cause tests like int2float
diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp
index b354baeb..3de7e948 100644
--- a/test_common/harness/imageHelpers.cpp
+++ b/test_common/harness/imageHelpers.cpp
@@ -2415,6 +2415,12 @@ int debug_find_vector_in_image(void *imagePtr, image_descriptor *imageInfo,
                 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
             depth = (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
             break;
+        default:
+            log_error("ERROR: Invalid imageInfo->type = %d\n", imageInfo->type);
+            width = 0;
+            depth = 0;
+            height = 0;
+            break;
     }
 
     row_pitch = width * get_pixel_size(imageInfo->format);
@@ -3661,6 +3667,11 @@ void copy_image_data(image_descriptor *srcImageInfo,
                     ? (srcImageInfo->height >> src_lod)
                     : 1;
                 break;
+            default:
+                log_error("ERROR: Invalid srcImageInfo->type = %d\n",
+                          srcImageInfo->type);
+                src_lod = 0;
+                break;
         }
         src_mip_level_offset = compute_mip_level_offset(srcImageInfo, src_lod);
         src_row_pitch_lod =
@@ -3707,6 +3718,11 @@ void copy_image_data(image_descriptor *srcImageInfo,
                     ? (dstImageInfo->height >> dst_lod)
                     : 1;
                 break;
+            default:
+                log_error("ERROR: Invalid dstImageInfo->num_mip_levels = %d\n",
+                          dstImageInfo->num_mip_levels);
+                dst_lod = 0;
+                break;
         }
         dst_mip_level_offset = compute_mip_level_offset(dstImageInfo, dst_lod);
         dst_row_pitch_lod =
diff --git a/test_common/harness/typeWrappers.cpp b/test_common/harness/typeWrappers.cpp
index e6520b1c..ed7d3ce3 100644
--- a/test_common/harness/typeWrappers.cpp
+++ b/test_common/harness/typeWrappers.cpp
@@ -348,7 +348,7 @@ cl_int clProtectedImage::Create(cl_context context,
                                 const cl_image_format *fmt, size_t width,
                                 size_t height, size_t depth, size_t arraySize)
 {
-    cl_int error;
+    cl_int error = 0;
 #if defined(__APPLE__)
     int protect_pages = 1;
     cl_device_id devices[16];
diff --git a/test_conformance/SVM/test_enqueue_api.cpp b/test_conformance/SVM/test_enqueue_api.cpp
index 20fa4432..83e0b9af 100644
--- a/test_conformance/SVM/test_enqueue_api.cpp
+++ b/test_conformance/SVM/test_enqueue_api.cpp
@@ -160,8 +160,8 @@ REGISTER_TEST(svm_enqueue_api)
             error = clSetUserEventStatus(userEvent, CL_COMPLETE);
             test_error(error, "clSetUserEventStatus failed");
 
-            cl_uchar *src_ptr;
-            cl_uchar *dst_ptr;
+            cl_uchar *src_ptr = nullptr;
+            cl_uchar *dst_ptr = nullptr;
             if (test_case.srcAlloc == host)
             {
                 src_ptr = srcHostData.data();
diff --git a/test_conformance/api/test_wg_suggested_local_work_size.cpp b/test_conformance/api/test_wg_suggested_local_work_size.cpp
index f8464638..9b4a0b5e 100644
--- a/test_conformance/api/test_wg_suggested_local_work_size.cpp
+++ b/test_conformance/api/test_wg_suggested_local_work_size.cpp
@@ -208,7 +208,7 @@ int do_test_work_group_suggested_local_size(
     bool (*skip_cond)(size_t), size_t start, size_t end, size_t incr,
     cl_ulong max_local_mem_size, size_t global_work_offset[], num_dims dim)
 {
-    int err;
+    int err = 0;
     size_t test_values[] = { 1, 1, 1 };
     std::string kernel_names[6] = {
         "test_wg_scan_local_work_group_size",
diff --git a/test_conformance/basic/test_imagereadwrite.cpp b/test_conformance/basic/test_imagereadwrite.cpp
index c074238d..fee73ea3 100644
--- a/test_conformance/basic/test_imagereadwrite.cpp
+++ b/test_conformance/basic/test_imagereadwrite.cpp
@@ -314,6 +314,12 @@ test_imagereadwrite(cl_device_id device, cl_context context, cl_command_queue qu
                 }
                 outp = (void *)rgbafp_outptr;
                 break;
+            default:
+                log_error("ERROR Invalid j = %d\n", j);
+                elem_size = 0;
+                p = nullptr;
+                outp = nullptr;
+                break;
         }
 
         const char* update_packed_pitch_name = "";
diff --git a/test_conformance/basic/test_imagereadwrite3d.cpp b/test_conformance/basic/test_imagereadwrite3d.cpp
index 66f64e09..d52f16d0 100644
--- a/test_conformance/basic/test_imagereadwrite3d.cpp
+++ b/test_conformance/basic/test_imagereadwrite3d.cpp
@@ -320,6 +320,12 @@ test_imagereadwrite3d(cl_device_id device, cl_context context, cl_command_queue
                 }
                 outp = (void *)rgbafp_outptr;
                 break;
+            default:
+                log_error("ERROR Invalid j = %d\n", j);
+                elem_size = 0;
+                p = nullptr;
+                outp = nullptr;
+                break;
         }
 
         const char* update_packed_pitch_name = "";
diff --git a/test_conformance/buffers/test_buffer_write.cpp b/test_conformance/buffers/test_buffer_write.cpp
index e57e1c18..bf41e48c 100644
--- a/test_conformance/buffers/test_buffer_write.cpp
+++ b/test_conformance/buffers/test_buffer_write.cpp
@@ -852,8 +852,8 @@ int test_buffer_write_struct( cl_device_id deviceID, cl_context context, cl_comm
                     buffers[0] =
                         clCreateBuffer(context, flag_set[src_flag_id],
                                        ptrSizes[i] * num_elements, NULL, &err);
-                if ( err ){
-                    align_free( outptr[i] );
+                if (err)
+                {
                     print_error(err, " clCreateBuffer failed\n" );
                     free_mtdata(d);
                     return -1;
diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp
index 880a23ab..abe95af5 100644
--- a/test_conformance/contractions/contractions.cpp
+++ b/test_conformance/contractions/contractions.cpp
@@ -365,16 +365,18 @@ static int ParseArgs( int argc, const char **argv )
     int length_of_seed = 0;
 
     { // Extract the app name
-        strncpy( appName, argv[0], MAXPATHLEN );
+        strncpy(appName, argv[0], MAXPATHLEN - 1);
+        appName[MAXPATHLEN - 1] = '\0';
 
 #if (defined( __APPLE__ ) || defined(__linux__) || defined(__MINGW32__))
         char baseName[MAXPATHLEN];
         char *base = NULL;
-        strncpy( baseName, argv[0], MAXPATHLEN );
+        strncpy(baseName, argv[0], MAXPATHLEN - 1);
+        baseName[MAXPATHLEN - 1] = '\0';
         base = basename( baseName );
         if( NULL != base )
         {
-            strncpy( appName, base, sizeof( appName )  );
+            strncpy(appName, base, sizeof(appName) - 1);
             appName[ sizeof( appName ) -1 ] = '\0';
         }
 #elif defined (_WIN32)
@@ -385,7 +387,7 @@ static int ParseArgs( int argc, const char **argv )
                                    fname, _MAX_FNAME, ext, _MAX_EXT );
         if (err == 0) { // no error
             strcat (fname, ext); //just cat them, size of frame can keep both
-            strncpy (appName, fname, sizeof(appName));
+            strncpy(appName, fname, sizeof(appName) - 1);
             appName[ sizeof( appName ) -1 ] = '\0';
         }
 #endif
diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp
index 3880c820..edad671b 100644
--- a/test_conformance/conversions/basic_test_conversions.cpp
+++ b/test_conformance/conversions/basic_test_conversions.cpp
@@ -1448,7 +1448,9 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
         char inName[32];
         char outName[32];
         strncpy(inName, gTypeNames[inType], sizeof(inName));
+        inName[sizeof(inName) - 1] = '\0';
         strncpy(outName, gTypeNames[outType], sizeof(outName));
+        outName[sizeof(outName) - 1] = '\0';
         sprintf(testName, "test_implicit_%s_%s", outName, inName);
 
         source << "__kernel void " << testName << "( __global " << inName
@@ -1473,8 +1475,10 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
         switch (vectorSizetmp)
         {
             case 1:
-                strncpy(inName, gTypeNames[inType], sizeof(inName));
-                strncpy(outName, gTypeNames[outType], sizeof(outName));
+                strncpy(inName, gTypeNames[inType], sizeof(inName) - 1);
+                inName[sizeof(inName) - 1] = '\0';
+                strncpy(outName, gTypeNames[outType], sizeof(outName) - 1);
+                outName[sizeof(outName) - 1] = '\0';
                 snprintf(convertString, sizeof(convertString), "convert_%s%s%s",
                          outName, gSaturationNames[sat],
                          gRoundingModeNames[round]);
@@ -1482,8 +1486,10 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
                 vlog("Building %s( %s ) test\n", convertString, inName);
                 break;
             case 3:
-                strncpy(inName, gTypeNames[inType], sizeof(inName));
-                strncpy(outName, gTypeNames[outType], sizeof(outName));
+                strncpy(inName, gTypeNames[inType], sizeof(inName) - 1);
+                inName[sizeof(inName) - 1] = '\0';
+                strncpy(outName, gTypeNames[outType], sizeof(outName) - 1);
+                outName[sizeof(outName) - 1] = '\0';
                 snprintf(convertString, sizeof(convertString),
                          "convert_%s3%s%s", outName, gSaturationNames[sat],
                          gRoundingModeNames[round]);
diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp
index 8225769e..c63572c3 100644
--- a/test_conformance/conversions/test_conversions.cpp
+++ b/test_conformance/conversions/test_conversions.cpp
@@ -182,11 +182,12 @@ static int ParseArgs(int argc, const char **argv)
 #if (defined(__APPLE__) || defined(__linux__) || defined(__MINGW32__))
     { // Extract the app name
         char baseName[MAXPATHLEN];
-        strncpy(baseName, argv[0], MAXPATHLEN);
+        strncpy(baseName, argv[0], MAXPATHLEN - 1);
+        baseName[sizeof(baseName) - 1] = '\0';
         char *base = basename(baseName);
         if (NULL != base)
         {
-            strncpy(appName, base, sizeof(appName));
+            strncpy(appName, base, sizeof(appName) - 1);
             appName[sizeof(appName) - 1] = '\0';
         }
     }
@@ -200,7 +201,7 @@ static int ParseArgs(int argc, const char **argv)
         if (err == 0)
         { // no error
             strcat(fname, ext); // just cat them, size of frame can keep both
-            strncpy(appName, fname, sizeof(appName));
+            strncpy(appName, fname, sizeof(appName) - 1);
             appName[sizeof(appName) - 1] = '\0';
         }
     }
diff --git a/test_conformance/events/test_event_dependencies.cpp b/test_conformance/events/test_event_dependencies.cpp
index b40a69dd..72e0f8e4 100644
--- a/test_conformance/events/test_event_dependencies.cpp
+++ b/test_conformance/events/test_event_dependencies.cpp
@@ -89,7 +89,7 @@ int test_event_enqueue_wait_for_events_run_test(
 
     // If we are to use two devices, then get them and create a context with
     // both.
-    cl_device_id *two_device_ids;
+    cl_device_id *two_device_ids = nullptr;
     if (two_devices)
     {
         two_device_ids = (cl_device_id *)malloc(sizeof(cl_device_id) * 2);
diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp
index e5a425b0..cf914a9a 100644
--- a/test_conformance/half/Test_vStoreHalf.cpp
+++ b/test_conformance/half/Test_vStoreHalf.cpp
@@ -341,8 +341,8 @@ int Test_vStoreHalf_private(cl_device_id device, f2h referenceFunc,
     int vectorSize, error;
     cl_program programs[kVectorSizeCount + kStrangeVectorSizeCount][3];
     cl_kernel kernels[kVectorSizeCount + kStrangeVectorSizeCount][3];
-    cl_program resetProgram;
-    cl_kernel resetKernel;
+    cl_program resetProgram = nullptr;
+    cl_kernel resetKernel = nullptr;
 
     uint64_t time[kVectorSizeCount + kStrangeVectorSizeCount] = { 0 };
     uint64_t min_time[kVectorSizeCount + kStrangeVectorSizeCount] = { 0 };
@@ -1225,8 +1225,8 @@ int Test_vStoreaHalf_private(cl_device_id device, f2h referenceFunc,
     int vectorSize, error;
     cl_program programs[kVectorSizeCount + kStrangeVectorSizeCount][3];
     cl_kernel kernels[kVectorSizeCount + kStrangeVectorSizeCount][3];
-    cl_program resetProgram;
-    cl_kernel resetKernel;
+    cl_program resetProgram = nullptr;
+    cl_kernel resetKernel = nullptr;
 
     uint64_t time[kVectorSizeCount + kStrangeVectorSizeCount] = { 0 };
     uint64_t min_time[kVectorSizeCount + kStrangeVectorSizeCount] = { 0 };
diff --git a/test_conformance/half/main.cpp b/test_conformance/half/main.cpp
index ee44fb2d..82b2d769 100644
--- a/test_conformance/half/main.cpp
+++ b/test_conformance/half/main.cpp
@@ -144,11 +144,12 @@ static int ParseArgs( int argc, const char **argv )
 #if (defined( __APPLE__ ) || defined(__linux__) || defined(__MINGW32__))
     { // Extract the app name
         char baseName[ MAXPATHLEN ];
-        strncpy( baseName, argv[0], MAXPATHLEN );
+        strncpy(baseName, argv[0], MAXPATHLEN - 1);
+        baseName[MAXPATHLEN - 1] = '\0';
         char *base = basename( baseName );
         if( NULL != base )
         {
-            strncpy( appName, base, sizeof( appName )  );
+            strncpy(appName, base, sizeof(appName) - 1);
             appName[ sizeof( appName ) -1 ] = '\0';
         }
     }
diff --git a/test_conformance/images/clCopyImage/test_copy_1D.cpp b/test_conformance/images/clCopyImage/test_copy_1D.cpp
index b4ae8308..7d13eaab 100644
--- a/test_conformance/images/clCopyImage/test_copy_1D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D.cpp
@@ -25,7 +25,7 @@ int test_copy_image_size_1D( cl_context context, cl_command_queue queue, image_d
     size_t src_lod = 0, src_width_lod = imageInfo->width, src_row_pitch_lod;
     size_t dst_lod = 0, dst_width_lod = imageInfo->width, dst_row_pitch_lod;
     size_t width_lod = imageInfo->width;
-    size_t max_mip_level;
+    size_t max_mip_level = 0;
 
     if( gTestMipmaps )
     {
diff --git a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
index f0b610bb..d94ba8ef 100644
--- a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
@@ -25,7 +25,7 @@ int test_copy_image_size_1D_array( cl_context context, cl_command_queue queue, i
     size_t src_lod = 0, src_width_lod = imageInfo->width, src_row_pitch_lod;
     size_t dst_lod = 0, dst_width_lod = imageInfo->width, dst_row_pitch_lod;
     size_t width_lod = imageInfo->width;
-    size_t max_mip_level;
+    size_t max_mip_level = 0;
 
     if( gTestMipmaps )
     {
diff --git a/test_conformance/images/clCopyImage/test_copy_2D.cpp b/test_conformance/images/clCopyImage/test_copy_2D.cpp
index 448b47f0..97cca26c 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D.cpp
@@ -27,7 +27,7 @@ int test_copy_image_size_2D( cl_context context, cl_command_queue queue, image_d
     size_t dst_lod = 0, dst_width_lod = imageInfo->width, dst_row_pitch_lod;
     size_t dst_height_lod = imageInfo->height;
     size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
-    size_t max_mip_level;
+    size_t max_mip_level = 0;
 
     if( gTestMipmaps )
     {
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
index 1819d87c..9ba8718a 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
@@ -72,7 +72,7 @@ int test_copy_image_size_2D_2D_array( cl_context context, cl_command_queue queue
     size_t threeImage_lod = 0, threeImage_width_lod = threeImage->width, threeImage_row_pitch_lod, threeImage_slice_pitch_lod;
     size_t threeImage_height_lod = threeImage->height;
     size_t width_lod, height_lod;
-    size_t twoImage_max_mip_level,threeImage_max_mip_level;
+    size_t twoImage_max_mip_level = 0, threeImage_max_mip_level = 0;
 
     if( gTestMipmaps )
     {
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
index 4ab6b42a..5f522e3e 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
@@ -68,7 +68,7 @@ int test_copy_image_size_2D_3D( cl_context context, cl_command_queue queue, imag
     size_t threeImage_lod = 0, threeImage_width_lod = threeImage->width, threeImage_row_pitch_lod, threeImage_slice_pitch_lod;
     size_t threeImage_height_lod = threeImage->height, depth_lod = threeImage->depth;
     size_t width_lod, height_lod;
-    size_t twoImage_max_mip_level,threeImage_max_mip_level;
+    size_t twoImage_max_mip_level = 0, threeImage_max_mip_level = 0;
 
     if( gTestMipmaps )
     {
diff --git a/test_conformance/images/clCopyImage/test_loops.cpp b/test_conformance/images/clCopyImage/test_loops.cpp
index ea60d356..d9c54854 100644
--- a/test_conformance/images/clCopyImage/test_loops.cpp
+++ b/test_conformance/images/clCopyImage/test_loops.cpp
@@ -39,8 +39,8 @@ extern int test_copy_image_set_1D_buffer_1D(cl_device_id device,
 
 int test_image_type( cl_device_id device, cl_context context, cl_command_queue queue, MethodsToTest testMethod, cl_mem_flags flags )
 {
-    const char *name;
-    cl_mem_object_type imageType;
+    const char *name = nullptr;
+    cl_mem_object_type imageType = 0;
 
     if ( gTestMipmaps )
     {
diff --git a/test_conformance/images/clFillImage/test_fill_generic.cpp b/test_conformance/images/clFillImage/test_fill_generic.cpp
index 17b6182e..24c91813 100644
--- a/test_conformance/images/clFillImage/test_fill_generic.cpp
+++ b/test_conformance/images/clFillImage/test_fill_generic.cpp
@@ -277,6 +277,11 @@ cl_mem create_image( cl_context context, cl_command_queue queue, BufferOwningPtr
             depth = imageInfo->depth;
             imageSize = imageInfo->slicePitch * imageInfo->depth;
             break;
+        default:
+            log_error("ERROR Invalid imageInfo->type = %d\n", imageInfo->type);
+            height = 0;
+            depth = 0;
+            break;
     }
 
     size_t origin[ 3 ] = { 0, 0, 0 };
diff --git a/test_conformance/images/clGetInfo/test_2D.cpp b/test_conformance/images/clGetInfo/test_2D.cpp
index 49631bf4..76588a09 100644
--- a/test_conformance/images/clGetInfo/test_2D.cpp
+++ b/test_conformance/images/clGetInfo/test_2D.cpp
@@ -181,6 +181,10 @@ int test_get_image_info_single( cl_context context, image_descriptor *imageInfo,
     case CL_MEM_OBJECT_IMAGE3D:
       required_height = imageInfo->height;
       break;
+    default:
+        log_error("ERROR: Invalid imageInfo->type = %d\n", imageInfo->type);
+        required_height = 0;
+        break;
   }
 
     size_t outHeight;
@@ -204,6 +208,10 @@ int test_get_image_info_single( cl_context context, image_descriptor *imageInfo,
     case CL_MEM_OBJECT_IMAGE3D:
       required_depth = imageInfo->depth;
       break;
+    default:
+        log_error("ERROR: Invalid imageInfo->type = %d\n", imageInfo->type);
+        required_depth = 0;
+        break;
   }
 
   size_t outDepth;
@@ -227,6 +235,10 @@ int test_get_image_info_single( cl_context context, image_descriptor *imageInfo,
     case CL_MEM_OBJECT_IMAGE2D_ARRAY:
       required_array_size = imageInfo->arraySize;
       break;
+    default:
+        log_error("ERROR: Invalid imageInfo->type = %d\n", imageInfo->type);
+        required_array_size = 0;
+        break;
   }
 
   size_t outArraySize;
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp
index 887c9dca..77784b20 100644
--- a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp
@@ -69,7 +69,7 @@ static inline size_t get_format_size(cl_context context,
     }
 
     cl_int error = 0;
-    cl_mem buffer;
+    cl_mem buffer = nullptr;
     if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER)
     {
         buffer = clCreateBuffer(context, flags,
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
index 2dcc1827..8b82b9f9 100644
--- a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
@@ -769,7 +769,7 @@ int image_from_buffer_fill_positive(cl_device_id device, cl_context context,
                 err = clFinish(queue);
                 test_error(err, "Error clFinish");
 
-                cl_mem image1d_buffer;
+                cl_mem image1d_buffer = nullptr;
                 if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER)
                 {
                     image1d_buffer = clCreateBuffer(context, flag, buffer_size,
diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp
index 0c87d87d..9c4e332a 100644
--- a/test_conformance/images/kernel_read_write/test_iterations.cpp
+++ b/test_conformance/images/kernel_read_write/test_iterations.cpp
@@ -1191,7 +1191,7 @@ int test_read_image_2D( cl_context context, cl_command_queue queue, cl_kernel ke
 {
     int error;
     static int initHalf = 0;
-    cl_mem imageBuffer;
+    cl_mem imageBuffer = nullptr;
     cl_mem_flags    image_read_write_flags = CL_MEM_READ_ONLY;
     size_t threads[2];
 
diff --git a/test_conformance/images/kernel_read_write/test_write_image.cpp b/test_conformance/images/kernel_read_write/test_write_image.cpp
index 32f7c22f..ab73e6e1 100644
--- a/test_conformance/images/kernel_read_write/test_write_image.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_image.cpp
@@ -223,7 +223,7 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue
         clProtectedImage protImage;
         clMemWrapper unprotImage;
         cl_mem image;
-        cl_mem imageBuffer;
+        cl_mem imageBuffer = nullptr;
 
         if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
         {
@@ -910,7 +910,7 @@ int test_write_image_formats(cl_device_id device, cl_context context,
         gTestCount++;
 
         print_write_header( &imageFormat, false );
-        int retCode;
+        int retCode = 0;
         switch (imageType)
         {
             case CL_MEM_OBJECT_IMAGE1D:
diff --git a/test_conformance/integer_ops/test_unary_ops.cpp b/test_conformance/integer_ops/test_unary_ops.cpp
index da3de6d1..1f7fe855 100644
--- a/test_conformance/integer_ops/test_unary_ops.cpp
+++ b/test_conformance/integer_ops/test_unary_ops.cpp
@@ -97,7 +97,7 @@ int test_unary_op( cl_command_queue queue, cl_context context, OpKonstants which
         get_explicit_type_size(vecType) * vecSize * TEST_SIZE, inData, &error);
     test_error( error, "Creating input data array failed" );
 
-    cl_uint bits;
+    cl_uint bits = 0;
     for( i = 0; i < TEST_SIZE; i++ )
     {
         size_t which = i & 7;
diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp
index 0d5dfa7b..ef52f044 100644
--- a/test_conformance/printf/test_printf.cpp
+++ b/test_conformance/printf/test_printf.cpp
@@ -1151,7 +1151,8 @@ int main(int argc, const char* argv[])
     char* pcTempFname = get_temp_filename();
     if (pcTempFname != nullptr)
     {
-        strncpy(gFileName, pcTempFname, sizeof(gFileName));
+        strncpy(gFileName, pcTempFname, sizeof(gFileName) - 1);
+        gFileName[sizeof(gFileName) - 1] = '\0';
     }
 
     free(pcTempFname);
diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp
index 9cf4727a..20f5bd5e 100644
--- a/test_conformance/select/test_select.cpp
+++ b/test_conformance/select/test_select.cpp
@@ -261,14 +261,18 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr,
     switch( vec_len )
     {
         case 1:
-            strncpy(stypename, type_name[srctype], sizeof(stypename));
-            strncpy(ctypename, type_name[cmptype], sizeof(ctypename));
+            strncpy(stypename, type_name[srctype], sizeof(stypename) - 1);
+            stypename[sizeof(stypename) - 1] = '\0';
+            strncpy(ctypename, type_name[cmptype], sizeof(ctypename) - 1);
+            ctypename[sizeof(ctypename) - 1] = '\0';
             snprintf(testname, sizeof(testname), "select_%s_%s", stypename, ctypename );
             log_info("Building %s(%s, %s, %s)\n", testname, stypename, stypename, ctypename);
             break;
         case 3:
-            strncpy(stypename, type_name[srctype], sizeof(stypename));
-            strncpy(ctypename, type_name[cmptype], sizeof(ctypename));
+            strncpy(stypename, type_name[srctype], sizeof(stypename) - 1);
+            stypename[sizeof(stypename) - 1] = '\0';
+            strncpy(ctypename, type_name[cmptype], sizeof(ctypename) - 1);
+            ctypename[sizeof(ctypename) - 1] = '\0';
             snprintf(testname, sizeof(testname), "select_%s3_%s3", stypename, ctypename );
             log_info("Building %s(%s3, %s3, %s3)\n", testname, stypename, stypename, ctypename);
             break;
diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h
index a081bd09..ab8ee797 100644
--- a/test_conformance/subgroups/subhelpers.h
+++ b/test_conformance/subgroups/subhelpers.h
@@ -1611,7 +1611,7 @@ template <typename Ty, typename Fns, size_t TSIZE = 0> struct subgroup_test
         test_params.subgroup_size = subgroup_size;
         Fns::gen(idata.data(), mapin.data(), sgmap.data(), test_params);
 
-        test_status status;
+        test_status status = TEST_FAIL;
 
         if (test_params.divergence_mask_arg != -1)
         {

From a8b32b27204a8739350797d14118b72cccdaa248 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Thu, 6 Feb 2025 19:43:00 +0100
Subject: [PATCH 03/12] math_brute_force: remove LogBuildError (#2233)

`LogBuildError` was only ever called after `clSetKernelArg`, but setting
a kernel argument has no impact on the program build log. Printing of
the actual build log in case of a build failure is already handled via
`create_single_kernel_helper`.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 .../math_brute_force/binary_double.cpp        | 27 ++++----------
 .../math_brute_force/binary_float.cpp         | 27 ++++----------
 .../math_brute_force/binary_half.cpp          | 27 ++++----------
 .../math_brute_force/binary_i_double.cpp      | 27 ++++----------
 .../math_brute_force/binary_i_float.cpp       | 27 ++++----------
 .../math_brute_force/binary_i_half.cpp        | 27 ++++----------
 .../binary_operator_double.cpp                | 27 ++++----------
 .../binary_operator_float.cpp                 | 27 ++++----------
 .../math_brute_force/binary_operator_half.cpp | 27 ++++----------
 .../binary_two_results_i_double.cpp           | 37 ++++++-------------
 .../binary_two_results_i_float.cpp            | 37 ++++++-------------
 .../binary_two_results_i_half.cpp             | 37 ++++++-------------
 .../math_brute_force/i_unary_double.cpp       | 18 +++------
 .../math_brute_force/i_unary_float.cpp        | 18 +++------
 .../math_brute_force/i_unary_half.cpp         | 18 +++------
 .../math_brute_force/macro_binary_double.cpp  | 27 ++++----------
 .../math_brute_force/macro_binary_float.cpp   | 27 ++++----------
 .../math_brute_force/macro_binary_half.cpp    | 27 ++++----------
 .../math_brute_force/macro_unary_double.cpp   | 18 +++------
 .../math_brute_force/macro_unary_float.cpp    | 18 +++------
 .../math_brute_force/macro_unary_half.cpp     | 18 +++------
 .../math_brute_force/mad_double.cpp           | 36 ++++++------------
 .../math_brute_force/mad_float.cpp            | 36 ++++++------------
 .../math_brute_force/mad_half.cpp             | 36 ++++++------------
 test_conformance/math_brute_force/main.cpp    | 13 -------
 .../math_brute_force/ternary_double.cpp       | 36 ++++++------------
 .../math_brute_force/ternary_float.cpp        | 36 ++++++------------
 .../math_brute_force/ternary_half.cpp         | 36 ++++++------------
 .../math_brute_force/unary_double.cpp         | 18 +++------
 .../math_brute_force/unary_float.cpp          | 18 +++------
 .../math_brute_force/unary_half.cpp           | 18 +++------
 .../unary_two_results_double.cpp              | 28 +++++---------
 .../unary_two_results_float.cpp               | 28 +++++---------
 .../unary_two_results_half.cpp                | 28 +++++---------
 .../unary_two_results_i_double.cpp            | 28 +++++---------
 .../unary_two_results_i_float.cpp             | 28 +++++---------
 .../unary_two_results_i_half.cpp              | 28 +++++---------
 .../math_brute_force/unary_u_double.cpp       | 18 +++------
 .../math_brute_force/unary_u_float.cpp        | 18 +++------
 .../math_brute_force/unary_u_half.cpp         | 18 +++------
 test_conformance/math_brute_force/utility.h   |  3 --
 41 files changed, 324 insertions(+), 727 deletions(-)

diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp
index feeedc47..5510c7a2 100644
--- a/test_conformance/math_brute_force/binary_double.cpp
+++ b/test_conformance/math_brute_force/binary_double.cpp
@@ -344,26 +344,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp
index deea1ce9..6db812e5 100644
--- a/test_conformance/math_brute_force/binary_float.cpp
+++ b/test_conformance/math_brute_force/binary_float.cpp
@@ -349,26 +349,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/binary_half.cpp b/test_conformance/math_brute_force/binary_half.cpp
index 70057db5..5bbcbbda 100644
--- a/test_conformance/math_brute_force/binary_half.cpp
+++ b/test_conformance/math_brute_force/binary_half.cpp
@@ -229,26 +229,15 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp
index a6c28557..5f563c73 100644
--- a/test_conformance/math_brute_force/binary_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_i_double.cpp
@@ -346,26 +346,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp
index dfe25efc..a9a65719 100644
--- a/test_conformance/math_brute_force/binary_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_i_float.cpp
@@ -337,26 +337,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/binary_i_half.cpp b/test_conformance/math_brute_force/binary_i_half.cpp
index 0f167fc9..bd91be46 100644
--- a/test_conformance/math_brute_force/binary_i_half.cpp
+++ b/test_conformance/math_brute_force/binary_i_half.cpp
@@ -225,26 +225,15 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp
index 43cf7eff..4dce5052 100644
--- a/test_conformance/math_brute_force/binary_operator_double.cpp
+++ b/test_conformance/math_brute_force/binary_operator_double.cpp
@@ -345,26 +345,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp
index 49cfe670..c0c11c2e 100644
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ b/test_conformance/math_brute_force/binary_operator_float.cpp
@@ -367,26 +367,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/binary_operator_half.cpp b/test_conformance/math_brute_force/binary_operator_half.cpp
index 1a0776e3..3bd45857 100644
--- a/test_conformance/math_brute_force/binary_operator_half.cpp
+++ b/test_conformance/math_brute_force/binary_operator_half.cpp
@@ -240,26 +240,15 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
index ec244c17..eca33f2f 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
@@ -185,31 +185,18 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_double) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
index 36d71898..213535a9 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
@@ -187,31 +187,18 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_float) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/binary_two_results_i_half.cpp b/test_conformance/math_brute_force/binary_two_results_i_half.cpp
index 3c3ef71a..a2379431 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_half.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_half.cpp
@@ -179,31 +179,18 @@ int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode)
             // align working group size with the bigger output type
             size_t vectorSize = sizeValues[j] * sizeof(int32_t);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp
index c2c7963f..4db27a62 100644
--- a/test_conformance/math_brute_force/i_unary_double.cpp
+++ b/test_conformance/math_brute_force/i_unary_double.cpp
@@ -122,18 +122,12 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeValues[j] * sizeof(cl_double);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp
index eed76a6e..1f84b901 100644
--- a/test_conformance/math_brute_force/i_unary_float.cpp
+++ b/test_conformance/math_brute_force/i_unary_float.cpp
@@ -121,18 +121,12 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeValues[j] * sizeof(cl_float);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/i_unary_half.cpp b/test_conformance/math_brute_force/i_unary_half.cpp
index 97955f93..0aad984b 100644
--- a/test_conformance/math_brute_force/i_unary_half.cpp
+++ b/test_conformance/math_brute_force/i_unary_half.cpp
@@ -118,18 +118,12 @@ int TestFunc_Int_Half(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_int);
             size_t localCount = (bufferSizeOut + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp
index 51d5b64b..ab969ad6 100644
--- a/test_conformance/math_brute_force/macro_binary_double.cpp
+++ b/test_conformance/math_brute_force/macro_binary_double.cpp
@@ -327,26 +327,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp
index b00a29ff..c49346cc 100644
--- a/test_conformance/math_brute_force/macro_binary_float.cpp
+++ b/test_conformance/math_brute_force/macro_binary_float.cpp
@@ -320,26 +320,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/macro_binary_half.cpp b/test_conformance/math_brute_force/macro_binary_half.cpp
index a8f459a7..ec10c65e 100644
--- a/test_conformance/math_brute_force/macro_binary_half.cpp
+++ b/test_conformance/math_brute_force/macro_binary_half.cpp
@@ -210,26 +210,15 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp
index b747b980..c6d81ab7 100644
--- a/test_conformance/math_brute_force/macro_unary_double.cpp
+++ b/test_conformance/math_brute_force/macro_unary_double.cpp
@@ -168,20 +168,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp
index 34f49a5a..85be620f 100644
--- a/test_conformance/math_brute_force/macro_unary_float.cpp
+++ b/test_conformance/math_brute_force/macro_unary_float.cpp
@@ -169,20 +169,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/macro_unary_half.cpp b/test_conformance/math_brute_force/macro_unary_half.cpp
index a1e92118..d80dd153 100644
--- a/test_conformance/math_brute_force/macro_unary_half.cpp
+++ b/test_conformance/math_brute_force/macro_unary_half.cpp
@@ -151,20 +151,12 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp
index b6a6742f..c0442e68 100644
--- a/test_conformance/math_brute_force/mad_double.cpp
+++ b/test_conformance/math_brute_force/mad_double.cpp
@@ -131,30 +131,18 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_double) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer3), &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer3),
+                                   &gInBuffer3);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp
index 3f237ed9..2e3a294b 100644
--- a/test_conformance/math_brute_force/mad_float.cpp
+++ b/test_conformance/math_brute_force/mad_float.cpp
@@ -132,30 +132,18 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_float) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer3), &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer3),
+                                   &gInBuffer3);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/mad_half.cpp b/test_conformance/math_brute_force/mad_half.cpp
index 33235534..0a59f3f1 100644
--- a/test_conformance/math_brute_force/mad_half.cpp
+++ b/test_conformance/math_brute_force/mad_half.cpp
@@ -126,30 +126,18 @@ int TestFunc_mad_Half(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_half) * sizeValues[j];
             size_t localCount = (bufferSize + vectorSize - 1)
                 / vectorSize; // bufferSize / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer3), &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer3),
+                                   &gInBuffer3);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp
index f919ffa6..c6a4b5d6 100644
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp
@@ -981,19 +981,6 @@ static void ReleaseCL(void)
     }
 }
 
-void _LogBuildError(cl_program p, int line, const char *file)
-{
-    char the_log[2048] = "";
-
-    vlog_error("%s:%d: Build Log:\n", file, line);
-    if (0
-        == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG,
-                                 sizeof(the_log), the_log, NULL))
-        vlog_error("%s", the_log);
-    else
-        vlog_error("*** Error getting build log for program %p\n", p);
-}
-
 int InitILogbConstants(void)
 {
     int error;
diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp
index 62adbf29..aadebc25 100644
--- a/test_conformance/math_brute_force/ternary_double.cpp
+++ b/test_conformance/math_brute_force/ternary_double.cpp
@@ -236,30 +236,18 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
             size_t vectorSize = sizeof(cl_double) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer3), &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer3),
+                                   &gInBuffer3);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp
index efde6063..478090d4 100644
--- a/test_conformance/math_brute_force/ternary_float.cpp
+++ b/test_conformance/math_brute_force/ternary_float.cpp
@@ -258,30 +258,18 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_float) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer3), &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer3),
+                                   &gInBuffer3);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/ternary_half.cpp b/test_conformance/math_brute_force/ternary_half.cpp
index 856fa67d..843ceaa0 100644
--- a/test_conformance/math_brute_force/ternary_half.cpp
+++ b/test_conformance/math_brute_force/ternary_half.cpp
@@ -191,30 +191,18 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_half) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer3), &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer3),
+                                   &gInBuffer3);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp
index f3157fdf..4762a81d 100644
--- a/test_conformance/math_brute_force/unary_double.cpp
+++ b/test_conformance/math_brute_force/unary_double.cpp
@@ -176,20 +176,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp
index 7d1f6cda..0a2af3be 100644
--- a/test_conformance/math_brute_force/unary_float.cpp
+++ b/test_conformance/math_brute_force/unary_float.cpp
@@ -205,20 +205,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument 0");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument 1");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/unary_half.cpp b/test_conformance/math_brute_force/unary_half.cpp
index 83cdd01f..877e1fad 100644
--- a/test_conformance/math_brute_force/unary_half.cpp
+++ b/test_conformance/math_brute_force/unary_half.cpp
@@ -154,20 +154,12 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp
index 4d660e0a..a109cd6e 100644
--- a/test_conformance/math_brute_force/unary_two_results_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_double.cpp
@@ -143,25 +143,15 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_double);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp
index 3fd16cd3..a3b52c62 100644
--- a/test_conformance/math_brute_force/unary_two_results_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_float.cpp
@@ -159,25 +159,15 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_float);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_two_results_half.cpp b/test_conformance/math_brute_force/unary_two_results_half.cpp
index 2bf35142..683e1492 100644
--- a/test_conformance/math_brute_force/unary_two_results_half.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_half.cpp
@@ -132,25 +132,15 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_half);
             size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
index 916f4a0c..dd60f43e 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
@@ -151,25 +151,15 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_double);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
index 0dbe3f77..6e01a794 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
@@ -156,25 +156,15 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_float);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_two_results_i_half.cpp b/test_conformance/math_brute_force/unary_two_results_i_half.cpp
index 7b3431ab..685611ea 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_half.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_half.cpp
@@ -145,25 +145,15 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode)
             // align working group size with the bigger output type
             size_t vectorSize = sizeValues[j] * sizeof(cl_int);
             size_t localCount = (bufferSizeHi + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp
index 226ef068..514b7c60 100644
--- a/test_conformance/math_brute_force/unary_u_double.cpp
+++ b/test_conformance/math_brute_force/unary_u_double.cpp
@@ -116,18 +116,12 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_double);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp
index 3eb76794..d5d30495 100644
--- a/test_conformance/math_brute_force/unary_u_float.cpp
+++ b/test_conformance/math_brute_force/unary_u_float.cpp
@@ -123,18 +123,12 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_float);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_u_half.cpp b/test_conformance/math_brute_force/unary_u_half.cpp
index 2970403f..cc3fe728 100644
--- a/test_conformance/math_brute_force/unary_u_half.cpp
+++ b/test_conformance/math_brute_force/unary_u_half.cpp
@@ -111,18 +111,12 @@ int TestFunc_Half_UShort(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_half);
             size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h
index 74cacc46..a43f3a64 100644
--- a/test_conformance/math_brute_force/utility.h
+++ b/test_conformance/math_brute_force/utility.h
@@ -114,9 +114,6 @@ inline double DoubleFromUInt32(uint32_t bits)
     return u.d;
 }
 
-void _LogBuildError(cl_program p, int line, const char *file);
-#define LogBuildError(program) _LogBuildError(program, __LINE__, __FILE__)
-
 // The spec is fairly clear that we may enforce a hard cutoff to prevent
 // premature flushing to zero.
 // However, to avoid conflict for 1.0, we are letting results at TYPE_MIN +

From ecd012737ff24dbd7881bdb720bbdd68da321880 Mon Sep 17 00:00:00 2001
From: Ahmed Hesham <117350656+ahesham-arm@users.noreply.github.com>
Date: Tue, 11 Feb 2025 16:43:37 +0000
Subject: [PATCH 04/12] Generate the SPIR-V shaders automatically at build time
 (#2200)

Add custom commands and targets to automatically assemble and validate
the SPIR-V shaders used by the test.

Automatic assembly depends on finding `python3`, `spirv-as` and
`spirv-val`. `SPIRV_TOOLS_DIR` can be defined by the user during
configuration to provide an override path. Default behaviour assumes
that the binaries exist in `PATH`.

---------

Signed-off-by: Ahmed Hesham <ahmed.hesham@arm.com>
---
 CMakeLists.txt                                |   2 +-
 test_conformance/spirv_new/CMakeLists.txt     |   5 +-
 .../spirv_new/spirv_asm/CMakeLists.txt        | 502 ++++++++++++++++++
 .../{ => spirv_asm}/assemble_spirv.py         |   0
 4 files changed, 507 insertions(+), 2 deletions(-)
 create mode 100644 test_conformance/spirv_new/spirv_asm/CMakeLists.txt
 rename test_conformance/spirv_new/{ => spirv_asm}/assemble_spirv.py (100%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d353760a..898f62e9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.5.1)
+cmake_minimum_required(VERSION 3.12.0)
 
 set( CONFORMANCE_SUFFIX "" )
 set(CLConform_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
diff --git a/test_conformance/spirv_new/CMakeLists.txt b/test_conformance/spirv_new/CMakeLists.txt
index 828d417f..805e851b 100644
--- a/test_conformance/spirv_new/CMakeLists.txt
+++ b/test_conformance/spirv_new/CMakeLists.txt
@@ -40,6 +40,9 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang"
     add_cxx_flag_if_supported(-Wno-narrowing)
 endif()
 
-set(${MODULE_NAME}_SOURCES ${SPIRV_NEW_SOURCES} ${TEST_HARNESS_SOURCES})
+set(${MODULE_NAME}_SOURCES ${${MODULE_NAME}_SOURCES} ${TEST_HARNESS_SOURCES})
 
 include(../CMakeCommon.txt)
+
+add_subdirectory(spirv_asm)
+add_dependencies(${${MODULE_NAME}_OUT} spirv_new_binaries)
diff --git a/test_conformance/spirv_new/spirv_asm/CMakeLists.txt b/test_conformance/spirv_new/spirv_asm/CMakeLists.txt
new file mode 100644
index 00000000..71ae4a9c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/CMakeLists.txt
@@ -0,0 +1,502 @@
+set(SPRIV_TOOLS_DIR ""
+    CACHE
+    PATH
+    "Absolute path to the directory containing the SPIR-V tools to use")
+
+find_package(Python3 COMPONENTS Interpreter QUIET)
+
+find_program(SPIRV_ASSEMBLER
+    NAMES spirv-as
+    HINTS ${SPIRV_TOOLS_DIR})
+
+find_program(SPIRV_VALIDATOR
+    NAMES spirv-val
+    HINTS ${SPIRV_TOOLS_DIR})
+
+if (Python3_FOUND AND
+    NOT ${SPIRV_ASSEMBLER} STREQUAL "SPIRV_ASSEMBLER-NOTFOUND" AND
+    NOT ${SPIRV_VALIDATOR} STREQUAL "SPIRV_VALIDATOR-NOTFOUND")
+    message(STATUS "Using python3: ${Python3_EXECUTABLE}")
+    message(STATUS "Using spirv-as: ${SPIRV_ASSEMBLER}")
+    message(STATUS "Using spirv-val: ${SPIRV_VALIDATOR}")
+else()
+    message(STATUS "Skipping automatic build of SPIR-V files for spirv_new")
+    # Empty custom target
+    add_custom_target(spirv_new_binaries)
+    return()
+endif()
+
+set(spirv_sources
+    assume.spvasm32
+    assume.spvasm64
+    atomic_dec_global.spvasm32
+    atomic_dec_global.spvasm64
+    atomic_inc_global.spvasm32
+    atomic_inc_global.spvasm64
+    basic.spvasm32
+    basic.spvasm64
+    branch_conditional.spvasm32
+    branch_conditional.spvasm64
+    branch_conditional_weighted.spvasm32
+    branch_conditional_weighted.spvasm64
+    branch_simple.spvasm32
+    branch_simple.spvasm64
+    composite_construct_int4.spvasm32
+    composite_construct_int4.spvasm64
+    composite_construct_struct.spvasm32
+    composite_construct_struct.spvasm64
+    constant_char_simple.spvasm32
+    constant_char_simple.spvasm64
+    constant_double_simple.spvasm32
+    constant_double_simple.spvasm64
+    constant_false_simple.spvasm32
+    constant_false_simple.spvasm64
+    constant_float_simple.spvasm32
+    constant_float_simple.spvasm64
+    constant_half_simple.spvasm32
+    constant_half_simple.spvasm64
+    constant_int3_simple.spvasm32
+    constant_int3_simple.spvasm64
+    constant_int4_simple.spvasm32
+    constant_int4_simple.spvasm64
+    constant_int_simple.spvasm32
+    constant_int_simple.spvasm64
+    constant_long_simple.spvasm32
+    constant_long_simple.spvasm64
+    constant_short_simple.spvasm32
+    constant_short_simple.spvasm64
+    constant_struct_int_char_simple.spvasm32
+    constant_struct_int_char_simple.spvasm64
+    constant_struct_int_float_simple.spvasm32
+    constant_struct_int_float_simple.spvasm64
+    constant_struct_struct_simple.spvasm32
+    constant_struct_struct_simple.spvasm64
+    constant_true_simple.spvasm32
+    constant_true_simple.spvasm64
+    constant_uchar_simple.spvasm32
+    constant_uchar_simple.spvasm64
+    constant_uint_simple.spvasm32
+    constant_uint_simple.spvasm64
+    constant_ulong_simple.spvasm32
+    constant_ulong_simple.spvasm64
+    constant_ushort_simple.spvasm32
+    constant_ushort_simple.spvasm64
+    copy_char_simple.spvasm32
+    copy_char_simple.spvasm64
+    copy_double_simple.spvasm32
+    copy_double_simple.spvasm64
+    copy_float_simple.spvasm32
+    copy_float_simple.spvasm64
+    copy_half_simple.spvasm32
+    copy_half_simple.spvasm64
+    copy_int3_simple.spvasm32
+    copy_int3_simple.spvasm64
+    copy_int4_simple.spvasm32
+    copy_int4_simple.spvasm64
+    copy_int_simple.spvasm32
+    copy_int_simple.spvasm64
+    copy_long_simple.spvasm32
+    copy_long_simple.spvasm64
+    copy_short_simple.spvasm32
+    copy_short_simple.spvasm64
+    copy_struct_int_char_simple.spvasm32
+    copy_struct_int_char_simple.spvasm64
+    copy_struct_int_float_simple.spvasm32
+    copy_struct_int_float_simple.spvasm64
+    copy_struct_struct_simple.spvasm32
+    copy_struct_struct_simple.spvasm64
+    copy_uchar_simple.spvasm32
+    copy_uchar_simple.spvasm64
+    copy_uint_simple.spvasm32
+    copy_uint_simple.spvasm64
+    copy_ulong_simple.spvasm32
+    copy_ulong_simple.spvasm64
+    copy_ushort_simple.spvasm32
+    copy_ushort_simple.spvasm64
+    decorate_aliased.spvasm32
+    decorate_aliased.spvasm64
+    decorate_alignment.spvasm32
+    decorate_alignment.spvasm64
+    decorate_constant.spvasm32
+    decorate_constant.spvasm64
+    decorate_constant_fail.spvasm32
+    decorate_constant_fail.spvasm64
+    decorate_cpacked.spvasm32
+    decorate_cpacked.spvasm64
+    decorate_restrict.spvasm32
+    decorate_restrict.spvasm64
+    decorate_rounding_rte_double_long.spvasm32
+    decorate_rounding_rte_double_long.spvasm64
+    decorate_rounding_rte_float_int.spvasm32
+    decorate_rounding_rte_float_int.spvasm64
+    decorate_rounding_rte_half_short.spvasm32
+    decorate_rounding_rte_half_short.spvasm64
+    decorate_rounding_rtn_double_long.spvasm32
+    decorate_rounding_rtn_double_long.spvasm64
+    decorate_rounding_rtn_float_int.spvasm32
+    decorate_rounding_rtn_float_int.spvasm64
+    decorate_rounding_rtn_half_short.spvasm32
+    decorate_rounding_rtn_half_short.spvasm64
+    decorate_rounding_rtp_double_long.spvasm32
+    decorate_rounding_rtp_double_long.spvasm64
+    decorate_rounding_rtp_float_int.spvasm32
+    decorate_rounding_rtp_float_int.spvasm64
+    decorate_rounding_rtp_half_short.spvasm32
+    decorate_rounding_rtp_half_short.spvasm64
+    decorate_rounding_rtz_double_long.spvasm32
+    decorate_rounding_rtz_double_long.spvasm64
+    decorate_rounding_rtz_float_int.spvasm32
+    decorate_rounding_rtz_float_int.spvasm64
+    decorate_rounding_rtz_half_short.spvasm32
+    decorate_rounding_rtz_half_short.spvasm64
+    decorate_saturated_conversion_double_to_int.spvasm32
+    decorate_saturated_conversion_double_to_int.spvasm64
+    decorate_saturated_conversion_double_to_uint.spvasm32
+    decorate_saturated_conversion_double_to_uint.spvasm64
+    decorate_saturated_conversion_float_to_char.spvasm32
+    decorate_saturated_conversion_float_to_char.spvasm64
+    decorate_saturated_conversion_float_to_short.spvasm32
+    decorate_saturated_conversion_float_to_short.spvasm64
+    decorate_saturated_conversion_float_to_uchar.spvasm32
+    decorate_saturated_conversion_float_to_uchar.spvasm64
+    decorate_saturated_conversion_float_to_ushort.spvasm32
+    decorate_saturated_conversion_float_to_ushort.spvasm64
+    decorate_saturated_conversion_half_to_char.spvasm32
+    decorate_saturated_conversion_half_to_char.spvasm64
+    decorate_saturated_conversion_half_to_uchar.spvasm32
+    decorate_saturated_conversion_half_to_uchar.spvasm64
+    expect_bool.spvasm32
+    expect_bool.spvasm64
+    expect_char.spvasm32
+    expect_char.spvasm64
+    expect_int.spvasm32
+    expect_int.spvasm64
+    expect_long.spvasm32
+    expect_long.spvasm64
+    expect_short.spvasm32
+    expect_short.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm64
+    fadd_double.spvasm32
+    fadd_double.spvasm64
+    fadd_double2.spvasm32
+    fadd_double2.spvasm64
+    fadd_float.spvasm32
+    fadd_float.spvasm64
+    fadd_float4.spvasm32
+    fadd_float4.spvasm64
+    fadd_half.spvasm32
+    fadd_half.spvasm64
+    fdiv_double.spvasm32
+    fdiv_double.spvasm64
+    fdiv_double2.spvasm32
+    fdiv_double2.spvasm64
+    fdiv_float.spvasm32
+    fdiv_float.spvasm64
+    fdiv_float4.spvasm32
+    fdiv_float4.spvasm64
+    fdiv_half.spvasm32
+    fdiv_half.spvasm64
+    fmod_double.spvasm32
+    fmod_double.spvasm64
+    fmod_double2.spvasm32
+    fmod_double2.spvasm64
+    fmod_float.spvasm32
+    fmod_float.spvasm64
+    fmod_float4.spvasm32
+    fmod_float4.spvasm64
+    fmod_half.spvasm32
+    fmod_half.spvasm64
+    fmul_double.spvasm32
+    fmul_double.spvasm64
+    fmul_double2.spvasm32
+    fmul_double2.spvasm64
+    fmul_float.spvasm32
+    fmul_float.spvasm64
+    fmul_float4.spvasm32
+    fmul_float4.spvasm64
+    fmul_half.spvasm32
+    fmul_half.spvasm64
+    frem_double.spvasm32
+    frem_double.spvasm64
+    frem_double2.spvasm32
+    frem_double2.spvasm64
+    frem_float.spvasm32
+    frem_float.spvasm64
+    frem_float4.spvasm32
+    frem_float4.spvasm64
+    frem_half.spvasm32
+    frem_half.spvasm64
+    fsub_double.spvasm32
+    fsub_double.spvasm64
+    fsub_double2.spvasm32
+    fsub_double2.spvasm64
+    fsub_float.spvasm32
+    fsub_float.spvasm64
+    fsub_float4.spvasm32
+    fsub_float4.spvasm64
+    fsub_half.spvasm32
+    fsub_half.spvasm64
+    label_simple.spvasm32
+    label_simple.spvasm64
+    lifetime_simple.spvasm32
+    lifetime_simple.spvasm64
+    linkage_export.spvasm32
+    linkage_export.spvasm64
+    linkage_import.spvasm32
+    linkage_import.spvasm64
+    linkage_linkonce_odr_main.spvasm32
+    linkage_linkonce_odr_main.spvasm64
+    linkage_linkonce_odr_noa_main.spvasm32
+    linkage_linkonce_odr_noa_main.spvasm64
+    linkage_linkonce_odr_obj.spvasm32
+    linkage_linkonce_odr_obj.spvasm64
+    loop_merge_branch_conditional_dont_unroll.spvasm32
+    loop_merge_branch_conditional_dont_unroll.spvasm64
+    loop_merge_branch_conditional_none.spvasm32
+    loop_merge_branch_conditional_none.spvasm64
+    loop_merge_branch_conditional_unroll.spvasm32
+    loop_merge_branch_conditional_unroll.spvasm64
+    loop_merge_branch_dont_unroll.spvasm32
+    loop_merge_branch_dont_unroll.spvasm64
+    loop_merge_branch_none.spvasm32
+    loop_merge_branch_none.spvasm64
+    loop_merge_branch_unroll.spvasm32
+    loop_merge_branch_unroll.spvasm64
+    op_function_const.spvasm32
+    op_function_const.spvasm64
+    op_function_inline.spvasm32
+    op_function_inline.spvasm64
+    op_function_noinline.spvasm32
+    op_function_noinline.spvasm64
+    op_function_none.spvasm32
+    op_function_none.spvasm64
+    op_function_pure.spvasm32
+    op_function_pure.spvasm64
+    op_function_pure_ptr.spvasm32
+    op_function_pure_ptr.spvasm64
+    op_neg_double.spvasm32
+    op_neg_double.spvasm64
+    op_neg_float.spvasm32
+    op_neg_float.spvasm64
+    op_neg_float4.spvasm32
+    op_neg_float4.spvasm64
+    op_neg_half.spvasm32
+    op_neg_half.spvasm64
+    op_neg_int.spvasm32
+    op_neg_int.spvasm64
+    op_neg_int4.spvasm32
+    op_neg_int4.spvasm64
+    op_neg_long.spvasm32
+    op_neg_long.spvasm64
+    op_neg_short.spvasm32
+    op_neg_short.spvasm64
+    op_not_int.spvasm32
+    op_not_int.spvasm64
+    op_not_int4.spvasm32
+    op_not_int4.spvasm64
+    op_not_long.spvasm32
+    op_not_long.spvasm64
+    op_not_short.spvasm32
+    op_not_short.spvasm64
+    op_spec_constant_double_simple.spvasm32
+    op_spec_constant_double_simple.spvasm64
+    op_spec_constant_false_simple.spvasm32
+    op_spec_constant_false_simple.spvasm64
+    op_spec_constant_float_simple.spvasm32
+    op_spec_constant_float_simple.spvasm64
+    op_spec_constant_half_simple.spvasm32
+    op_spec_constant_half_simple.spvasm64
+    op_spec_constant_true_simple.spvasm32
+    op_spec_constant_true_simple.spvasm64
+    op_spec_constant_uchar_simple.spvasm32
+    op_spec_constant_uchar_simple.spvasm64
+    op_spec_constant_uint_simple.spvasm32
+    op_spec_constant_uint_simple.spvasm64
+    op_spec_constant_ulong_simple.spvasm32
+    op_spec_constant_ulong_simple.spvasm64
+    op_spec_constant_ushort_simple.spvasm32
+    op_spec_constant_ushort_simple.spvasm64
+    opaque.spvasm32
+    opaque.spvasm64
+    phi_2.spvasm32
+    phi_2.spvasm64
+    phi_3.spvasm32
+    phi_3.spvasm64
+    phi_4.spvasm32
+    phi_4.spvasm64
+    select_if_dont_flatten.spvasm32
+    select_if_dont_flatten.spvasm64
+    select_if_flatten.spvasm32
+    select_if_flatten.spvasm64
+    select_if_none.spvasm32
+    select_if_none.spvasm64
+    select_switch_dont_flatten.spvasm32
+    select_switch_dont_flatten.spvasm64
+    select_switch_flatten.spvasm32
+    select_switch_flatten.spvasm64
+    select_switch_none.spvasm32
+    select_switch_none.spvasm64
+    spv1.1/basic.spvasm32
+    spv1.1/basic.spvasm64
+    spv1.2/basic.spvasm32
+    spv1.2/basic.spvasm64
+    spv1.3/basic.spvasm32
+    spv1.3/basic.spvasm64
+    spv1.4/basic.spvasm32
+    spv1.4/basic.spvasm64
+    spv1.4/copylogical_struct.spvasm32
+    spv1.4/copylogical_struct.spvasm64
+    spv1.4/copymemory_memory_operands.spvasm32
+    spv1.4/copymemory_memory_operands.spvasm64
+    spv1.4/image_operand_signextend.spvasm32
+    spv1.4/image_operand_signextend.spvasm64
+    spv1.4/image_operand_zeroextend.spvasm32
+    spv1.4/image_operand_zeroextend.spvasm64
+    spv1.4/loop_control_iterationmultiple.spvasm32
+    spv1.4/loop_control_iterationmultiple.spvasm64
+    spv1.4/loop_control_maxiterations.spvasm32
+    spv1.4/loop_control_maxiterations.spvasm64
+    spv1.4/loop_control_miniterations.spvasm32
+    spv1.4/loop_control_miniterations.spvasm64
+    spv1.4/loop_control_partialcount.spvasm32
+    spv1.4/loop_control_partialcount.spvasm64
+    spv1.4/loop_control_peelcount.spvasm32
+    spv1.4/loop_control_peelcount.spvasm64
+    spv1.4/no_integer_wrap_decoration_fadd_int.spvasm32
+    spv1.4/no_integer_wrap_decoration_fadd_int.spvasm64
+    spv1.4/no_integer_wrap_decoration_fadd_uint.spvasm32
+    spv1.4/no_integer_wrap_decoration_fadd_uint.spvasm64
+    spv1.4/no_integer_wrap_decoration_fmul_int.spvasm32
+    spv1.4/no_integer_wrap_decoration_fmul_int.spvasm64
+    spv1.4/no_integer_wrap_decoration_fmul_uint.spvasm32
+    spv1.4/no_integer_wrap_decoration_fmul_uint.spvasm64
+    spv1.4/no_integer_wrap_decoration_fnegate_int.spvasm32
+    spv1.4/no_integer_wrap_decoration_fnegate_int.spvasm64
+    spv1.4/no_integer_wrap_decoration_fshiftleft_int.spvasm32
+    spv1.4/no_integer_wrap_decoration_fshiftleft_int.spvasm64
+    spv1.4/no_integer_wrap_decoration_fshiftleft_uint.spvasm32
+    spv1.4/no_integer_wrap_decoration_fshiftleft_uint.spvasm64
+    spv1.4/no_integer_wrap_decoration_fsub_int.spvasm32
+    spv1.4/no_integer_wrap_decoration_fsub_int.spvasm64
+    spv1.4/no_integer_wrap_decoration_fsub_uint.spvasm32
+    spv1.4/no_integer_wrap_decoration_fsub_uint.spvasm64
+    spv1.4/nonwriteable_decoration_function_storage_class.spvasm32
+    spv1.4/nonwriteable_decoration_function_storage_class.spvasm64
+    spv1.4/ptrops.spvasm32
+    spv1.4/ptrops.spvasm64
+    spv1.4/select_struct.spvasm32
+    spv1.4/select_struct.spvasm64
+    spv1.4/usersemantic_decoratestring.spvasm32
+    spv1.4/usersemantic_decoratestring.spvasm64
+    spv1.4/usersemantic_memberdecoratestring.spvasm32
+    spv1.4/usersemantic_memberdecoratestring.spvasm64
+    spv1.5/basic.spvasm32
+    spv1.5/basic.spvasm64
+    spv1.6/basic.spvasm32
+    spv1.6/basic.spvasm64
+    undef_char_simple.spvasm32
+    undef_char_simple.spvasm64
+    undef_double_simple.spvasm32
+    undef_double_simple.spvasm64
+    undef_false_simple.spvasm32
+    undef_false_simple.spvasm64
+    undef_float_simple.spvasm32
+    undef_float_simple.spvasm64
+    undef_half_simple.spvasm32
+    undef_half_simple.spvasm64
+    undef_int3_simple.spvasm32
+    undef_int3_simple.spvasm64
+    undef_int4_simple.spvasm32
+    undef_int4_simple.spvasm64
+    undef_int_simple.spvasm32
+    undef_int_simple.spvasm64
+    undef_long_simple.spvasm32
+    undef_long_simple.spvasm64
+    undef_short_simple.spvasm32
+    undef_short_simple.spvasm64
+    undef_struct_int_char_simple.spvasm32
+    undef_struct_int_char_simple.spvasm64
+    undef_struct_int_float_simple.spvasm32
+    undef_struct_int_float_simple.spvasm64
+    undef_struct_struct_simple.spvasm32
+    undef_struct_struct_simple.spvasm64
+    undef_true_simple.spvasm32
+    undef_true_simple.spvasm64
+    undef_uchar_simple.spvasm32
+    undef_uchar_simple.spvasm64
+    undef_uint_simple.spvasm32
+    undef_uint_simple.spvasm64
+    undef_ulong_simple.spvasm32
+    undef_ulong_simple.spvasm64
+    undef_ushort_simple.spvasm32
+    undef_ushort_simple.spvasm64
+    unreachable_simple.spvasm32
+    unreachable_simple.spvasm64
+    vector_char16_extract.spvasm32
+    vector_char16_extract.spvasm64
+    vector_char16_insert.spvasm32
+    vector_char16_insert.spvasm64
+    vector_double2_extract.spvasm32
+    vector_double2_extract.spvasm64
+    vector_double2_insert.spvasm32
+    vector_double2_insert.spvasm64
+    vector_float4_extract.spvasm32
+    vector_float4_extract.spvasm64
+    vector_float4_insert.spvasm32
+    vector_float4_insert.spvasm64
+    vector_half8_extract.spvasm32
+    vector_half8_extract.spvasm64
+    vector_half8_insert.spvasm32
+    vector_half8_insert.spvasm64
+    vector_int4_extract.spvasm32
+    vector_int4_extract.spvasm64
+    vector_int4_insert.spvasm32
+    vector_int4_insert.spvasm64
+    vector_long2_extract.spvasm32
+    vector_long2_extract.spvasm64
+    vector_long2_insert.spvasm32
+    vector_long2_insert.spvasm64
+    vector_times_scalar_double.spvasm32
+    vector_times_scalar_double.spvasm64
+    vector_times_scalar_float.spvasm32
+    vector_times_scalar_float.spvasm64
+    vector_times_scalar_half.spvasm32
+    vector_times_scalar_half.spvasm64)
+
+set(assembled_spirv_binaries "")
+foreach(spirv_source ${spirv_sources})
+    string(REPLACE ".spvasm" ".spv" assembled_spirv_binary ${spirv_source})
+    set(assembled_spirv_binary "${CMAKE_CURRENT_BINARY_DIR}/../spirv_bin/${assembled_spirv_binary}")
+    list(APPEND assembled_spirv_binaries ${assembled_spirv_binary})
+endforeach()
+
+add_custom_command(
+    OUTPUT ${assembled_spirv_binaries}
+    COMMENT "Generating SPIR-V binaries..."
+    COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/assemble_spirv.py
+            --source-dir "${CMAKE_CURRENT_SOURCE_DIR}"
+            --output-dir "${CMAKE_CURRENT_BINARY_DIR}/../spirv_bin"
+            --assembler "${SPIRV_ASSEMBLER}"
+            --validator "${SPIRV_VALIDATOR}"
+    DEPENDS assemble_spirv.py ${spirv_sources}
+    USES_TERMINAL
+    VERBATIM)
+
+add_custom_target(spirv_new_binaries DEPENDS ${assembled_spirv_binaries})
diff --git a/test_conformance/spirv_new/assemble_spirv.py b/test_conformance/spirv_new/spirv_asm/assemble_spirv.py
similarity index 100%
rename from test_conformance/spirv_new/assemble_spirv.py
rename to test_conformance/spirv_new/spirv_asm/assemble_spirv.py

From 54afc2e7a5b75504a86bd5ec7f14a2dd48eefe3b Mon Sep 17 00:00:00 2001
From: Chuang-Yu Cheng <cy.cheng@imgtec.com>
Date: Wed, 12 Feb 2025 01:46:23 +0900
Subject: [PATCH 05/12] printf: Fix floating-point rounding consistency for RTZ
 devices (#2202)

1. In vector test, prepare RTZ answer for RTZ rounding.

2. In mixed_format_random test, for a given float 'arg', the test
previously used 'arg' directly to generate ref_str:
   ```
     ref_str << str_sprintf(format, arg);
   ```

   This approach incorrectly assumes:
   ```
     (float) arg == to_fp(to_str(arg));
   ```

   However, this assumption fails under RTZ rounding. For example:
   ```
     arg = 0xC642549C
     to_str(arg) = -12437.152343f
     to_fp_rtz(-12437.152343f) = 0xC642549B (-0X1.84A936P+13)
     to_fp_rte(-12437.152343f) = 0xC642549C (-0X1.84A938P+13)
   ```

To address this, the reference result is now computed based on the
literal float string rather than the original 'arg' value.
---
 test_conformance/printf/test_printf.cpp |  6 +++--
 test_conformance/printf/util_printf.cpp | 34 +++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp
index ef52f044..380878cb 100644
--- a/test_conformance/printf/test_printf.cpp
+++ b/test_conformance/printf/test_printf.cpp
@@ -317,8 +317,10 @@ cl_program makeMixedFormatPrintfProgram(cl_kernel* kernel_ptr,
         {
             const float max_range = 100000.f;
             float arg = get_random_float(-max_range, max_range, gMTdata);
-            args_str << str_sprintf("%f", arg) << "f, ";
-            ref_str << str_sprintf(format, arg) << ", ";
+            std::string arg_str = str_sprintf("%f", arg);
+            args_str << arg_str << "f, ";
+            float arg_deviceRound = std::stof(arg_str);
+            ref_str << str_sprintf(format, arg_deviceRound) << ", ";
         }
     }
     // Restore the original CPU rounding mode
diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp
index cd84c01a..803f13ab 100644
--- a/test_conformance/printf/util_printf.cpp
+++ b/test_conformance/printf/util_printf.cpp
@@ -1336,6 +1336,33 @@ std::vector<std::string> correctBufferVector = {
     "00512,01024,262144,1048576"
 };
 
+std::vector<std::string> correctBufferVectorRTZ = {
+
+    "1.00,2.00,3.00,4.00",
+
+    "0xfa,0xfb",
+
+    "0x1234,0x8765",
+
+    "0x12345678,0x87654321",
+
+    "12345678,98765432",
+
+    "1.00,2.00,3.00,4.00",
+
+    "1.23e+03,9.87e+05,4.99e-04",
+
+    "0x1p-2,0x1p-1,0x1p+0,0x1.8p+0",
+
+    "1,2,3,4,1.5,3.13999,2.5,3.5",
+
+    "1,2,3,4,5,6,7,10,11,0,40,100,200,400,1000,2000",
+
+    "+1,-2,+3,-4,+5,-6,+7,-8",
+
+    "00512,01024,262144,1048576"
+};
+
 //-----------------------------------------------------------
 
 //Test case for vector                                      |
@@ -1822,7 +1849,14 @@ void generateRef(const cl_device_id device)
             as they're constant and hard-coded
         */
         if (caseToTest->printFN == NULL)
+        {
+            if (caseToTest->_type == TYPE_VECTOR
+                && fpConfigSingle == CL_FP_ROUND_TO_ZERO)
+            {
+                caseToTest->_correctBuffer = correctBufferVectorRTZ;
+            }
             continue;
+        }
 
         // Make sure the reference result is empty
         assert(caseToTest->_correctBuffer.size() == 0);

From 044ec98f664f1a773c6b3c7681f576a18ebc563d Mon Sep 17 00:00:00 2001
From: Ewan Crawford <ewan@codeplay.com>
Date: Tue, 11 Feb 2025 16:47:15 +0000
Subject: [PATCH 06/12] Command-buffer queue compatibility test update (#2230)

Update cl_khr_command_buffer tests to reflect changes from
https://github.com/KhronosGroup/OpenCL-Docs/pull/1292

* Moves negative test for
`CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR` from
command-buffer creation to enqueue.
* Moves negative test for
`CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR` from
command-buffer creation to enqueue.
* Introduces a negative test for `CL_INVALID_DEVICE` on command-buffer
enqueue for new error condition in spec. Although it requires a context
to be contain more than 1 device, which I'm not sure if possible in
current test framework.
* Introduces a new test that created a command-buffer using a queue
without the profiling property set, then enqueues the command-buffer to
a queue with the profiling property set.
* Introduces a new test that creates a command-buffer with an in-order
queue, enqueued on an out-of-order queue.
* Introduces a new test that creates a command-buffer with an
out-of-order queue, enqueued on an in-order queue.
---
 .../basic_command_buffer.cpp                  |   7 +-
 .../basic_command_buffer.h                    |   4 +-
 .../command_buffer_profiling.cpp              | 197 ++++++++-----
 .../command_buffer_queue_substitution.cpp     | 156 ++++++++++
 .../extensions/cl_khr_command_buffer/main.cpp |  10 +-
 .../negative_command_buffer_create.cpp        | 116 --------
 .../negative_command_buffer_enqueue.cpp       | 272 +++++++++++++-----
 .../extensions/cl_khr_command_buffer/procs.h  |  35 ++-
 8 files changed, 523 insertions(+), 274 deletions(-)

diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
index 803daf6b..39e17762 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
@@ -61,10 +61,9 @@ bool BasicCommandBufferTest::Skip()
                "CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR");
 
     cl_command_queue_properties queue_properties;
-    error = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES,
-                                  sizeof(queue_properties), &queue_properties,
-                                  NULL);
-    test_error(error, "Unable to query CL_QUEUE_PROPERTIES");
+    error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
+                            sizeof(queue_properties), &queue_properties, NULL);
+    test_error(error, "Unable to query CL_DEVICE_QUEUE_PROPERTIES");
     queue_out_of_order_support =
         queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
 
diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
index aa902013..f663e0a9 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
@@ -104,9 +104,9 @@ int MakeAndRunTest(cl_device_id device, cl_context context,
         cl_version extension_version =
             get_extension_version(device, "cl_khr_command_buffer");
 
-        if (extension_version != CL_MAKE_VERSION(0, 9, 6))
+        if (extension_version != CL_MAKE_VERSION(0, 9, 7))
         {
-            log_info("cl_khr_command_buffer version 0.9.6 is required to run "
+            log_info("cl_khr_command_buffer version 0.9.7 is required to run "
                      "the test, skipping.\n ");
             return TEST_SKIPPED_ITSELF;
         }
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp
index e715ddc9..199d2261 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp
@@ -21,11 +21,75 @@
 
 namespace {
 
+#define ADD_PROF_PARAM(prop)                                                   \
+    {                                                                          \
+        prop, #prop, 0                                                         \
+    }
+
+struct ProfilingParam
+{
+    cl_profiling_info param;
+    std::string name;
+    cl_ulong value;
+};
+
+cl_int VerifyResult(const clEventWrapper& event)
+{
+    cl_int error = CL_SUCCESS;
+    cl_int status;
+    error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+                           sizeof(status), &status, NULL);
+    test_error(error, "clGetEventInfo() failed");
+
+    if (status != CL_SUCCESS)
+        test_fail("Kernel execution status %d! (%s:%d)\n", status, __FILE__,
+                  __LINE__);
+
+    std::vector<ProfilingParam> prof_params = {
+        ADD_PROF_PARAM(CL_PROFILING_COMMAND_QUEUED),
+        ADD_PROF_PARAM(CL_PROFILING_COMMAND_SUBMIT),
+        ADD_PROF_PARAM(CL_PROFILING_COMMAND_START),
+        ADD_PROF_PARAM(CL_PROFILING_COMMAND_END),
+    };
+
+    // gather profiling timestamps
+    for (auto&& p : prof_params)
+    {
+        error = clGetEventProfilingInfo(event, p.param, sizeof(p.value),
+                                        &p.value, NULL);
+        test_error(error, "clGetEventProfilingInfo() failed");
+    }
+
+    // verify the results by comparing timestamps
+    bool all_vals_0 = prof_params.front().value != 0;
+    for (size_t i = 1; i < prof_params.size(); i++)
+    {
+        all_vals_0 = (prof_params[i].value != 0) ? false : all_vals_0;
+        if (prof_params[i - 1].value > prof_params[i].value)
+        {
+            log_error("Profiling %s=0x%x should be smaller than or equal "
+                      "to %s=0x%x for "
+                      "kernels that use the on-device queue",
+                      prof_params[i - 1].name.c_str(), prof_params[i - 1].param,
+                      prof_params[i].name.c_str(), prof_params[i].param);
+            return TEST_FAIL;
+        }
+    }
+
+    if (all_vals_0)
+    {
+        log_error("All values are 0. This is exceedingly unlikely.\n");
+        return TEST_FAIL;
+    }
+
+    log_info("Profiling info for command-buffer kernel succeeded.\n");
+    return TEST_PASS;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Command-buffer profiling test cases:
 // -all commands are recorded to a single command-queue
 // -profiling a command-buffer with simultaneous use
-
 template <bool simultaneous_request>
 struct CommandBufferProfiling : public BasicCommandBufferTest
 {
@@ -133,73 +197,6 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
         return CL_SUCCESS;
     }
 
-    //--------------------------------------------------------------------------
-#define ADD_PROF_PARAM(prop)                                                   \
-    {                                                                          \
-        prop, #prop, 0                                                         \
-    }
-    struct ProfilingParam
-    {
-        cl_profiling_info param;
-        std::string name;
-        cl_ulong value;
-    };
-
-    //--------------------------------------------------------------------------
-    cl_int VerifyResult(const clEventWrapper& event)
-    {
-        cl_int error = CL_SUCCESS;
-        cl_int status;
-        error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
-                               sizeof(status), &status, NULL);
-        test_error(error, "clGetEventInfo() failed");
-
-        if (status != CL_SUCCESS)
-            test_fail("Kernel execution status %d! (%s:%d)\n", status, __FILE__,
-                      __LINE__);
-
-        std::vector<ProfilingParam> prof_params = {
-            ADD_PROF_PARAM(CL_PROFILING_COMMAND_QUEUED),
-            ADD_PROF_PARAM(CL_PROFILING_COMMAND_SUBMIT),
-            ADD_PROF_PARAM(CL_PROFILING_COMMAND_START),
-            ADD_PROF_PARAM(CL_PROFILING_COMMAND_END),
-        };
-
-        // gather profiling timestamps
-        for (auto&& p : prof_params)
-        {
-            error = clGetEventProfilingInfo(event, p.param, sizeof(p.value),
-                                            &p.value, NULL);
-            test_error(error, "clGetEventProfilingInfo() failed");
-        }
-
-        // verify the results by comparing timestamps
-        bool all_vals_0 = prof_params.front().value != 0;
-        for (size_t i = 1; i < prof_params.size(); i++)
-        {
-            all_vals_0 = (prof_params[i].value != 0) ? false : all_vals_0;
-            if (prof_params[i - 1].value > prof_params[i].value)
-            {
-                log_error("Profiling %s=0x%x should be smaller than or equal "
-                          "to %s=0x%x for "
-                          "kernels that use the on-device queue",
-                          prof_params[i - 1].name.c_str(),
-                          prof_params[i - 1].param, prof_params[i].name.c_str(),
-                          prof_params[i].param);
-                return TEST_FAIL;
-            }
-        }
-
-        if (all_vals_0)
-        {
-            log_error("All values are 0. This is exceedingly unlikely.\n");
-            return TEST_FAIL;
-        }
-
-        log_info("Profiling info for command-buffer kernel succeeded.\n");
-        return TEST_PASS;
-    }
-
     //--------------------------------------------------------------------------
     cl_int RunSingle()
     {
@@ -301,6 +298,63 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
     const cl_int pattern = 0xA;
 };
 
+// Test that we can create a command-buffer using a queue without the profiling
+// property, which is enqueued to an queue with the profiling property, and
+// the event returned can queried for profiling info.
+struct CommandBufferSubstituteQueueProfiling : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        clEventWrapper event;
+        error = clEnqueueCommandBufferKHR(1, &profiling_queue, command_buffer,
+                                          0, nullptr, &event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clFinish(profiling_queue);
+        test_error(error, "clFinish failed");
+
+        error = VerifyResult(event);
+        test_error(error, "VerifyResult failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUp(int elements) override
+    {
+        cl_command_queue_properties supported_properties;
+        cl_int error = clGetDeviceInfo(
+            device, CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR,
+            sizeof(supported_properties), &supported_properties, NULL);
+        test_error(error,
+                   "Unable to query "
+                   "CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR");
+
+        // CL_QUEUE_PROFILING_ENABLE is mandated minimum property returned by
+        // CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR
+        if (!(supported_properties & CL_QUEUE_PROFILING_ENABLE))
+        {
+            return TEST_FAIL;
+        }
+
+        profiling_queue = clCreateCommandQueue(
+            context, device, CL_QUEUE_PROFILING_ENABLE, &error);
+        test_error(error, "clCreateCommandQueue failed");
+
+        return BasicCommandBufferTest::SetUp(elements);
+    }
+
+    clCommandQueueWrapper profiling_queue = nullptr;
+};
 } // anonymous namespace
 
 int test_basic_profiling(cl_device_id device, cl_context context,
@@ -316,3 +370,10 @@ int test_simultaneous_profiling(cl_device_id device, cl_context context,
     return MakeAndRunTest<CommandBufferProfiling<true>>(device, context, queue,
                                                         num_elements);
 }
+
+int test_substitute_queue_profiling(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<CommandBufferSubstituteQueueProfiling>(
+        device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_queue_substitution.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_queue_substitution.cpp
index 7aa262aa..1f74485b 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_queue_substitution.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_queue_substitution.cpp
@@ -252,6 +252,148 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
     clEventWrapper user_event;
 };
 
+// Command-queue substitution tests which handles below cases:
+// * Template param is true - Create a command-buffer with an in-order queue,
+//   and enqueue command-buffer to an out-of-order queue.
+// * Template param is false - Create a command-buffer with an out-of-order
+//   queue, and enqueue command-buffer to an in-order queue.
+template <bool is_ooo_test>
+struct QueueOrderTest : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    QueueOrderTest(cl_device_id device, cl_context context,
+                   cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue), ooo_queue(nullptr),
+          ooo_command_buffer(this)
+    {}
+
+    cl_int RecordOutOfOrderCommandBuffer()
+    {
+        cl_sync_point_khr sync_points[2];
+        const cl_int pattern = pattern_pri;
+        cl_int error =
+            clCommandFillBufferKHR(ooo_command_buffer, nullptr, nullptr, in_mem,
+                                   &pattern, sizeof(cl_int), 0, data_size(), 0,
+                                   nullptr, &sync_points[0], nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandFillBufferKHR(ooo_command_buffer, nullptr, nullptr,
+                                       out_mem, &overwritten_pattern,
+                                       sizeof(cl_int), 0, data_size(), 0,
+                                       nullptr, &sync_points[1], nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandNDRangeKernelKHR(
+            ooo_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &num_elements, nullptr, 2, sync_points, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_int RecordInOrderCommandBuffer()
+    {
+        const cl_int pattern = pattern_pri;
+        cl_int error = clCommandFillBufferKHR(
+            command_buffer, nullptr, nullptr, in_mem, &pattern, sizeof(cl_int),
+            0, data_size(), 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandFillBufferKHR(
+            command_buffer, nullptr, nullptr, out_mem, &overwritten_pattern,
+            sizeof(cl_int), 0, data_size(), 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_int Run() override
+    {
+        cl_int error = CL_SUCCESS;
+        if (is_ooo_test)
+        {
+            // command-buffer created in-order, but executed on ooo queue
+            error = RecordInOrderCommandBuffer();
+            test_error(error, "RecordInOrderCommandBuffer failed");
+        }
+        else
+        {
+            // command-buffer created ooo with sync point deps, but
+            // executed on in-order queue
+            error = RecordOutOfOrderCommandBuffer();
+            test_error(error, "RecordOutOfOrderCommandBuffer failed");
+        }
+
+        clCommandBufferWrapper& test_command_buffer =
+            is_ooo_test ? command_buffer : ooo_command_buffer;
+        error = clFinalizeCommandBufferKHR(test_command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        clCommandQueueWrapper& test_queue = is_ooo_test ? ooo_queue : queue;
+        error = clEnqueueCommandBufferKHR(1, &test_queue, test_command_buffer,
+                                          0, nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clFinish(test_queue);
+        test_error(error, "clFinish failed");
+
+        // Verify output
+        std::vector<cl_int> output_buffer(num_elements);
+        error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
+                                    output_buffer.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern_pri, output_buffer[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        ooo_queue = clCreateCommandQueue(
+            context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
+        test_error(error,
+                   "clCreateCommandQueue with "
+                   "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE failed");
+
+        ooo_command_buffer =
+            clCreateCommandBufferKHR(1, &ooo_queue, nullptr, &error);
+        test_error(error, "clCreateCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    bool Skip() override
+    {
+        if (BasicCommandBufferTest::Skip()) return true;
+
+        // Skip if we want to enqueue to an out-of-order command-queue,
+        // and this isn't supported.
+        bool skip = is_ooo_test ? !out_of_order_support : false;
+
+        // Skip if device doesn't support out-of-order queues, we need
+        // to create one for both instantiations of the test.
+        return skip || !queue_out_of_order_support;
+    }
+
+    clCommandQueueWrapper ooo_queue;
+    clCommandBufferWrapper ooo_command_buffer;
+
+    const cl_int overwritten_pattern = 0xACDC;
+    const cl_int pattern_pri = 42;
+};
 } // anonymous namespace
 
 int test_queue_substitution(cl_device_id device, cl_context context,
@@ -276,3 +418,17 @@ int test_simultaneous_queue_substitution(cl_device_id device,
     return MakeAndRunTest<SubstituteQueueTest<false, true>>(
         device, context, queue, num_elements);
 }
+
+int test_queue_substitute_in_order(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<QueueOrderTest<false>>(device, context, queue,
+                                                 num_elements);
+}
+
+int test_queue_substitute_out_of_order(cl_device_id device, cl_context context,
+                                       cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<QueueOrderTest<true>>(device, context, queue,
+                                                num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/main.cpp
index 93e1448e..941a9bb0 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/main.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/main.cpp
@@ -30,6 +30,7 @@ test_definition test_list[] = {
     ADD_TEST(info_context),
     ADD_TEST(basic_profiling),
     ADD_TEST(simultaneous_profiling),
+    ADD_TEST(substitute_queue_profiling),
     ADD_TEST(regular_wait_for_command_buffer),
     ADD_TEST(command_buffer_wait_for_command_buffer),
     ADD_TEST(command_buffer_wait_for_sec_command_buffer),
@@ -44,6 +45,8 @@ test_definition test_list[] = {
     ADD_TEST(queue_substitution),
     ADD_TEST(properties_queue_substitution),
     ADD_TEST(simultaneous_queue_substitution),
+    ADD_TEST(queue_substitute_in_order),
+    ADD_TEST(queue_substitute_out_of_order),
     ADD_TEST(fill_image),
     ADD_TEST(fill_buffer),
     ADD_TEST(fill_svm_buffer),
@@ -93,9 +96,6 @@ test_definition test_list[] = {
     ADD_TEST(negative_create_command_buffer_null_queues),
     ADD_TEST(negative_create_command_buffer_repeated_properties),
     ADD_TEST(negative_create_command_buffer_not_supported_properties),
-    ADD_TEST(negative_create_command_buffer_queue_without_min_properties),
-    ADD_TEST(
-        negative_create_command_buffer_device_does_not_support_out_of_order_queue),
     ADD_TEST(negative_command_ndrange_queue_not_null),
     ADD_TEST(negative_command_ndrange_kernel_with_different_context),
     ADD_TEST(negative_command_ndrange_kernel_sync_points_null_or_num_zero),
@@ -155,10 +155,12 @@ test_definition test_list[] = {
     ADD_TEST(
         negative_enqueue_command_buffer_num_queues_not_zero_different_while_buffer_creation),
     ADD_TEST(negative_enqueue_command_buffer_not_valid_queue_in_queues),
-    ADD_TEST(negative_enqueue_queue_not_compatible),
     ADD_TEST(negative_enqueue_queue_with_different_context),
     ADD_TEST(negative_enqueue_command_buffer_different_context_than_event),
     ADD_TEST(negative_enqueue_event_wait_list_null_or_events_null),
+    ADD_TEST(negative_enqueue_queue_without_reqd_properties),
+    ADD_TEST(negative_enqueue_with_unsupported_queue_property),
+    ADD_TEST(negative_enqueue_inconsistent_device),
 };
 
 int main(int argc, const char *argv[])
diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_create.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_create.cpp
index 72e50e66..ecb2da39 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_create.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_create.cpp
@@ -201,105 +201,6 @@ struct CreateCommandBufferNotSupportedProperties : public BasicCommandBufferTest
 
     cl_command_buffer_properties_khr unsupported_prop = 0;
 };
-
-// CL_INCOMPATIBLE_COMMAND_QUEUE_KHR if the properties of any command-queue in
-// queues does not contain the minimum properties specified by
-// CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR.
-struct CreateCommandBufferQueueWithoutMinProperties
-    : public BasicCommandBufferTest
-{
-    using BasicCommandBufferTest::BasicCommandBufferTest;
-
-    cl_int Run() override
-    {
-        cl_int error = CL_SUCCESS;
-
-        command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &error);
-        test_failure_error_ret(error, CL_INCOMPATIBLE_COMMAND_QUEUE_KHR,
-                               "clCreateCommandBufferKHR should return "
-                               "CL_INCOMPATIBLE_COMMAND_QUEUE_KHR",
-                               TEST_FAIL);
-
-        return CL_SUCCESS;
-    }
-
-    bool Skip() override
-    {
-        if (BasicCommandBufferTest::Skip()) return true;
-
-        cl_command_queue_properties required_properties;
-        cl_int error = clGetDeviceInfo(
-            device, CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR,
-            sizeof(required_properties), &required_properties, NULL);
-        test_error(error,
-                   "Unable to query "
-                   "CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR");
-
-        cl_command_queue_properties queue_properties;
-        error = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES,
-                                      sizeof(queue_properties),
-                                      &queue_properties, NULL);
-        test_error(error, "Unable to query CL_QUEUE_PROPERTIES");
-
-        // Skip if queue properties contains those required
-        return required_properties == (required_properties & queue_properties);
-    }
-};
-
-// CL_INCOMPATIBLE_COMMAND_QUEUE_KHR if any command-queue in queues is an
-// out-of-order command-queue and the device associated with the command-queue
-// does not return CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE from
-// CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR
-struct CreateCommandBufferDeviceDoesNotSupportOutOfOderQueue
-    : public BasicCommandBufferTest
-{
-    CreateCommandBufferDeviceDoesNotSupportOutOfOderQueue(
-        cl_device_id device, cl_context context, cl_command_queue queue)
-        : BasicCommandBufferTest(device, context, queue),
-          out_of_order_queue(nullptr)
-    {}
-
-    cl_int Run() override
-    {
-        cl_int error = CL_SUCCESS;
-
-        command_buffer =
-            clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
-        test_failure_error_ret(error, CL_INCOMPATIBLE_COMMAND_QUEUE_KHR,
-                               "clCreateCommandBufferKHR should return "
-                               "CL_INCOMPATIBLE_COMMAND_QUEUE_KHR",
-                               TEST_FAIL);
-
-        return CL_SUCCESS;
-    }
-
-    cl_int SetUp(int elements) override
-    {
-        cl_int error = CL_SUCCESS;
-
-        error = BasicCommandBufferTest::SetUp(elements);
-        test_error(error, "BasicCommandBufferTest::SetUp failed");
-
-        out_of_order_queue = clCreateCommandQueue(
-            context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
-        test_error(error,
-                   "clCreateCommandQueue with "
-                   "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE failed");
-
-        return CL_SUCCESS;
-    }
-
-    bool Skip() override
-    {
-        if (BasicCommandBufferTest::Skip()) return true;
-
-        // If device does not support out of order queue or if device supports
-        // out of order command buffer test should be skipped
-        return !queue_out_of_order_support || out_of_order_support;
-    }
-
-    clCommandQueueWrapper out_of_order_queue;
-};
 };
 
 int test_negative_create_command_buffer_num_queues(cl_device_id device,
@@ -335,20 +236,3 @@ int test_negative_create_command_buffer_not_supported_properties(
     return MakeAndRunTest<CreateCommandBufferNotSupportedProperties>(
         device, context, queue, num_elements);
 }
-
-int test_negative_create_command_buffer_queue_without_min_properties(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    int num_elements)
-{
-    return MakeAndRunTest<CreateCommandBufferQueueWithoutMinProperties>(
-        device, context, queue, num_elements);
-}
-
-int test_negative_create_command_buffer_device_does_not_support_out_of_order_queue(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    int num_elements)
-{
-    return MakeAndRunTest<
-        CreateCommandBufferDeviceDoesNotSupportOutOfOderQueue>(
-        device, context, queue, num_elements);
-}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp
index f13836fb..cdc9398a 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp
@@ -16,7 +16,6 @@
 #include "basic_command_buffer.h"
 #include "procs.h"
 
-
 //--------------------------------------------------------------------------
 namespace {
 
@@ -293,63 +292,6 @@ struct EnqueueCommandBufferNotValidQueueInQueues : public BasicCommandBufferTest
     }
 };
 
-// CL_INCOMPATIBLE_COMMAND_QUEUE_KHR if any element of queues is not compatible
-// with the command-queue set on command_buffer creation at the same list index.
-struct EnqueueCommandBufferQueueNotCompatible : public BasicCommandBufferTest
-{
-    EnqueueCommandBufferQueueNotCompatible(cl_device_id device,
-                                           cl_context context,
-                                           cl_command_queue queue)
-        : BasicCommandBufferTest(device, context, queue),
-          queue_not_compatible(nullptr)
-    {}
-
-    cl_int Run() override
-    {
-        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
-        test_error(error, "clFinalizeCommandBufferKHR failed");
-
-        error = clEnqueueCommandBufferKHR(1, &queue_not_compatible,
-                                          command_buffer, 0, nullptr, nullptr);
-
-        test_failure_error_ret(error, CL_INCOMPATIBLE_COMMAND_QUEUE_KHR,
-                               "clEnqueueCommandBufferKHR should return "
-                               "CL_INCOMPATIBLE_COMMAND_QUEUE_KHR",
-                               TEST_FAIL);
-
-        return CL_SUCCESS;
-    }
-
-    cl_int SetUp(int elements) override
-    {
-        cl_int error = BasicCommandBufferTest::SetUp(elements);
-        test_error(error, "BasicCommandBufferTest::SetUp failed");
-
-        queue_not_compatible = clCreateCommandQueue(
-            context, device, CL_QUEUE_PROFILING_ENABLE, &error);
-        test_error(error, "clCreateCommandQueue failed");
-
-        cl_command_queue_properties queue_properties;
-        error = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES,
-                                      sizeof(queue_properties),
-                                      &queue_properties, NULL);
-        test_error(error, "Unable to query CL_QUEUE_PROPERTIES");
-
-        cl_command_queue_properties queue_not_compatible_properties;
-        error = clGetCommandQueueInfo(queue_not_compatible, CL_QUEUE_PROPERTIES,
-                                      sizeof(queue_not_compatible_properties),
-                                      &queue_not_compatible_properties, NULL);
-        test_error(error, "Unable to query CL_QUEUE_PROPERTIES");
-
-        test_assert_error(queue_properties != queue_not_compatible_properties,
-                          "Queues properties must be different");
-
-        return CL_SUCCESS;
-    }
-
-    clCommandQueueWrapper queue_not_compatible;
-};
-
 // CL_INVALID_CONTEXT if any element of queues does not have the same context as
 // the command-queue set on command_buffer creation at the same list index.
 struct EnqueueCommandBufferQueueWithDifferentContext
@@ -491,6 +433,185 @@ struct EnqueueCommandBufferEventWaitListNullOrEventsNull
         return CL_SUCCESS;
     }
 };
+
+// CL_INCOMPATIBLE_COMMAND_QUEUE_KHR if the properties of any command-queue in
+// queues does not contain the minimum properties specified by
+// CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR.
+struct EnqueueCommandBufferQueueWithoutReqdProperties
+    : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_failure_error_ret(error, CL_INCOMPATIBLE_COMMAND_QUEUE_KHR,
+                               "clEnqueueCommandBufferKHR should return "
+                               "CL_INCOMPATIBLE_COMMAND_QUEUE_KHR",
+                               TEST_FAIL);
+
+        error = clEnqueueCommandBufferKHR(1, &queue, command_buffer, 0, nullptr,
+                                          nullptr);
+        test_failure_error_ret(error, CL_INCOMPATIBLE_COMMAND_QUEUE_KHR,
+                               "clEnqueueCommandBufferKHR should return "
+                               "CL_INCOMPATIBLE_COMMAND_QUEUE_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+
+    bool Skip() override
+    {
+        // Omit BasicCommandBufferTest::Skip() here because it skips
+        // if we don't have required properties, which is what we want to
+        // test an error for.
+
+        cl_command_queue_properties required_properties;
+        cl_int error = clGetDeviceInfo(
+            device, CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR,
+            sizeof(required_properties), &required_properties, NULL);
+        test_error(error,
+                   "Unable to query "
+                   "CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR");
+
+        cl_command_queue_properties queue_properties;
+        error = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES,
+                                      sizeof(queue_properties),
+                                      &queue_properties, NULL);
+        test_error(error, "Unable to query CL_QUEUE_PROPERTIES");
+
+        // Skip if queue properties contains those required
+        return required_properties == (required_properties & queue_properties);
+    }
+};
+
+// CL_INCOMPATIBLE_COMMAND_QUEUE_KHR if any command-queue in queues is an
+// out-of-order command-queue and the device associated with the command-queue
+// does not return CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE from
+// CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR
+struct EnqueueCommandBufferWithUnsupportedQueueProperty
+    : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(1, &out_of_order_queue,
+                                          command_buffer, 0, nullptr, nullptr);
+        test_failure_error_ret(error, CL_INCOMPATIBLE_COMMAND_QUEUE_KHR,
+                               "clEnqueueCommandBufferKHR should return "
+                               "CL_INCOMPATIBLE_COMMAND_QUEUE_KHR",
+                               TEST_FAIL);
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        out_of_order_queue = clCreateCommandQueue(
+            context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
+        test_error(error,
+                   "clCreateCommandQueue with "
+                   "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE failed");
+
+        return CL_SUCCESS;
+    }
+
+    bool Skip() override
+    {
+        if (BasicCommandBufferTest::Skip()) return true;
+
+        // If device does not support out of order queue or if device supports
+        // out of order command buffer test should be skipped
+        return !queue_out_of_order_support || out_of_order_support;
+    }
+
+    clCommandQueueWrapper out_of_order_queue = nullptr;
+};
+
+// CL_INVALID_DEVICE if any element of queues does not have the same device
+// as the command-queue set on command_buffer creation at the
+// same list index.
+struct EnqueueCommandBufferInconsistentDevice : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(1, &second_device_queue,
+                                          command_buffer, 0, nullptr, nullptr);
+        test_failure_error_ret(error, CL_INCOMPATIBLE_COMMAND_QUEUE_KHR,
+                               "clEnqueueCommandBufferKHR should return "
+                               "CL_INCOMPATIBLE_COMMAND_QUEUE_KHR",
+                               TEST_FAIL);
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        cl_device_id second_device = nullptr;
+        for (auto query_device : devices)
+        {
+            if (query_device != device)
+            {
+                second_device = query_device;
+                break;
+            }
+        }
+
+        test_assert_error(second_device != nullptr,
+                          "Second device not found for testing");
+
+        second_device_queue =
+            clCreateCommandQueue(context, second_device, 0, &error);
+        test_error(error, "clCreateCommandQueue failed");
+
+        return CL_SUCCESS;
+    }
+
+    bool Skip() override
+    {
+        if (BasicCommandBufferTest::Skip()) return true;
+
+        size_t context_devices_size;
+        cl_int error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
+                                        &context_devices_size);
+        test_error(error, "clGetContextInfo failed");
+
+        size_t num_devices = context_devices_size / sizeof(cl_device_id);
+
+        if (num_devices < 2)
+        {
+            // We need a second device for test
+            return true;
+        }
+
+        devices.resize(num_devices);
+        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, num_devices,
+                                 devices.data(), nullptr);
+        test_error(error, "clGetContextInfo failed");
+
+        return false;
+    }
+
+    std::vector<cl_device_id> devices;
+    clCommandQueueWrapper second_device_queue = nullptr;
+};
 };
 
 int test_negative_enqueue_command_buffer_invalid_command_buffer(
@@ -544,15 +665,6 @@ int test_negative_enqueue_command_buffer_not_valid_queue_in_queues(
         device, context, queue, num_elements);
 }
 
-int test_negative_enqueue_queue_not_compatible(cl_device_id device,
-                                               cl_context context,
-                                               cl_command_queue queue,
-                                               int num_elements)
-{
-    return MakeAndRunTest<EnqueueCommandBufferQueueNotCompatible>(
-        device, context, queue, num_elements);
-}
-
 int test_negative_enqueue_queue_with_different_context(cl_device_id device,
                                                        cl_context context,
                                                        cl_command_queue queue,
@@ -577,3 +689,29 @@ int test_negative_enqueue_event_wait_list_null_or_events_null(
     return MakeAndRunTest<EnqueueCommandBufferEventWaitListNullOrEventsNull>(
         device, context, queue, num_elements);
 }
+
+int test_negative_enqueue_queue_without_reqd_properties(cl_device_id device,
+                                                        cl_context context,
+                                                        cl_command_queue queue,
+                                                        int num_elements)
+{
+    return MakeAndRunTest<EnqueueCommandBufferQueueWithoutReqdProperties>(
+        device, context, queue, num_elements);
+}
+
+int test_negative_enqueue_with_unsupported_queue_property(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    return MakeAndRunTest<EnqueueCommandBufferWithUnsupportedQueueProperty>(
+        device, context, queue, num_elements);
+}
+
+int test_negative_enqueue_inconsistent_device(cl_device_id device,
+                                              cl_context context,
+                                              cl_command_queue queue,
+                                              int num_elements)
+{
+    return MakeAndRunTest<EnqueueCommandBufferInconsistentDevice>(
+        device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h
index 2a1e199b..ba89de8e 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/procs.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h
@@ -91,6 +91,10 @@ extern int test_basic_profiling(cl_device_id device, cl_context context,
 extern int test_simultaneous_profiling(cl_device_id device, cl_context context,
                                        cl_command_queue queue,
                                        int num_elements);
+extern int test_substitute_queue_profiling(cl_device_id device,
+                                           cl_context context,
+                                           cl_command_queue queue,
+                                           int num_elements);
 extern int test_queue_substitution(cl_device_id device, cl_context context,
                                    cl_command_queue queue, int num_elements);
 extern int test_properties_queue_substitution(cl_device_id device,
@@ -101,6 +105,14 @@ extern int test_simultaneous_queue_substitution(cl_device_id device,
                                                 cl_context context,
                                                 cl_command_queue queue,
                                                 int num_elements);
+extern int test_queue_substitute_in_order(cl_device_id device,
+                                          cl_context context,
+                                          cl_command_queue queue,
+                                          int num_elements);
+extern int test_queue_substitute_out_of_order(cl_device_id device,
+                                              cl_context context,
+                                              cl_command_queue queue,
+                                              int num_elements);
 extern int test_fill_image(cl_device_id device, cl_context context,
                            cl_command_queue queue, int num_elements);
 extern int test_fill_buffer(cl_device_id device, cl_context context,
@@ -211,13 +223,6 @@ extern int test_negative_create_command_buffer_repeated_properties(
 extern int test_negative_create_command_buffer_not_supported_properties(
     cl_device_id device, cl_context context, cl_command_queue queue,
     int num_elements);
-extern int test_negative_create_command_buffer_queue_without_min_properties(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int
-test_negative_create_command_buffer_device_does_not_support_out_of_order_queue(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    int num_elements);
 extern int test_negative_command_ndrange_queue_not_null(cl_device_id device,
                                                         cl_context context,
                                                         cl_command_queue queue,
@@ -383,10 +388,6 @@ extern int test_negative_command_buffer_copy_image_mutable_handle_not_null(
 extern int test_negative_enqueue_command_buffer_not_valid_queue_in_queues(
     cl_device_id device, cl_context context, cl_command_queue queue,
     int num_elements);
-extern int test_negative_enqueue_queue_not_compatible(cl_device_id device,
-                                                      cl_context context,
-                                                      cl_command_queue queue,
-                                                      int num_elements);
 extern int test_negative_enqueue_queue_with_different_context(
     cl_device_id device, cl_context context, cl_command_queue queue,
     int num_elements);
@@ -396,6 +397,14 @@ extern int test_negative_enqueue_command_buffer_different_context_than_event(
 extern int test_negative_enqueue_event_wait_list_null_or_events_null(
     cl_device_id device, cl_context context, cl_command_queue queue,
     int num_elements);
-
-
+extern int test_negative_enqueue_queue_without_reqd_properties(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_enqueue_with_unsupported_queue_property(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_enqueue_inconsistent_device(cl_device_id device,
+                                                     cl_context context,
+                                                     cl_command_queue queue,
+                                                     int num_elements);
 #endif // CL_KHR_COMMAND_BUFFER_PROCS_H

From 7188c4b29b9fbe6797d9440bd1c7b92ab6a95d28 Mon Sep 17 00:00:00 2001
From: Sreelakshmi Haridas Maruthur <quic_sharidas@quicinc.com>
Date: Tue, 11 Feb 2025 09:49:34 -0700
Subject: [PATCH 07/12] allocations: Make buffer kernel more efficient for
 multiple allocations (#2235)

- Fix malloc for 'access_string' and 'kernel_string'.
- Fix typo in 'number_of_work_itmes'.

Co-authored-by: Sreelakshmi Haridas Maruthur <sharidas@quicinc.com>
---
 .../allocations/allocation_execute.cpp        | 57 ++++++++++++-------
 test_conformance/allocations/main.cpp         |  6 +-
 2 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/test_conformance/allocations/allocation_execute.cpp b/test_conformance/allocations/allocation_execute.cpp
index f01dfd8f..3af7f85e 100644
--- a/test_conformance/allocations/allocation_execute.cpp
+++ b/test_conformance/allocations/allocation_execute.cpp
@@ -26,13 +26,18 @@ const char *buffer_kernel_pattern = {
     "\tint tid = get_global_id(0);\n"
     "\tuint r = 0;\n"
     "\t%s i;\n"
-    "\tfor(i=(%s)tid*(%s)per_item; i<(%s)(1+tid)*(%s)per_item; i++) {\n"
     "%s"
-    "\t}\n"
     "\tresult[tid] = r;\n"
     "}\n"
 };
 
+const char *accumulate_pattern = {
+    "\t%s end%d = min((%s)(1+tid)*(%s)per_item, array_sizes[%d]);\n"
+    "\tfor(i=(%s)tid*(%s)per_item; i<end%d; i++) {\n"
+    "\t\tr += buffer%d[i];\n"
+    "\t}\n"
+};
+
 const char *image_kernel_pattern = {
     "__kernel void sample_test(%s __global uint *result)\n"
     "{\n"
@@ -159,7 +164,7 @@ int check_image(cl_command_queue queue, cl_mem mem)
 int execute_kernel(cl_context context, cl_command_queue *queue,
                    cl_device_id device_id, int test, cl_mem mems[],
                    int number_of_mems_used, int verify_checksum,
-                   unsigned int number_of_work_itmes)
+                   unsigned int number_of_work_items)
 {
 
     char *argument_string;
@@ -174,24 +179,38 @@ int execute_kernel(cl_context context, cl_command_queue *queue,
     cl_uint per_item;
     cl_uint per_item_uint;
     cl_uint final_result;
-    std::vector<cl_uint> returned_results(number_of_work_itmes);
+    std::vector<cl_uint> returned_results(number_of_work_items);
     clEventWrapper event;
     cl_int event_status;
 
     // Allocate memory for the kernel source
+    char *used_pattern = nullptr;
+    if (test == BUFFER || test == BUFFER_NON_BLOCKING)
+    {
+        used_pattern = (char *)accumulate_pattern;
+    }
+    else if (test == IMAGE_READ || test == IMAGE_READ_NON_BLOCKING)
+    {
+        used_pattern = (char *)read_pattern;
+    }
+    else if (test == IMAGE_WRITE || test == IMAGE_WRITE_NON_BLOCKING)
+    {
+        used_pattern = (char *)write_pattern;
+    }
     argument_string =
         (char *)malloc(sizeof(char) * MAX_NUMBER_TO_ALLOCATE * 64);
     access_string = (char *)malloc(sizeof(char) * MAX_NUMBER_TO_ALLOCATE
-                                   * (strlen(read_pattern) + 10));
+                                   * (strlen(used_pattern) + 10));
     kernel_string = (char *)malloc(sizeof(char) * MAX_NUMBER_TO_ALLOCATE
-                                       * (strlen(read_pattern) + 10 + 64)
+                                       * (strlen(used_pattern) + 10 + 64)
                                    + 1024);
+
     argument_string[0] = '\0';
     access_string[0] = '\0';
     kernel_string[0] = '\0';
 
     // Zero the results.
-    for (i = 0; i < number_of_work_itmes; i++) returned_results[i] = 0;
+    for (i = 0; i < number_of_work_items; i++) returned_results[i] = 0;
 
     // detect if device supports ulong/int64
     // detect whether profile of the device is embedded
@@ -209,13 +228,6 @@ int execute_kernel(cl_context context, cl_command_queue *queue,
     // Build the kernel source
     if (test == BUFFER || test == BUFFER_NON_BLOCKING)
     {
-        for (i = 0; i < number_of_mems_used; i++)
-        {
-            sprintf(argument_string + strlen(argument_string),
-                    " __global uint *buffer%d, ", i);
-            sprintf(access_string + strlen(access_string),
-                    "\t\tif (i<array_sizes[%d]) r += buffer%d[i];\n", i, i);
-        }
         char type[10];
         if (support64)
         {
@@ -225,8 +237,15 @@ int execute_kernel(cl_context context, cl_command_queue *queue,
         {
             sprintf(type, "uint");
         }
+        for (i = 0; i < number_of_mems_used; i++)
+        {
+            sprintf(argument_string + strlen(argument_string),
+                    " __global uint *buffer%d, ", i);
+            sprintf(access_string + strlen(access_string), accumulate_pattern,
+                    type, i, type, type, i, type, type, i, i);
+        }
         sprintf(kernel_string, buffer_kernel_pattern, argument_string, type,
-                type, type, type, type, type, access_string);
+                type, access_string);
     }
     else if (test == IMAGE_READ || test == IMAGE_READ_NON_BLOCKING)
     {
@@ -282,14 +301,14 @@ int execute_kernel(cl_context context, cl_command_queue *queue,
     // Set the result
     result_mem =
         clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_uint) * number_of_work_itmes,
+                       sizeof(cl_uint) * number_of_work_items,
                        returned_results.data(), &error);
     test_error(error, "clCreateBuffer failed");
     error = clSetKernelArg(kernel, i, sizeof(result_mem), &result_mem);
     test_error(error, "clSetKernelArg failed");
 
     // Thread dimensions for execution
-    global_dims[0] = number_of_work_itmes;
+    global_dims[0] = number_of_work_items;
     global_dims[1] = 1;
     global_dims[2] = 1;
 
@@ -427,7 +446,7 @@ int execute_kernel(cl_context context, cl_command_queue *queue,
     // Verify the checksum.
     // Read back the result
     error = clEnqueueReadBuffer(*queue, result_mem, CL_TRUE, 0,
-                                sizeof(cl_uint) * number_of_work_itmes,
+                                sizeof(cl_uint) * number_of_work_items,
                                 returned_results.data(), 0, NULL, NULL);
     test_error_abort(error, "clEnqueueReadBuffer failed");
     final_result = 0;
@@ -436,7 +455,7 @@ int execute_kernel(cl_context context, cl_command_queue *queue,
     {
         // For buffers or read images we are just looking at the sum of what
         // each thread summed up
-        for (i = 0; i < number_of_work_itmes; i++)
+        for (i = 0; i < number_of_work_items; i++)
         {
             final_result += returned_results[i];
         }
diff --git a/test_conformance/allocations/main.cpp b/test_conformance/allocations/main.cpp
index 50533d62..e0310bb8 100644
--- a/test_conformance/allocations/main.cpp
+++ b/test_conformance/allocations/main.cpp
@@ -125,7 +125,7 @@ int doTest(cl_device_id device, cl_context context, cl_command_queue queue,
     int number_of_mems_used;
     cl_ulong max_individual_allocation_size = g_max_individual_allocation_size;
     cl_ulong global_mem_size = g_global_mem_size;
-    unsigned int number_of_work_itmes = 8192 * 32;
+    unsigned int number_of_work_items = 8192 * 32;
     const bool allocate_image =
         (alloc_type != BUFFER) && (alloc_type != BUFFER_NON_BLOCKING);
 
@@ -183,7 +183,7 @@ int doTest(cl_device_id device, cl_context context, cl_command_queue queue,
                  g_reduction_percentage);
         g_max_size = (size_t)((double)g_max_size
                               * (double)g_reduction_percentage / 100.0);
-        number_of_work_itmes = 8192 * 2;
+        number_of_work_items = 8192 * 2;
     }
 
     // Round to nearest MB.
@@ -220,7 +220,7 @@ int doTest(cl_device_id device, cl_context context, cl_command_queue queue,
                 error =
                     execute_kernel(context, &queue, device, alloc_type, mems,
                                    number_of_mems_used, g_write_allocations,
-                                   number_of_work_itmes);
+                                   number_of_work_items);
             }
 
             // If we failed to allocate more than 1/8th of the requested amount

From d7cb6b006b12d216430c2335608b8dfbf42ab5c6 Mon Sep 17 00:00:00 2001
From: Ben Ashbaugh <ben.ashbaugh@intel.com>
Date: Tue, 11 Feb 2025 08:52:53 -0800
Subject: [PATCH 08/12] fix
 negative_create_command_buffer_not_supported_properties test (#2248)

fixes #2247

* For the `negative_create_command_buffer_not_supported_properties`
test, the only property we can check for is simultaneous use. All other
properties are part of other extensions and hence will generate
`CL_INVALID_VALUE`, not `CL_INVALID_PROPERTY`.
* Checks whether the `cl_khr_command_buffer_multi_device` extension is
supported when using `CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR`, instead
of `device_side_enqueue_support`.
* If the `cl_khr_command_buffer_multi_device` extension is NOT supported
and the `CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR` command buffer creation
flag is used, the expected error code is `CL_INVALID_VALUE`, not
`CL_INVALID_PROPERTY`.
---
 .../basic_command_buffer.cpp                  | 30 ++-----------------
 .../negative_command_buffer_create.cpp        |  8 ++---
 2 files changed, 5 insertions(+), 33 deletions(-)

diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
index 39e17762..c17b65d6 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
@@ -197,8 +197,6 @@ struct MultiFlagCreationTest : public BasicCommandBufferTest
     cl_int Run() override
     {
         cl_command_buffer_properties_khr flags = 0;
-        size_t num_flags_set = 0;
-        bool multi_flags_supported = true;
         cl_int error = CL_SUCCESS;
 
         // First try to find multiple flags that are supported by the driver and
@@ -206,30 +204,18 @@ struct MultiFlagCreationTest : public BasicCommandBufferTest
         if (simultaneous_use_support)
         {
             flags |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
-            num_flags_set++;
         }
 
-        if (device_side_enqueue_support)
+        if (is_extension_available(
+                device, CL_KHR_COMMAND_BUFFER_MULTI_DEVICE_EXTENSION_NAME))
         {
             flags |= CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR;
-            num_flags_set++;
         }
 
         if (is_extension_available(
                 device, CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME))
         {
             flags |= CL_COMMAND_BUFFER_MUTABLE_KHR;
-            num_flags_set++;
-        }
-
-        // If we can't find multiple supported flags, still set a bitfield but
-        // expect CL_INVALID_PROPERTY to be returned on creation.
-        if (num_flags_set < 2)
-        {
-            flags = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR
-                | CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR;
-
-            multi_flags_supported = false;
         }
 
         cl_command_buffer_properties_khr props[] = {
@@ -237,17 +223,7 @@ struct MultiFlagCreationTest : public BasicCommandBufferTest
         };
 
         command_buffer = clCreateCommandBufferKHR(1, &queue, props, &error);
-        if (multi_flags_supported)
-        {
-            test_error(error, "clCreateCommandBufferKHR failed");
-        }
-        else
-        {
-            test_failure_error_ret(
-                error, CL_INVALID_PROPERTY,
-                "clCreateCommandBufferKHR should return CL_INVALID_PROPERTY",
-                TEST_FAIL);
-        }
+        test_error(error, "clCreateCommandBufferKHR failed");
 
         return CL_SUCCESS;
     }
diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_create.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_create.cpp
index ecb2da39..4b4727c7 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_create.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_create.cpp
@@ -137,7 +137,8 @@ struct CreateCommandBufferRepeatedProperties : public BasicCommandBufferTest
             rep_prop = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
             skip = false;
         }
-        else if (device_side_enqueue_support)
+        else if (is_extension_available(
+                     device, CL_KHR_COMMAND_BUFFER_MULTI_DEVICE_EXTENSION_NAME))
         {
             rep_prop = CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR;
             skip = false;
@@ -190,11 +191,6 @@ struct CreateCommandBufferNotSupportedProperties : public BasicCommandBufferTest
             unsupported_prop = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
             skip = false;
         }
-        else if (!device_side_enqueue_support)
-        {
-            unsupported_prop = CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR;
-            skip = false;
-        }
 
         return skip;
     }

From 0bfe516318269e6326eafa57429f1b8179c16a28 Mon Sep 17 00:00:00 2001
From: Ahmed <36049290+AhmedAmraniAkdi@users.noreply.github.com>
Date: Tue, 11 Feb 2025 16:54:47 +0000
Subject: [PATCH 09/12] Add the half ulps values for embedded profile (#2265)

This change adds the half ulps values for embedded profile.


https://github.com/KhronosGroup/OpenCL-CTS/issues/1685
---
 .../math_brute_force/function_list.cpp        | 221 +++++++++---------
 .../math_brute_force/function_list.h          |   1 +
 test_conformance/math_brute_force/utility.cpp |  10 +-
 3 files changed, 124 insertions(+), 108 deletions(-)

diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp
index fcf1ea23..14e0830a 100644
--- a/test_conformance/math_brute_force/function_list.cpp
+++ b/test_conformance/math_brute_force/function_list.cpp
@@ -29,31 +29,32 @@
 // Only use ulps information in spir test
 #ifdef FUNCTION_LIST_ULPS_ONLY
 
-#define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _rmode, _type)            \
+#define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _half_embedded_ulp,       \
+              _rmode, _type)                                                   \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL },      \
-            _ulp, _ulp, _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,  \
-            RELAXED_OFF, _type                                                 \
+            _ulp, _ulp, _half_ulp, _half_embedded_ulp, _embedded_ulp,          \
+            INFINITY, INFINITY, _rmode, RELAXED_OFF, _type                     \
     }
-#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _relaxed_ulp, _rmode, \
-                  _type, _relaxed_embedded_ulp)                                \
+#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _half_embedded_ulp,   \
+                  _relaxed_ulp, _rmode, _type, _relaxed_embedded_ulp)          \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL },      \
-            _ulp, _ulp, _half_ulp, _embedded_ulp, _relaxed_ulp,                \
-            _relaxed_embedded_ulp, _rmode, RELAXED_ON, _type                   \
+            _ulp, _ulp, _half_ulp, _half_embedded_ulp, _embedded_ulp,          \
+            _relaxed_ulp, _relaxed_embedded_ulp, _rmode, RELAXED_ON, _type     \
     }
 #define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                  \
     {                                                                          \
         "half_" STRINGIFY(_name), "half_" STRINGIFY(_name), { NULL },          \
-            { NULL }, { NULL }, _ulp, _ulp, _ulp, _embedded_ulp, INFINITY,     \
-            INFINITY, _rmode, RELAXED_OFF, _type                               \
+            { NULL }, { NULL }, _ulp, _ulp, _ulp, _ulp, _embedded_ulp,         \
+            INFINITY, INFINITY, _rmode, RELAXED_OFF, _type                     \
     }
 #define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _half_ulp,       \
-                       _rmode, _type)                                          \
+                       _half_embedded_ulp, _rmode, _type)                      \
     {                                                                          \
         STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \
-            _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, \
-            _type                                                              \
+            _half_ulp, _half_embedded_ulp, _embedded_ulp, INFINITY, INFINITY,  \
+            _rmode, RELAXED_OFF, _type                                         \
     }
 
 #define unaryF NULL
@@ -88,35 +89,37 @@
 
 #else // FUNCTION_LIST_ULPS_ONLY
 
-#define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _rmode, _type)            \
+#define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _half_embedded_ulp,       \
+              _rmode, _type)                                                   \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name },      \
             { (void*)reference_##_name##l }, { (void*)reference_##_name },     \
-            _ulp, _ulp, _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,  \
-            RELAXED_OFF, _type                                                 \
+            _ulp, _ulp, _half_ulp, _half_embedded_ulp, _embedded_ulp,          \
+            INFINITY, INFINITY, _rmode, RELAXED_OFF, _type                     \
     }
-#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _relaxed_ulp, _rmode, \
-                  _type, _relaxed_embedded_ulp)                                \
+#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _half_embedded_ulp,   \
+                  _relaxed_ulp, _rmode, _type, _relaxed_embedded_ulp)          \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name },      \
             { (void*)reference_##_name##l },                                   \
             { (void*)reference_##relaxed_##_name }, _ulp, _ulp, _half_ulp,     \
-            _embedded_ulp, _relaxed_ulp, _relaxed_embedded_ulp, _rmode,        \
-            RELAXED_ON, _type                                                  \
+            _half_embedded_ulp, _embedded_ulp, _relaxed_ulp,                   \
+            _relaxed_embedded_ulp, _rmode, RELAXED_ON, _type                   \
     }
 #define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                  \
     {                                                                          \
         "half_" STRINGIFY(_name), "half_" STRINGIFY(_name),                    \
             { (void*)reference_##_name }, { NULL }, { NULL }, _ulp, _ulp,      \
-            _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF,      \
-            _type                                                              \
+            _ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,             \
+            RELAXED_OFF, _type                                                 \
     }
 #define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _half_ulp,       \
-                       _rmode, _type)                                          \
+                       _half_embedded_ulp, _rmode, _type)                      \
     {                                                                          \
         STRINGIFY(_name), _operator, { (void*)reference_##_name },             \
             { (void*)reference_##_name##l }, { NULL }, _ulp, _ulp, _half_ulp,  \
-            _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type      \
+            _half_embedded_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,     \
+            RELAXED_OFF, _type                                                 \
     }
 
 static constexpr vtbl _unary = {
@@ -252,19 +255,19 @@ static constexpr vtbl _mad_tbl = {
 
 // clang-format off
 const Func functionList[] = {
-    ENTRY_EXT(acos, 4.0f, 4.0f, 2.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
-    ENTRY(acosh, 4.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY(acospi, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(asin, 4.0f, 4.0f, 2.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
-    ENTRY(asinh, 4.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY(asinpi, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(atan, 5.0f, 5.0f, 2.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
-    ENTRY(atanh, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY(atanpi, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY(atan2, 6.0f, 6.0f, 2.0f, FTZ_OFF, binaryF),
-    ENTRY(atan2pi, 6.0f, 6.0f, 2.0f, FTZ_OFF, binaryF),
-    ENTRY(cbrt, 2.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
-    ENTRY(ceil, 0.0f, 0.0f, 0.f, FTZ_OFF, unaryF),
+    ENTRY_EXT(acos, 4.0f, 4.0f, 2.0f, 3.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
+    ENTRY(acosh, 4.0f, 4.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY(acospi, 5.0f, 5.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY_EXT(asin, 4.0f, 4.0f, 2.0f, 3.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
+    ENTRY(asinh, 4.0f, 4.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY(asinpi, 5.0f, 5.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY_EXT(atan, 5.0f, 5.0f, 2.0f, 3.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
+    ENTRY(atanh, 5.0f, 5.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY(atanpi, 5.0f, 5.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY(atan2, 6.0f, 6.0f, 2.0f, 3.0f, FTZ_OFF, binaryF),
+    ENTRY(atan2pi, 6.0f, 6.0f, 2.0f, 3.0f, FTZ_OFF, binaryF),
+    ENTRY(cbrt, 2.0f, 4.0f, 2.0f, 2.0f, FTZ_OFF, unaryF),
+    ENTRY(ceil, 0.0f, 0.0f, 0.f, 0.f, FTZ_OFF, unaryF),
     { "copysign",
       "copysign",
       { (void*)reference_copysignf },
@@ -274,96 +277,97 @@ const Func functionList[] = {
       0.0f,
       0.0f,
       0.0f,
+      0.0f,
       INFINITY,
       INFINITY,
       FTZ_OFF,
       RELAXED_OFF,
       binaryF },
-    ENTRY_EXT(cos, 4.0f, 4.0f, 2.f, 0.00048828125f, FTZ_OFF, unaryF,
+    ENTRY_EXT(cos, 4.0f, 4.0f, 2.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
               0.00048828125f), // relaxed ulp 2^-11
-    ENTRY(cosh, 4.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
-    ENTRY_EXT(cospi, 4.0f, 4.0f, 2.f, 0.00048828125f, FTZ_OFF, unaryF,
+    ENTRY(cosh, 4.0f, 4.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY_EXT(cospi, 4.0f, 4.0f, 2.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
               0.00048828125f), // relaxed ulp 2^-11
-    ENTRY(erfc, 16.0f, 16.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(erf,  16.0f, 16.0f, 4.0f, FTZ_OFF, unaryF),
+    ENTRY(erfc, 16.0f, 16.0f, 4.0f, 4.0f, FTZ_OFF, unaryF),
+    ENTRY(erf,  16.0f, 16.0f, 4.0f, 4.0f, FTZ_OFF, unaryF),
 
     // relaxed error is overwritten in unary.c as it is 3+floor(fabs(2*x))
-    ENTRY_EXT(exp, 3.0f, 4.0f, 2.f, 3.0f, FTZ_OFF, unaryF, 4.0f),
+    ENTRY_EXT(exp, 3.0f, 4.0f, 2.0f, 3.0f, 3.0f, FTZ_OFF, unaryF, 4.0f),
 
     // relaxed error is overwritten in unary.c as it is 3+floor(fabs(2*x))
-    ENTRY_EXT(exp2, 3.0f, 4.0f, 2.f, 3.0f, FTZ_OFF, unaryF, 4.0f),
+    ENTRY_EXT(exp2, 3.0f, 4.0f, 2.0f, 3.0f, 3.0f, FTZ_OFF, unaryF, 4.0f),
 
     // relaxed error is overwritten in unary.c as it is 3+floor(fabs(2*x)) in derived mode;
     // in non-derived mode it uses the ulp error for half_exp10.
-    ENTRY_EXT(exp10, 3.0f, 4.0f, 2.f, 8192.0f, FTZ_OFF, unaryF, 8192.0f),
+    ENTRY_EXT(exp10, 3.0f, 4.0f, 2.0f, 3.0f, 8192.0f, FTZ_OFF, unaryF, 8192.0f),
 
-    ENTRY(expm1, 3.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
-    ENTRY(fabs, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(fdim, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(floor, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(fma, 0.0f, 0.0f, 0.0f, FTZ_OFF, ternaryF),
-    ENTRY(fmax, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(fmin, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(fmod, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(fract, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
-    ENTRY(frexp, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results_i),
-    ENTRY(hypot, 4.0f, 4.0f, 2.0f, FTZ_OFF, binaryF),
-    ENTRY(ilogb, 0.0f, 0.0f, 0.0f, FTZ_OFF, i_unaryF),
-    ENTRY(isequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isfinite, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isgreater, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isgreaterequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isinf, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isless, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(islessequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(islessgreater, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isnan, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isnormal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isnotequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isordered, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isunordered, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(ldexp, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_i),
-    ENTRY(lgamma, INFINITY, INFINITY, INFINITY, FTZ_OFF, unaryF),
-    ENTRY(lgamma_r, INFINITY, INFINITY, INFINITY, FTZ_OFF,
+    ENTRY(expm1, 3.0f, 4.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY(fabs, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(fdim, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(floor, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(fma, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, ternaryF),
+    ENTRY(fmax, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(fmin, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(fmod, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(fract, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
+    ENTRY(frexp, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results_i),
+    ENTRY(hypot, 4.0f, 4.0f,  2.0f, 3.0f, FTZ_OFF, binaryF),
+    ENTRY(ilogb, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, i_unaryF),
+    ENTRY(isequal, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isfinite, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    ENTRY(isgreater, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isgreaterequal, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isinf, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    ENTRY(isless, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(islessequal, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(islessgreater, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isnan, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    ENTRY(isnormal, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    ENTRY(isnotequal, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isordered, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isunordered, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(ldexp, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_i),
+    ENTRY(lgamma, INFINITY, INFINITY, INFINITY, INFINITY, FTZ_OFF, unaryF),
+    ENTRY(lgamma_r, INFINITY, INFINITY, INFINITY, INFINITY, FTZ_OFF,
           unaryF_two_results_i),
-    ENTRY_EXT(log, 3.0f, 4.0f, 2.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
+    ENTRY_EXT(log, 3.0f, 4.0f, 2.0f, 3.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
               4.76837158203125e-7f), // relaxed ulp 2^-21
-    ENTRY_EXT(log2, 3.0f, 4.0f, 2.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
+    ENTRY_EXT(log2, 3.0f, 4.0f, 2.0f, 3.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
               4.76837158203125e-7f), // relaxed ulp 2^-21
-    ENTRY_EXT(log10, 3.0f, 4.0f, 2.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
+    ENTRY_EXT(log10, 3.0f, 4.0f, 2.0f, 3.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
               4.76837158203125e-7f), // relaxed ulp 2^-21
-    ENTRY(log1p, 2.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY(logb, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(log1p, 2.0f, 4.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY(logb, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
 
     // In fast-relaxed-math mode it has to be either exactly rounded fma or exactly rounded a*b+c
-    ENTRY_EXT(mad, INFINITY, INFINITY, INFINITY, INFINITY, FTZ_OFF, mad_function, INFINITY),
+    ENTRY_EXT(mad, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, FTZ_OFF, mad_function, INFINITY),
 
-    ENTRY(maxmag, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(minmag, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(modf, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
-    ENTRY(nan, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_u),
-    ENTRY(nextafter, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_nextafter),
+    ENTRY(maxmag, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(minmag, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(modf, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
+    ENTRY(nan, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_u),
+    ENTRY(nextafter, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_nextafter),
 
     // In derived mode the ulp error is calculated as exp2(y*log2(x)).
     // In non-derived it is the same as half_pow.
-    ENTRY_EXT(pow, 16.0f, 16.0f, 4.0f, 8192.0f, FTZ_OFF, binaryF, 8192.0f),
+    ENTRY_EXT(pow, 16.0f, 16.0f, 4.0f, 5.0f, 8192.0f, FTZ_OFF, binaryF, 8192.0f),
 
-    ENTRY(pown, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i),
-    ENTRY(powr, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF),
-    ENTRY(remainder, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(remquo, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
-    ENTRY(rint, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(rootn, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i),
-    ENTRY(round, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(rsqrt, 2.0f, 4.0f, 1.0f, FTZ_OFF, unaryF),
-    ENTRY(signbit, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY_EXT(sin, 4.0f, 4.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
+    ENTRY(pown, 16.0f, 16.0f, 4.0f, 5.0f, FTZ_OFF, binaryF_i),
+    ENTRY(powr, 16.0f, 16.0f, 4.0f, 5.0f, FTZ_OFF, binaryF),
+    ENTRY(remainder, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(remquo, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
+    ENTRY(rint, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(rootn, 16.0f, 16.0f, 4.0f, 5.0f, FTZ_OFF, binaryF_i),
+    ENTRY(round, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(rsqrt, 2.0f, 4.0f, 1.0f, 1.0f, FTZ_OFF, unaryF),
+    ENTRY(signbit, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    ENTRY_EXT(sin, 4.0f, 4.0f, 2.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
               0.00048828125f), // relaxed ulp 2^-11
-    ENTRY_EXT(sincos, 4.0f, 4.0f, 2.0f, 0.00048828125f, FTZ_OFF,
+    ENTRY_EXT(sincos, 4.0f, 4.0f, 2.0f, 2.0f, 0.00048828125f, FTZ_OFF,
               unaryF_two_results,
               0.00048828125f), // relaxed ulp 2^-11
-    ENTRY(sinh, 4.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(sinpi, 4.0f, 4.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
+    ENTRY(sinh, 4.0f, 4.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY_EXT(sinpi, 4.0f, 4.0f, 2.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
               0.00048828125f), // relaxed ulp 2^-11
     { "sqrt",
       "sqrt",
@@ -373,6 +377,7 @@ const Func functionList[] = {
       3.0f,
       0.0f,
       0.0f,
+      1.0f,
       4.0f,
       INFINITY,
       INFINITY,
@@ -390,18 +395,19 @@ const Func functionList[] = {
       INFINITY,
       INFINITY,
       INFINITY,
+      INFINITY,
       FTZ_OFF,
       RELAXED_OFF,
       unaryOF /* only for single precision */ },
 
     // In derived mode it the ulp error is calculated as sin/cos.
     // In non-derived mode it is the same as half_tan.
-    ENTRY_EXT(tan, 5.0f, 5.0f, 2.0f, 8192.0f, FTZ_OFF, unaryF, 8192.0f),
+    ENTRY_EXT(tan, 5.0f, 5.0f, 2.0f, 3.0f, 8192.0f, FTZ_OFF, unaryF, 8192.0f),
 
-    ENTRY(tanh, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY(tanpi, 6.0f, 6.0f, 2.0f, FTZ_OFF, unaryF),
+    ENTRY(tanh, 5.0f, 5.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY(tanpi, 6.0f, 6.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
     //ENTRY(tgamma, 16.0f, 16.0f, FTZ_OFF, unaryF), Commented this out until we can be sure this requirement is realistic
-    ENTRY(trunc, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(trunc, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
 
     HALF_ENTRY(cos, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
     HALF_ENTRY(divide, 8192.0f, 8192.0f, FTZ_ON, binaryOF),
@@ -419,8 +425,8 @@ const Func functionList[] = {
     HALF_ENTRY(tan, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
 
     // basic operations
-    OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
-    OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
+    OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
+    OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
     //ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
     { "reciprocal",
       "/",
@@ -430,6 +436,7 @@ const Func functionList[] = {
       2.5f,
       0.0f,
       0.0f,
+      1.0f,
       3.0f,
       2.5f,
       INFINITY,
@@ -444,6 +451,7 @@ const Func functionList[] = {
       2.5f,
       0.0f,
       1.0f,
+      1.0f,
       3.0f,
       2.5f,
       INFINITY,
@@ -461,13 +469,14 @@ const Func functionList[] = {
       INFINITY,
       INFINITY,
       INFINITY,
+      INFINITY,
       FTZ_OFF,
       RELAXED_OFF,
       binaryOperatorOF /* only for single precision */ },
-    OPERATOR_ENTRY(multiply, "*", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
-    OPERATOR_ENTRY(assignment, "", 0.0f, 0.0f, 0.0f, FTZ_OFF,
+    OPERATOR_ENTRY(multiply, "*", 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
+    OPERATOR_ENTRY(assignment, "", 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF,
                    unaryF), // A simple copy operation
-    OPERATOR_ENTRY(not, "!", 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    OPERATOR_ENTRY(not, "!", 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
 };
 // clang-format on
 
diff --git a/test_conformance/math_brute_force/function_list.h b/test_conformance/math_brute_force/function_list.h
index 56190e33..bdcc016d 100644
--- a/test_conformance/math_brute_force/function_list.h
+++ b/test_conformance/math_brute_force/function_list.h
@@ -87,6 +87,7 @@ struct Func
     float float_ulps;
     float double_ulps;
     float half_ulps;
+    float half_embedded_ulps;
     float float_embedded_ulps;
     float relaxed_error;
     float relaxed_embedded_error;
diff --git a/test_conformance/math_brute_force/utility.cpp b/test_conformance/math_brute_force/utility.cpp
index 53dd928c..741b3e41 100644
--- a/test_conformance/math_brute_force/utility.cpp
+++ b/test_conformance/math_brute_force/utility.cpp
@@ -195,8 +195,14 @@ float getAllowedUlpError(const Func *f, Type t, const bool relaxed)
             // TODO: distinguish between embedded and full profile.
             return f->double_ulps;
         case khalf:
-            // TODO: distinguish between embedded and full profile.
-            return f->half_ulps;
+            if (gIsEmbedded)
+            {
+                return f->half_embedded_ulps;
+            }
+            else
+            {
+                return f->half_ulps;
+            }
         default:
             assert(false && "unsupported type in getAllowedUlpError");
             // Return a negative value which will make any test fail.

From a61feea65677c2bbaf479a413d5f28a1e3464920 Mon Sep 17 00:00:00 2001
From: Ben Ashbaugh <ben.ashbaugh@intel.com>
Date: Tue, 11 Feb 2025 08:55:39 -0800
Subject: [PATCH 10/12] adds SPIR-V tests for scalar printf operands (#2211)

Adds targeted SPIR-V tests for printf with scalar operands.  See:

* https://github.com/KhronosGroup/OpenCL-Docs/issues/1211
* https://github.com/KhronosGroup/OpenCL-Docs/pull/1236

The fp32 test is likely to be the most interesting, especially on
devices that support fp64, because printf with scalar fp32 operands is
not generated by default in this case with Clang and the SPIR-V LLVM
Translator.
---
 test_common/harness/os_helpers.cpp            |   2 +-
 test_conformance/spirv_new/CMakeLists.txt     |   1 +
 .../printf_operands_scalar_fp32.spvasm32      |  85 ++++++
 .../printf_operands_scalar_fp32.spvasm64      |  85 ++++++
 .../printf_operands_scalar_fp64.spvasm32      |  93 +++++++
 .../printf_operands_scalar_fp64.spvasm64      |  93 +++++++
 .../printf_operands_scalar_int32.spvasm32     | 140 ++++++++++
 .../printf_operands_scalar_int32.spvasm64     | 140 ++++++++++
 .../printf_operands_scalar_int64.spvasm32     |  77 ++++++
 .../printf_operands_scalar_int64.spvasm64     |  77 ++++++
 .../spirv_new/test_extinst_printf.cpp         | 259 ++++++++++++++++++
 11 files changed, 1051 insertions(+), 1 deletion(-)
 create mode 100644 test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp32.spvasm32
 create mode 100644 test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp32.spvasm64
 create mode 100644 test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp64.spvasm32
 create mode 100644 test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp64.spvasm64
 create mode 100644 test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int32.spvasm32
 create mode 100644 test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int32.spvasm64
 create mode 100644 test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int64.spvasm32
 create mode 100644 test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int64.spvasm64
 create mode 100644 test_conformance/spirv_new/test_extinst_printf.cpp

diff --git a/test_common/harness/os_helpers.cpp b/test_common/harness/os_helpers.cpp
index c64c5901..b7087511 100644
--- a/test_common/harness/os_helpers.cpp
+++ b/test_common/harness/os_helpers.cpp
@@ -577,7 +577,7 @@ char* get_temp_filename()
     close(fd);
 #elif defined(_WIN32)
     UINT ret = GetTempFileName(".", "tmp", 0, gFileName);
-    if (ret == 0) return gFileName;
+    if (ret == 0) return strdup(gFileName);
 #else
     MTdata d = init_genrand((cl_uint)time(NULL));
     sprintf(gFileName, "tmpfile.%u", genrand_int32(d));
diff --git a/test_conformance/spirv_new/CMakeLists.txt b/test_conformance/spirv_new/CMakeLists.txt
index 805e851b..c635e924 100644
--- a/test_conformance/spirv_new/CMakeLists.txt
+++ b/test_conformance/spirv_new/CMakeLists.txt
@@ -5,6 +5,7 @@ set(${MODULE_NAME}_SOURCES
   test_basic_versions.cpp
   test_cl_khr_expect_assume.cpp
   test_decorate.cpp
+  test_extinst_printf.cpp
   test_get_program_il.cpp
   test_linkage.cpp
   test_no_integer_wrap_decoration.cpp
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp32.spvasm32 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp32.spvasm32
new file mode 100644
index 00000000..58631498
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp32.spvasm32
@@ -0,0 +1,85 @@
+; kernel void printf_operands_scalar_fp32(float f)
+; {
+;     printf("a = %.1a\n", f);
+;     printf("A = %.1A\n", f);
+;     printf("e = %.1e\n", f);
+;     printf("E = %.1E\n", f);
+;     printf("f = %.1f\n", f);
+;     printf("F = %.1F\n", f);
+;     printf("g = %.1g\n", f);
+;     printf("G = %.1G\n", f);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_fp32"
+      %uchar = OpTypeInt 8 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+  %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+  %uchar_dot = OpConstant %uchar 46
+    %uchar_1 = OpConstant %uchar 49
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_A = OpConstant %uchar 65
+    %uchar_E = OpConstant %uchar 69
+    %uchar_F = OpConstant %uchar 70
+    %uchar_G = OpConstant %uchar 71
+    %uchar_a = OpConstant %uchar 97
+    %uchar_e = OpConstant %uchar 101
+    %uchar_f = OpConstant %uchar 102
+    %uchar_g = OpConstant %uchar 103
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+       %void = OpTypeVoid
+      %float = OpTypeFloat 32
+ %kernel_sig = OpTypeFunction %void %float
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+    %array_a = OpConstantComposite %string_10 %uchar_a %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_a %uchar_nl %uchar_nul ; "a = %.1a\n"
+   %string_a = OpVariable %cptr_string_10 UniformConstant %array_a
+    %array_A = OpConstantComposite %string_10 %uchar_A %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_A %uchar_nl %uchar_nul ; "A = %.1A\n"
+   %string_A = OpVariable %cptr_string_10 UniformConstant %array_A
+    %array_e = OpConstantComposite %string_10 %uchar_e %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_e %uchar_nl %uchar_nul ; "e = %.1e\n"
+   %string_e = OpVariable %cptr_string_10 UniformConstant %array_e
+    %array_E = OpConstantComposite %string_10 %uchar_E %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_E %uchar_nl %uchar_nul ; "E = %.1E\n"
+   %string_E = OpVariable %cptr_string_10 UniformConstant %array_E
+    %array_f = OpConstantComposite %string_10 %uchar_f %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_f %uchar_nl %uchar_nul ; "f = %.1f\n"
+   %string_f = OpVariable %cptr_string_10 UniformConstant %array_f
+    %array_F = OpConstantComposite %string_10 %uchar_F %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_F %uchar_nl %uchar_nul ; "F = %.1F\n"
+   %string_F = OpVariable %cptr_string_10 UniformConstant %array_F
+    %array_g = OpConstantComposite %string_10 %uchar_g %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_g %uchar_nl %uchar_nul ; "g = %.1g\n"
+   %string_g = OpVariable %cptr_string_10 UniformConstant %array_g
+    %array_G = OpConstantComposite %string_10 %uchar_G %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_G %uchar_nl %uchar_nul ; "G = %.1G\n"
+   %string_G = OpVariable %cptr_string_10 UniformConstant %array_G
+
+       %test = OpFunction %void None %kernel_sig
+          %f = OpFunctionParameter %float
+      %entry = OpLabel
+
+      %fmt_a = OpBitcast %cptr_char %string_a
+   %printf_a = OpExtInst %uint %clext printf %fmt_a %f
+      %fmt_A = OpBitcast %cptr_char %string_A
+   %printf_A = OpExtInst %uint %clext printf %fmt_A %f
+      %fmt_e = OpBitcast %cptr_char %string_e
+   %printf_e = OpExtInst %uint %clext printf %fmt_e %f
+      %fmt_E = OpBitcast %cptr_char %string_E
+   %printf_E = OpExtInst %uint %clext printf %fmt_E %f
+      %fmt_f = OpBitcast %cptr_char %string_f
+   %printf_f = OpExtInst %uint %clext printf %fmt_f %f
+      %fmt_F = OpBitcast %cptr_char %string_F
+   %printf_F = OpExtInst %uint %clext printf %fmt_F %f
+      %fmt_g = OpBitcast %cptr_char %string_g
+   %printf_g = OpExtInst %uint %clext printf %fmt_g %f
+      %fmt_G = OpBitcast %cptr_char %string_G
+   %printf_G = OpExtInst %uint %clext printf %fmt_G %f
+
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp32.spvasm64 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp32.spvasm64
new file mode 100644
index 00000000..ba415d09
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp32.spvasm64
@@ -0,0 +1,85 @@
+; kernel void printf_operands_scalar_fp32(float f)
+; {
+;     printf("a = %.1a\n", f);
+;     printf("A = %.1A\n", f);
+;     printf("e = %.1e\n", f);
+;     printf("E = %.1E\n", f);
+;     printf("f = %.1f\n", f);
+;     printf("F = %.1F\n", f);
+;     printf("g = %.1g\n", f);
+;     printf("G = %.1G\n", f);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_fp32"
+      %uchar = OpTypeInt 8 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+  %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+  %uchar_dot = OpConstant %uchar 46
+    %uchar_1 = OpConstant %uchar 49
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_A = OpConstant %uchar 65
+    %uchar_E = OpConstant %uchar 69
+    %uchar_F = OpConstant %uchar 70
+    %uchar_G = OpConstant %uchar 71
+    %uchar_a = OpConstant %uchar 97
+    %uchar_e = OpConstant %uchar 101
+    %uchar_f = OpConstant %uchar 102
+    %uchar_g = OpConstant %uchar 103
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+       %void = OpTypeVoid
+      %float = OpTypeFloat 32
+ %kernel_sig = OpTypeFunction %void %float
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+    %array_a = OpConstantComposite %string_10 %uchar_a %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_a %uchar_nl %uchar_nul ; "a = %.1a\n"
+   %string_a = OpVariable %cptr_string_10 UniformConstant %array_a
+    %array_A = OpConstantComposite %string_10 %uchar_A %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_A %uchar_nl %uchar_nul ; "A = %.1A\n"
+   %string_A = OpVariable %cptr_string_10 UniformConstant %array_A
+    %array_e = OpConstantComposite %string_10 %uchar_e %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_e %uchar_nl %uchar_nul ; "e = %.1e\n"
+   %string_e = OpVariable %cptr_string_10 UniformConstant %array_e
+    %array_E = OpConstantComposite %string_10 %uchar_E %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_E %uchar_nl %uchar_nul ; "E = %.1E\n"
+   %string_E = OpVariable %cptr_string_10 UniformConstant %array_E
+    %array_f = OpConstantComposite %string_10 %uchar_f %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_f %uchar_nl %uchar_nul ; "f = %.1f\n"
+   %string_f = OpVariable %cptr_string_10 UniformConstant %array_f
+    %array_F = OpConstantComposite %string_10 %uchar_F %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_F %uchar_nl %uchar_nul ; "F = %.1F\n"
+   %string_F = OpVariable %cptr_string_10 UniformConstant %array_F
+    %array_g = OpConstantComposite %string_10 %uchar_g %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_g %uchar_nl %uchar_nul ; "g = %.1g\n"
+   %string_g = OpVariable %cptr_string_10 UniformConstant %array_g
+    %array_G = OpConstantComposite %string_10 %uchar_G %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_G %uchar_nl %uchar_nul ; "G = %.1G\n"
+   %string_G = OpVariable %cptr_string_10 UniformConstant %array_G
+
+       %test = OpFunction %void None %kernel_sig
+          %f = OpFunctionParameter %float
+      %entry = OpLabel
+
+      %fmt_a = OpBitcast %cptr_char %string_a
+   %printf_a = OpExtInst %uint %clext printf %fmt_a %f
+      %fmt_A = OpBitcast %cptr_char %string_A
+   %printf_A = OpExtInst %uint %clext printf %fmt_A %f
+      %fmt_e = OpBitcast %cptr_char %string_e
+   %printf_e = OpExtInst %uint %clext printf %fmt_e %f
+      %fmt_E = OpBitcast %cptr_char %string_E
+   %printf_E = OpExtInst %uint %clext printf %fmt_E %f
+      %fmt_f = OpBitcast %cptr_char %string_f
+   %printf_f = OpExtInst %uint %clext printf %fmt_f %f
+      %fmt_F = OpBitcast %cptr_char %string_F
+   %printf_F = OpExtInst %uint %clext printf %fmt_F %f
+      %fmt_g = OpBitcast %cptr_char %string_g
+   %printf_g = OpExtInst %uint %clext printf %fmt_g %f
+      %fmt_G = OpBitcast %cptr_char %string_G
+   %printf_G = OpExtInst %uint %clext printf %fmt_G %f
+
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp64.spvasm32 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp64.spvasm32
new file mode 100644
index 00000000..1b31cf49
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp64.spvasm32
@@ -0,0 +1,93 @@
+; kernel void printf_operands_scalar_fp64(double d)
+; {
+;     printf("a = %.1a\n", d);
+;     printf("A = %.1A\n", d);
+;     printf("e = %.1e\n", d);
+;     printf("E = %.1E\n", d);
+;     printf("f = %.1f\n", d);
+;     printf("F = %.1F\n", d);
+;     printf("g = %.1g\n", d);
+;     printf("G = %.1G\n", d);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Float64
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_fp64"
+      %uchar = OpTypeInt 8 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+  %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+  %uchar_dot = OpConstant %uchar 46
+    %uchar_1 = OpConstant %uchar 49
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_A = OpConstant %uchar 65
+    %uchar_E = OpConstant %uchar 69
+    %uchar_F = OpConstant %uchar 70
+    %uchar_G = OpConstant %uchar 71
+    %uchar_X = OpConstant %uchar 88
+    %uchar_a = OpConstant %uchar 97
+    %uchar_d = OpConstant %uchar 100
+    %uchar_e = OpConstant %uchar 101
+    %uchar_f = OpConstant %uchar 102
+    %uchar_g = OpConstant %uchar 103
+    %uchar_h = OpConstant %uchar 104
+    %uchar_i = OpConstant %uchar 105
+    %uchar_o = OpConstant %uchar 111
+    %uchar_u = OpConstant %uchar 117
+    %uchar_x = OpConstant %uchar 120
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+       %void = OpTypeVoid
+     %double = OpTypeFloat 64
+ %kernel_sig = OpTypeFunction %void %double
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+    %array_a = OpConstantComposite %string_10 %uchar_a %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_a %uchar_nl %uchar_nul ; "a = %.1a\n"
+   %string_a = OpVariable %cptr_string_10 UniformConstant %array_a
+    %array_A = OpConstantComposite %string_10 %uchar_A %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_A %uchar_nl %uchar_nul ; "A = %.1A\n"
+   %string_A = OpVariable %cptr_string_10 UniformConstant %array_A
+    %array_e = OpConstantComposite %string_10 %uchar_e %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_e %uchar_nl %uchar_nul ; "e = %.1e\n"
+   %string_e = OpVariable %cptr_string_10 UniformConstant %array_e
+    %array_E = OpConstantComposite %string_10 %uchar_E %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_E %uchar_nl %uchar_nul ; "E = %.1E\n"
+   %string_E = OpVariable %cptr_string_10 UniformConstant %array_E
+    %array_f = OpConstantComposite %string_10 %uchar_f %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_f %uchar_nl %uchar_nul ; "f = %.1f\n"
+   %string_f = OpVariable %cptr_string_10 UniformConstant %array_f
+    %array_F = OpConstantComposite %string_10 %uchar_F %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_F %uchar_nl %uchar_nul ; "F = %.1F\n"
+   %string_F = OpVariable %cptr_string_10 UniformConstant %array_F
+    %array_g = OpConstantComposite %string_10 %uchar_g %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_g %uchar_nl %uchar_nul ; "g = %.1g\n"
+   %string_g = OpVariable %cptr_string_10 UniformConstant %array_g
+    %array_G = OpConstantComposite %string_10 %uchar_G %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_G %uchar_nl %uchar_nul ; "G = %.1G\n"
+   %string_G = OpVariable %cptr_string_10 UniformConstant %array_G
+
+       %test = OpFunction %void None %kernel_sig
+          %d = OpFunctionParameter %double
+      %entry = OpLabel
+
+      %fmt_a = OpBitcast %cptr_char %string_a
+   %printf_a = OpExtInst %uint %clext printf %fmt_a %d
+      %fmt_A = OpBitcast %cptr_char %string_A
+   %printf_A = OpExtInst %uint %clext printf %fmt_A %d
+      %fmt_e = OpBitcast %cptr_char %string_e
+   %printf_e = OpExtInst %uint %clext printf %fmt_e %d
+      %fmt_E = OpBitcast %cptr_char %string_E
+   %printf_E = OpExtInst %uint %clext printf %fmt_E %d
+      %fmt_f = OpBitcast %cptr_char %string_f
+   %printf_f = OpExtInst %uint %clext printf %fmt_f %d
+      %fmt_F = OpBitcast %cptr_char %string_F
+   %printf_F = OpExtInst %uint %clext printf %fmt_F %d
+      %fmt_g = OpBitcast %cptr_char %string_g
+   %printf_g = OpExtInst %uint %clext printf %fmt_g %d
+      %fmt_G = OpBitcast %cptr_char %string_G
+   %printf_G = OpExtInst %uint %clext printf %fmt_G %d
+
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp64.spvasm64 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp64.spvasm64
new file mode 100644
index 00000000..a947e5ec
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp64.spvasm64
@@ -0,0 +1,93 @@
+; kernel void printf_operands_scalar_fp64(double d)
+; {
+;     printf("a = %.1a\n", d);
+;     printf("A = %.1A\n", d);
+;     printf("e = %.1e\n", d);
+;     printf("E = %.1E\n", d);
+;     printf("f = %.1f\n", d);
+;     printf("F = %.1F\n", d);
+;     printf("g = %.1g\n", d);
+;     printf("G = %.1G\n", d);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Float64
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_fp64"
+      %uchar = OpTypeInt 8 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+  %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+  %uchar_dot = OpConstant %uchar 46
+    %uchar_1 = OpConstant %uchar 49
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_A = OpConstant %uchar 65
+    %uchar_E = OpConstant %uchar 69
+    %uchar_F = OpConstant %uchar 70
+    %uchar_G = OpConstant %uchar 71
+    %uchar_X = OpConstant %uchar 88
+    %uchar_a = OpConstant %uchar 97
+    %uchar_d = OpConstant %uchar 100
+    %uchar_e = OpConstant %uchar 101
+    %uchar_f = OpConstant %uchar 102
+    %uchar_g = OpConstant %uchar 103
+    %uchar_h = OpConstant %uchar 104
+    %uchar_i = OpConstant %uchar 105
+    %uchar_o = OpConstant %uchar 111
+    %uchar_u = OpConstant %uchar 117
+    %uchar_x = OpConstant %uchar 120
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+       %void = OpTypeVoid
+     %double = OpTypeFloat 64
+ %kernel_sig = OpTypeFunction %void %double
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+    %array_a = OpConstantComposite %string_10 %uchar_a %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_a %uchar_nl %uchar_nul ; "a = %.1a\n"
+   %string_a = OpVariable %cptr_string_10 UniformConstant %array_a
+    %array_A = OpConstantComposite %string_10 %uchar_A %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_A %uchar_nl %uchar_nul ; "A = %.1A\n"
+   %string_A = OpVariable %cptr_string_10 UniformConstant %array_A
+    %array_e = OpConstantComposite %string_10 %uchar_e %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_e %uchar_nl %uchar_nul ; "e = %.1e\n"
+   %string_e = OpVariable %cptr_string_10 UniformConstant %array_e
+    %array_E = OpConstantComposite %string_10 %uchar_E %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_E %uchar_nl %uchar_nul ; "E = %.1E\n"
+   %string_E = OpVariable %cptr_string_10 UniformConstant %array_E
+    %array_f = OpConstantComposite %string_10 %uchar_f %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_f %uchar_nl %uchar_nul ; "f = %.1f\n"
+   %string_f = OpVariable %cptr_string_10 UniformConstant %array_f
+    %array_F = OpConstantComposite %string_10 %uchar_F %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_F %uchar_nl %uchar_nul ; "F = %.1F\n"
+   %string_F = OpVariable %cptr_string_10 UniformConstant %array_F
+    %array_g = OpConstantComposite %string_10 %uchar_g %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_g %uchar_nl %uchar_nul ; "g = %.1g\n"
+   %string_g = OpVariable %cptr_string_10 UniformConstant %array_g
+    %array_G = OpConstantComposite %string_10 %uchar_G %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_G %uchar_nl %uchar_nul ; "G = %.1G\n"
+   %string_G = OpVariable %cptr_string_10 UniformConstant %array_G
+
+       %test = OpFunction %void None %kernel_sig
+          %d = OpFunctionParameter %double
+      %entry = OpLabel
+
+      %fmt_a = OpBitcast %cptr_char %string_a
+   %printf_a = OpExtInst %uint %clext printf %fmt_a %d
+      %fmt_A = OpBitcast %cptr_char %string_A
+   %printf_A = OpExtInst %uint %clext printf %fmt_A %d
+      %fmt_e = OpBitcast %cptr_char %string_e
+   %printf_e = OpExtInst %uint %clext printf %fmt_e %d
+      %fmt_E = OpBitcast %cptr_char %string_E
+   %printf_E = OpExtInst %uint %clext printf %fmt_E %d
+      %fmt_f = OpBitcast %cptr_char %string_f
+   %printf_f = OpExtInst %uint %clext printf %fmt_f %d
+      %fmt_F = OpBitcast %cptr_char %string_F
+   %printf_F = OpExtInst %uint %clext printf %fmt_F %d
+      %fmt_g = OpBitcast %cptr_char %string_g
+   %printf_g = OpExtInst %uint %clext printf %fmt_g %d
+      %fmt_G = OpBitcast %cptr_char %string_G
+   %printf_G = OpExtInst %uint %clext printf %fmt_G %d
+
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int32.spvasm32 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int32.spvasm32
new file mode 100644
index 00000000..61fb8cd0
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int32.spvasm32
@@ -0,0 +1,140 @@
+; kernel void printf_operands_scalar_int32(int i)
+; {
+;     printf("d = %d\n", i);
+;     printf("i = %i\n", i);
+;     printf("o = %o\n", i);
+;     printf("u = %u\n", i);
+;     printf("x = %x\n", i);
+;     printf("X = %X\n", i);
+;
+;     printf("hd = %hd\n", i);
+;     printf("hi = %hi\n", i);
+;     printf("ho = %ho\n", i);
+;     printf("hu = %hu\n", i);
+;     printf("hx = %hx\n", i);
+;     printf("hX = %hX\n", i);
+;
+;     printf("hhd = %hhd\n", i);
+;     printf("hhi = %hhi\n", i);
+;     printf("hho = %hho\n", i);
+;     printf("hhu = %hhu\n", i);
+;     printf("hhx = %hhx\n", i);
+;     printf("hhX = %hhX\n", i);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_int32"
+      %uchar = OpTypeInt 8 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+  %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_X = OpConstant %uchar 88
+    %uchar_d = OpConstant %uchar 100
+    %uchar_h = OpConstant %uchar 104
+    %uchar_i = OpConstant %uchar 105
+    %uchar_o = OpConstant %uchar 111
+    %uchar_u = OpConstant %uchar 117
+    %uchar_x = OpConstant %uchar 120
+   %string_8 = OpTypeArray %uchar %uint_8
+%cptr_string_8 = OpTypePointer UniformConstant %string_8
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+  %string_12 = OpTypeArray %uchar %uint_12
+%cptr_string_12 = OpTypePointer UniformConstant %string_12
+       %void = OpTypeVoid
+ %kernel_sig = OpTypeFunction %void %uint
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+    %array_d = OpConstantComposite %string_8 %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_d %uchar_nl %uchar_nul ; "d = %d\n"
+   %string_d = OpVariable %cptr_string_8 UniformConstant %array_d
+    %array_i = OpConstantComposite %string_8 %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_i %uchar_nl %uchar_nul ; "i = %i\n"
+   %string_i = OpVariable %cptr_string_8 UniformConstant %array_i
+    %array_o = OpConstantComposite %string_8 %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_o %uchar_nl %uchar_nul ; "o = %o\n"
+   %string_o = OpVariable %cptr_string_8 UniformConstant %array_o
+    %array_u = OpConstantComposite %string_8 %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_u %uchar_nl %uchar_nul ; "u = %u\n"
+   %string_u = OpVariable %cptr_string_8 UniformConstant %array_u
+    %array_x = OpConstantComposite %string_8 %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_x %uchar_nl %uchar_nul ; "x = %x\n"
+   %string_x = OpVariable %cptr_string_8 UniformConstant %array_x
+    %array_X = OpConstantComposite %string_8 %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_X %uchar_nl %uchar_nul ; "X = %X\n"
+   %string_X = OpVariable %cptr_string_8 UniformConstant %array_X
+
+   %array_hd = OpConstantComposite %string_10 %uchar_h %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_d %uchar_nl %uchar_nul ; "hd = %hd\n"
+  %string_hd = OpVariable %cptr_string_10 UniformConstant %array_hd
+   %array_hi = OpConstantComposite %string_10 %uchar_h %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_i %uchar_nl %uchar_nul ; "hi = %hi\n"
+  %string_hi = OpVariable %cptr_string_10 UniformConstant %array_hi
+   %array_ho = OpConstantComposite %string_10 %uchar_h %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_o %uchar_nl %uchar_nul ; "ho = %ho\n"
+  %string_ho = OpVariable %cptr_string_10 UniformConstant %array_ho
+   %array_hu = OpConstantComposite %string_10 %uchar_h %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_u %uchar_nl %uchar_nul ; "hu = %hu\n"
+  %string_hu = OpVariable %cptr_string_10 UniformConstant %array_hu
+   %array_hx = OpConstantComposite %string_10 %uchar_h %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_x %uchar_nl %uchar_nul ; "hx = %hx\n"
+  %string_hx = OpVariable %cptr_string_10 UniformConstant %array_hx
+   %array_hX = OpConstantComposite %string_10 %uchar_h %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_X %uchar_nl %uchar_nul ; "hX = %hX\n"
+  %string_hX = OpVariable %cptr_string_10 UniformConstant %array_hX
+
+  %array_hhd = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_d %uchar_nl %uchar_nul ; "hhd = %hhd\n"
+ %string_hhd = OpVariable %cptr_string_12 UniformConstant %array_hhd
+  %array_hhi = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_i %uchar_nl %uchar_nul ; "hhi = %hhi\n"
+ %string_hhi = OpVariable %cptr_string_12 UniformConstant %array_hhi
+  %array_hho = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_o %uchar_nl %uchar_nul ; "hho = %hho\n"
+ %string_hho = OpVariable %cptr_string_12 UniformConstant %array_hho
+  %array_hhu = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_u %uchar_nl %uchar_nul ; "hhu = %hhu\n"
+ %string_hhu = OpVariable %cptr_string_12 UniformConstant %array_hhu
+  %array_hhx = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_x %uchar_nl %uchar_nul ; "hhx = %hhx\n"
+ %string_hhx = OpVariable %cptr_string_12 UniformConstant %array_hhx
+  %array_hhX = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_X %uchar_nl %uchar_nul ; "hhX = %hhX\n"
+ %string_hhX = OpVariable %cptr_string_12 UniformConstant %array_hhX
+
+       %test = OpFunction %void None %kernel_sig
+          %i = OpFunctionParameter %uint
+      %entry = OpLabel
+      %fmt_d = OpBitcast %cptr_char %string_d
+   %printf_d = OpExtInst %uint %clext printf %fmt_d %i
+      %fmt_i = OpBitcast %cptr_char %string_i
+   %printf_i = OpExtInst %uint %clext printf %fmt_i %i
+      %fmt_o = OpBitcast %cptr_char %string_o
+   %printf_o = OpExtInst %uint %clext printf %fmt_o %i
+      %fmt_u = OpBitcast %cptr_char %string_u
+   %printf_u = OpExtInst %uint %clext printf %fmt_u %i
+      %fmt_x = OpBitcast %cptr_char %string_x
+   %printf_x = OpExtInst %uint %clext printf %fmt_x %i
+      %fmt_X = OpBitcast %cptr_char %string_X
+   %printf_X = OpExtInst %uint %clext printf %fmt_X %i
+
+     %fmt_hd = OpBitcast %cptr_char %string_hd
+  %printf_hd = OpExtInst %uint %clext printf %fmt_hd %i
+     %fmt_hi = OpBitcast %cptr_char %string_hi
+  %printf_hi = OpExtInst %uint %clext printf %fmt_hi %i
+     %fmt_ho = OpBitcast %cptr_char %string_ho
+  %printf_ho = OpExtInst %uint %clext printf %fmt_ho %i
+     %fmt_hu = OpBitcast %cptr_char %string_hu
+  %printf_hu = OpExtInst %uint %clext printf %fmt_hu %i
+     %fmt_hx = OpBitcast %cptr_char %string_hx
+  %printf_hx = OpExtInst %uint %clext printf %fmt_hx %i
+     %fmt_hX = OpBitcast %cptr_char %string_hX
+  %printf_hX = OpExtInst %uint %clext printf %fmt_hX %i
+
+    %fmt_hhd = OpBitcast %cptr_char %string_hhd
+ %printf_hhd = OpExtInst %uint %clext printf %fmt_hhd %i
+    %fmt_hhi = OpBitcast %cptr_char %string_hhi
+ %printf_hhi = OpExtInst %uint %clext printf %fmt_hhi %i
+    %fmt_hho = OpBitcast %cptr_char %string_hho
+ %printf_hho = OpExtInst %uint %clext printf %fmt_hho %i
+    %fmt_hhu = OpBitcast %cptr_char %string_hhu
+ %printf_hhu = OpExtInst %uint %clext printf %fmt_hhu %i
+    %fmt_hhx = OpBitcast %cptr_char %string_hhx
+ %printf_hhx = OpExtInst %uint %clext printf %fmt_hhx %i
+    %fmt_hhX = OpBitcast %cptr_char %string_hhX
+ %printf_hhX = OpExtInst %uint %clext printf %fmt_hhX %i
+
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int32.spvasm64 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int32.spvasm64
new file mode 100644
index 00000000..91ad8e1e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int32.spvasm64
@@ -0,0 +1,140 @@
+; kernel void printf_operands_scalar_int32(int i)
+; {
+;     printf("d = %d\n", i);
+;     printf("i = %i\n", i);
+;     printf("o = %o\n", i);
+;     printf("u = %u\n", i);
+;     printf("x = %x\n", i);
+;     printf("X = %X\n", i);
+;
+;     printf("hd = %hd\n", i);
+;     printf("hi = %hi\n", i);
+;     printf("ho = %ho\n", i);
+;     printf("hu = %hu\n", i);
+;     printf("hx = %hx\n", i);
+;     printf("hX = %hX\n", i);
+;
+;     printf("hhd = %hhd\n", i);
+;     printf("hhi = %hhi\n", i);
+;     printf("hho = %hho\n", i);
+;     printf("hhu = %hhu\n", i);
+;     printf("hhx = %hhx\n", i);
+;     printf("hhX = %hhX\n", i);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_int32"
+      %uchar = OpTypeInt 8 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+  %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_X = OpConstant %uchar 88
+    %uchar_d = OpConstant %uchar 100
+    %uchar_h = OpConstant %uchar 104
+    %uchar_i = OpConstant %uchar 105
+    %uchar_o = OpConstant %uchar 111
+    %uchar_u = OpConstant %uchar 117
+    %uchar_x = OpConstant %uchar 120
+   %string_8 = OpTypeArray %uchar %uint_8
+%cptr_string_8 = OpTypePointer UniformConstant %string_8
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+  %string_12 = OpTypeArray %uchar %uint_12
+%cptr_string_12 = OpTypePointer UniformConstant %string_12
+       %void = OpTypeVoid
+ %kernel_sig = OpTypeFunction %void %uint
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+    %array_d = OpConstantComposite %string_8 %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_d %uchar_nl %uchar_nul ; "d = %d\n"
+   %string_d = OpVariable %cptr_string_8 UniformConstant %array_d
+    %array_i = OpConstantComposite %string_8 %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_i %uchar_nl %uchar_nul ; "i = %i\n"
+   %string_i = OpVariable %cptr_string_8 UniformConstant %array_i
+    %array_o = OpConstantComposite %string_8 %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_o %uchar_nl %uchar_nul ; "o = %o\n"
+   %string_o = OpVariable %cptr_string_8 UniformConstant %array_o
+    %array_u = OpConstantComposite %string_8 %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_u %uchar_nl %uchar_nul ; "u = %u\n"
+   %string_u = OpVariable %cptr_string_8 UniformConstant %array_u
+    %array_x = OpConstantComposite %string_8 %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_x %uchar_nl %uchar_nul ; "x = %x\n"
+   %string_x = OpVariable %cptr_string_8 UniformConstant %array_x
+    %array_X = OpConstantComposite %string_8 %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_X %uchar_nl %uchar_nul ; "X = %X\n"
+   %string_X = OpVariable %cptr_string_8 UniformConstant %array_X
+
+   %array_hd = OpConstantComposite %string_10 %uchar_h %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_d %uchar_nl %uchar_nul ; "hd = %hd\n"
+  %string_hd = OpVariable %cptr_string_10 UniformConstant %array_hd
+   %array_hi = OpConstantComposite %string_10 %uchar_h %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_i %uchar_nl %uchar_nul ; "hi = %hi\n"
+  %string_hi = OpVariable %cptr_string_10 UniformConstant %array_hi
+   %array_ho = OpConstantComposite %string_10 %uchar_h %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_o %uchar_nl %uchar_nul ; "ho = %ho\n"
+  %string_ho = OpVariable %cptr_string_10 UniformConstant %array_ho
+   %array_hu = OpConstantComposite %string_10 %uchar_h %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_u %uchar_nl %uchar_nul ; "hu = %hu\n"
+  %string_hu = OpVariable %cptr_string_10 UniformConstant %array_hu
+   %array_hx = OpConstantComposite %string_10 %uchar_h %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_x %uchar_nl %uchar_nul ; "hx = %hx\n"
+  %string_hx = OpVariable %cptr_string_10 UniformConstant %array_hx
+   %array_hX = OpConstantComposite %string_10 %uchar_h %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_X %uchar_nl %uchar_nul ; "hX = %hX\n"
+  %string_hX = OpVariable %cptr_string_10 UniformConstant %array_hX
+
+  %array_hhd = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_d %uchar_nl %uchar_nul ; "hhd = %hhd\n"
+ %string_hhd = OpVariable %cptr_string_12 UniformConstant %array_hhd
+  %array_hhi = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_i %uchar_nl %uchar_nul ; "hhi = %hhi\n"
+ %string_hhi = OpVariable %cptr_string_12 UniformConstant %array_hhi
+  %array_hho = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_o %uchar_nl %uchar_nul ; "hho = %hho\n"
+ %string_hho = OpVariable %cptr_string_12 UniformConstant %array_hho
+  %array_hhu = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_u %uchar_nl %uchar_nul ; "hhu = %hhu\n"
+ %string_hhu = OpVariable %cptr_string_12 UniformConstant %array_hhu
+  %array_hhx = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_x %uchar_nl %uchar_nul ; "hhx = %hhx\n"
+ %string_hhx = OpVariable %cptr_string_12 UniformConstant %array_hhx
+  %array_hhX = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_X %uchar_nl %uchar_nul ; "hhX = %hhX\n"
+ %string_hhX = OpVariable %cptr_string_12 UniformConstant %array_hhX
+
+       %test = OpFunction %void None %kernel_sig
+          %i = OpFunctionParameter %uint
+      %entry = OpLabel
+      %fmt_d = OpBitcast %cptr_char %string_d
+   %printf_d = OpExtInst %uint %clext printf %fmt_d %i
+      %fmt_i = OpBitcast %cptr_char %string_i
+   %printf_i = OpExtInst %uint %clext printf %fmt_i %i
+      %fmt_o = OpBitcast %cptr_char %string_o
+   %printf_o = OpExtInst %uint %clext printf %fmt_o %i
+      %fmt_u = OpBitcast %cptr_char %string_u
+   %printf_u = OpExtInst %uint %clext printf %fmt_u %i
+      %fmt_x = OpBitcast %cptr_char %string_x
+   %printf_x = OpExtInst %uint %clext printf %fmt_x %i
+      %fmt_X = OpBitcast %cptr_char %string_X
+   %printf_X = OpExtInst %uint %clext printf %fmt_X %i
+
+     %fmt_hd = OpBitcast %cptr_char %string_hd
+  %printf_hd = OpExtInst %uint %clext printf %fmt_hd %i
+     %fmt_hi = OpBitcast %cptr_char %string_hi
+  %printf_hi = OpExtInst %uint %clext printf %fmt_hi %i
+     %fmt_ho = OpBitcast %cptr_char %string_ho
+  %printf_ho = OpExtInst %uint %clext printf %fmt_ho %i
+     %fmt_hu = OpBitcast %cptr_char %string_hu
+  %printf_hu = OpExtInst %uint %clext printf %fmt_hu %i
+     %fmt_hx = OpBitcast %cptr_char %string_hx
+  %printf_hx = OpExtInst %uint %clext printf %fmt_hx %i
+     %fmt_hX = OpBitcast %cptr_char %string_hX
+  %printf_hX = OpExtInst %uint %clext printf %fmt_hX %i
+
+    %fmt_hhd = OpBitcast %cptr_char %string_hhd
+ %printf_hhd = OpExtInst %uint %clext printf %fmt_hhd %i
+    %fmt_hhi = OpBitcast %cptr_char %string_hhi
+ %printf_hhi = OpExtInst %uint %clext printf %fmt_hhi %i
+    %fmt_hho = OpBitcast %cptr_char %string_hho
+ %printf_hho = OpExtInst %uint %clext printf %fmt_hho %i
+    %fmt_hhu = OpBitcast %cptr_char %string_hhu
+ %printf_hhu = OpExtInst %uint %clext printf %fmt_hhu %i
+    %fmt_hhx = OpBitcast %cptr_char %string_hhx
+ %printf_hhx = OpExtInst %uint %clext printf %fmt_hhx %i
+    %fmt_hhX = OpBitcast %cptr_char %string_hhX
+ %printf_hhX = OpExtInst %uint %clext printf %fmt_hhX %i
+
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int64.spvasm32 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int64.spvasm32
new file mode 100644
index 00000000..ec19e9f8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int64.spvasm32
@@ -0,0 +1,77 @@
+; kernel void printf_operands_scalar_int64(long l)
+; {
+;     printf("ld = %ld\n", l);
+;     printf("li = %li\n", l);
+;     printf("lo = %lo\n", l);
+;     printf("lu = %lu\n", l);
+;     printf("lx = %lx\n", l);
+;     printf("lX = %lX\n", l);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int64
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_int64"
+      %uchar = OpTypeInt 8 0
+      %ulong = OpTypeInt 64 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+    %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_X = OpConstant %uchar 88
+    %uchar_d = OpConstant %uchar 100
+    %uchar_i = OpConstant %uchar 105
+    %uchar_l = OpConstant %uchar 108
+    %uchar_o = OpConstant %uchar 111
+    %uchar_u = OpConstant %uchar 117
+    %uchar_x = OpConstant %uchar 120
+   %string_8 = OpTypeArray %uchar %uint_8
+%cptr_string_8 = OpTypePointer UniformConstant %string_8
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+  %string_12 = OpTypeArray %uchar %uint_12
+%cptr_string_12 = OpTypePointer UniformConstant %string_12
+       %void = OpTypeVoid
+      %float = OpTypeFloat 32
+ %kernel_sig = OpTypeFunction %void %ulong
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+   %array_ld = OpConstantComposite %string_10 %uchar_l %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_d %uchar_nl %uchar_nul ; "ld = %ld\n"
+  %string_ld = OpVariable %cptr_string_10 UniformConstant %array_ld
+   %array_li = OpConstantComposite %string_10 %uchar_l %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_i %uchar_nl %uchar_nul ; "li = %li\n"
+  %string_li = OpVariable %cptr_string_10 UniformConstant %array_li
+   %array_lo = OpConstantComposite %string_10 %uchar_l %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_o %uchar_nl %uchar_nul ; "lo = %lo\n"
+  %string_lo = OpVariable %cptr_string_10 UniformConstant %array_lo
+   %array_lu = OpConstantComposite %string_10 %uchar_l %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_u %uchar_nl %uchar_nul ; "lu = %lu\n"
+  %string_lu = OpVariable %cptr_string_10 UniformConstant %array_lu
+   %array_lx = OpConstantComposite %string_10 %uchar_l %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_x %uchar_nl %uchar_nul ; "lx = %lx\n"
+  %string_lx = OpVariable %cptr_string_10 UniformConstant %array_lx
+   %array_lX = OpConstantComposite %string_10 %uchar_l %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_X %uchar_nl %uchar_nul ; "lX = %lX\n"
+  %string_lX = OpVariable %cptr_string_10 UniformConstant %array_lX
+
+       %test = OpFunction %void None %kernel_sig
+          %l = OpFunctionParameter %ulong
+      %entry = OpLabel
+
+     %fmt_ld = OpBitcast %cptr_char %string_ld
+  %printf_ld = OpExtInst %uint %clext printf %fmt_ld %l
+     %fmt_li = OpBitcast %cptr_char %string_li
+  %printf_li = OpExtInst %uint %clext printf %fmt_li %l
+     %fmt_lo = OpBitcast %cptr_char %string_lo
+  %printf_lo = OpExtInst %uint %clext printf %fmt_lo %l
+     %fmt_lu = OpBitcast %cptr_char %string_lu
+  %printf_lu = OpExtInst %uint %clext printf %fmt_lu %l
+     %fmt_lx = OpBitcast %cptr_char %string_lx
+  %printf_lx = OpExtInst %uint %clext printf %fmt_lx %l
+     %fmt_lX = OpBitcast %cptr_char %string_lX
+  %printf_lX = OpExtInst %uint %clext printf %fmt_lX %l
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int64.spvasm64 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int64.spvasm64
new file mode 100644
index 00000000..8401d1fe
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int64.spvasm64
@@ -0,0 +1,77 @@
+; kernel void printf_operands_scalar_int64(long l)
+; {
+;     printf("ld = %ld\n", l);
+;     printf("li = %li\n", l);
+;     printf("lo = %lo\n", l);
+;     printf("lu = %lu\n", l);
+;     printf("lx = %lx\n", l);
+;     printf("lX = %lX\n", l);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int64
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_int64"
+      %uchar = OpTypeInt 8 0
+      %ulong = OpTypeInt 64 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+    %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_X = OpConstant %uchar 88
+    %uchar_d = OpConstant %uchar 100
+    %uchar_i = OpConstant %uchar 105
+    %uchar_l = OpConstant %uchar 108
+    %uchar_o = OpConstant %uchar 111
+    %uchar_u = OpConstant %uchar 117
+    %uchar_x = OpConstant %uchar 120
+   %string_8 = OpTypeArray %uchar %uint_8
+%cptr_string_8 = OpTypePointer UniformConstant %string_8
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+  %string_12 = OpTypeArray %uchar %uint_12
+%cptr_string_12 = OpTypePointer UniformConstant %string_12
+       %void = OpTypeVoid
+      %float = OpTypeFloat 32
+ %kernel_sig = OpTypeFunction %void %ulong
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+   %array_ld = OpConstantComposite %string_10 %uchar_l %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_d %uchar_nl %uchar_nul ; "ld = %ld\n"
+  %string_ld = OpVariable %cptr_string_10 UniformConstant %array_ld
+   %array_li = OpConstantComposite %string_10 %uchar_l %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_i %uchar_nl %uchar_nul ; "li = %li\n"
+  %string_li = OpVariable %cptr_string_10 UniformConstant %array_li
+   %array_lo = OpConstantComposite %string_10 %uchar_l %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_o %uchar_nl %uchar_nul ; "lo = %lo\n"
+  %string_lo = OpVariable %cptr_string_10 UniformConstant %array_lo
+   %array_lu = OpConstantComposite %string_10 %uchar_l %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_u %uchar_nl %uchar_nul ; "lu = %lu\n"
+  %string_lu = OpVariable %cptr_string_10 UniformConstant %array_lu
+   %array_lx = OpConstantComposite %string_10 %uchar_l %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_x %uchar_nl %uchar_nul ; "lx = %lx\n"
+  %string_lx = OpVariable %cptr_string_10 UniformConstant %array_lx
+   %array_lX = OpConstantComposite %string_10 %uchar_l %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_X %uchar_nl %uchar_nul ; "lX = %lX\n"
+  %string_lX = OpVariable %cptr_string_10 UniformConstant %array_lX
+
+       %test = OpFunction %void None %kernel_sig
+          %l = OpFunctionParameter %ulong
+      %entry = OpLabel
+
+     %fmt_ld = OpBitcast %cptr_char %string_ld
+  %printf_ld = OpExtInst %uint %clext printf %fmt_ld %l
+     %fmt_li = OpBitcast %cptr_char %string_li
+  %printf_li = OpExtInst %uint %clext printf %fmt_li %l
+     %fmt_lo = OpBitcast %cptr_char %string_lo
+  %printf_lo = OpExtInst %uint %clext printf %fmt_lo %l
+     %fmt_lu = OpBitcast %cptr_char %string_lu
+  %printf_lu = OpExtInst %uint %clext printf %fmt_lu %l
+     %fmt_lx = OpBitcast %cptr_char %string_lx
+  %printf_lx = OpExtInst %uint %clext printf %fmt_lx %l
+     %fmt_lX = OpBitcast %cptr_char %string_lX
+  %printf_lX = OpExtInst %uint %clext printf %fmt_lX %l
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/test_extinst_printf.cpp b/test_conformance/spirv_new/test_extinst_printf.cpp
new file mode 100644
index 00000000..54bb8326
--- /dev/null
+++ b/test_conformance/spirv_new/test_extinst_printf.cpp
@@ -0,0 +1,259 @@
+//
+// Copyright (c) 2025 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "harness/os_helpers.h"
+#include "testBase.h"
+
+#if defined(_WIN32)
+#include <io.h>
+#define streamDup(fd1) _dup(fd1)
+#define streamDup2(fd1, fd2) _dup2(fd1, fd2)
+#else
+#if defined(__APPLE__)
+#include <sys/sysctl.h>
+#endif
+#include <unistd.h>
+#define streamDup(fd1) dup(fd1)
+#define streamDup2(fd1, fd2) dup2(fd1, fd2)
+#endif
+
+#include <fstream>
+#include <vector>
+
+// TODO: Unify with test_printf.
+struct StreamGrabber
+{
+    StreamGrabber()
+    {
+        char* tmp = get_temp_filename();
+        tempFileName = tmp;
+        free(tmp);
+    }
+    ~StreamGrabber()
+    {
+        if (acquired)
+        {
+            release();
+        }
+    }
+
+    int acquire(void)
+    {
+        if (acquired == false)
+        {
+            old_fd = streamDup(fileno(stdout));
+            if (!freopen(tempFileName.c_str(), "w", stdout))
+            {
+                release();
+                return -1;
+            }
+            acquired = true;
+        }
+        return 0;
+    }
+
+    int release(void)
+    {
+        if (acquired == true)
+        {
+            fflush(stdout);
+            streamDup2(old_fd, fileno(stdout));
+            close(old_fd);
+            acquired = false;
+        }
+        return 0;
+    }
+
+    int get_results(std::string& results)
+    {
+        if (acquired == false)
+        {
+            std::ifstream is(tempFileName, std::ios::binary);
+            if (is.good())
+            {
+                size_t filesize = 0;
+                is.seekg(0, std::ios::end);
+                filesize = (size_t)is.tellg();
+                is.seekg(0, std::ios::beg);
+
+                results.clear();
+                results.resize(filesize);
+                is.read(&results[0], filesize);
+
+                return 0;
+            }
+        }
+        return -1;
+    }
+
+    std::string tempFileName;
+    int old_fd = 0;
+    bool acquired = false;
+};
+
+// printf callback, for cl_arm_printf
+void CL_CALLBACK printfCallBack(const char* printf_data, size_t len,
+                                size_t final, void* user_data)
+{
+    fwrite(printf_data, 1, len, stdout);
+}
+
+template <typename T>
+static int printf_operands_helper(cl_device_id device,
+                                  const char* spirvFileName,
+                                  const char* kernelName,
+                                  const char* expectedResults, T value)
+{
+    StreamGrabber grabber;
+    cl_int error;
+
+    // Create a context and a queue to test with.
+    // We cannot use the context and queue from the harness because some
+    // implementations require a printf callback to be set at context creation.
+
+    cl_context_properties printf_properties[] = {
+        CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printfCallBack,
+        CL_PRINTF_BUFFERSIZE_ARM, 256, 0
+    };
+
+    cl_context_properties* props =
+        is_extension_available(device, "cl_arm_printf") ? printf_properties
+                                                        : nullptr;
+
+    clContextWrapper context =
+        clCreateContext(props, 1, &device, notify_callback, nullptr, &error);
+    test_error(error, "Unable to create printf context");
+
+    clCommandQueueWrapper queue =
+        clCreateCommandQueue(context, device, 0, &error);
+    test_error(error, "Unable to create printf queue");
+
+    clProgramWrapper program;
+    error = get_program_with_il(program, device, context, spirvFileName);
+    test_error(error, "Unable to build SPIR-V program");
+
+    clKernelWrapper kernel = clCreateKernel(program, kernelName, &error);
+    test_error(error, "Unable to create SPIR-V kernel");
+
+    error = clSetKernelArg(kernel, 0, sizeof(value), &value);
+    test_error(error, "Unable to set kernel arguments");
+
+    size_t global = 1;
+    grabber.acquire();
+    error |= clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0,
+                                    NULL, NULL);
+    error |= clFinish(queue);
+    grabber.release();
+    test_error(error, "unable to enqueue kernel");
+
+    std::string results;
+    grabber.get_results(results);
+
+    if (results != std::string(expectedResults))
+    {
+        log_error("Results do not match.\n");
+        log_error("Expected: \n---\n%s---\n", expectedResults);
+        log_error("Got: \n---\n%s---\n", results.c_str());
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
+REGISTER_TEST(extinst_printf_operands_scalar_int32)
+{
+    static const char* expected = R"(d = 1
+i = 1
+o = 1
+u = 1
+x = 1
+X = 1
+hd = 1
+hi = 1
+ho = 1
+hu = 1
+hx = 1
+hX = 1
+hhd = 1
+hhi = 1
+hho = 1
+hhu = 1
+hhx = 1
+hhX = 1
+)";
+
+    return printf_operands_helper(device, "printf_operands_scalar_int32",
+                                  "printf_operands_scalar_int32", expected, 1);
+}
+
+REGISTER_TEST(extinst_printf_operands_scalar_fp32)
+{
+    static const char* expected = R"(a = 0x1.0p+1
+A = 0X1.0P+1
+e = 2.0e+00
+E = 2.0E+00
+f = 2.0
+F = 2.0
+g = 2
+G = 2
+)";
+
+    return printf_operands_helper(device, "printf_operands_scalar_fp32",
+                                  "printf_operands_scalar_fp32", expected,
+                                  2.0f);
+}
+
+REGISTER_TEST(extinst_printf_operands_scalar_int64)
+{
+    static const char* expected = R"(ld = 4
+li = 4
+lo = 4
+lu = 4
+lx = 4
+lX = 4
+)";
+
+    if (!gHasLong)
+    {
+        log_info("Device does not support 64-bit integers. Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    return printf_operands_helper(device, "printf_operands_scalar_int64",
+                                  "printf_operands_scalar_int64", expected, 4L);
+}
+
+REGISTER_TEST(extinst_printf_operands_scalar_fp64)
+{
+    static const char* expected = R"(a = 0x1.0p+3
+A = 0X1.0P+3
+e = 8.0e+00
+E = 8.0E+00
+f = 8.0
+F = 8.0
+g = 8
+G = 8
+)";
+
+    if (!is_extension_available(device, "cl_khr_fp64"))
+    {
+        log_info("Device does not support fp64. Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    return printf_operands_helper(device, "printf_operands_scalar_fp64",
+                                  "printf_operands_scalar_fp64", expected, 8.0);
+}

From be130c9b1c13e33cbd9ee7910a63cf2af5b4849c Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 11 Feb 2025 19:34:29 +0100
Subject: [PATCH 11/12] Removed vulkan sub-tests overlapping with semaphore
 negative tests (#2250)

Fixes #2153 according to issue description.
---
 .../vulkan/test_vulkan_api_consistency.cpp    | 30 -------------------
 1 file changed, 30 deletions(-)

diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
index f3ce4a79..06b48fb0 100644
--- a/test_conformance/vulkan/test_vulkan_api_consistency.cpp
+++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
@@ -476,29 +476,6 @@ struct ConsistencyExternalSemaphoreTest : public VulkanTestBase
             sema_props1.push_back(0);
             sema_props2.push_back(0);
 
-            // Pass NULL properties
-            clCreateSemaphoreWithPropertiesKHRptr(context, NULL, &errNum);
-            test_failure_error(
-                errNum, CL_INVALID_VALUE,
-                "Semaphore creation must fail with CL_INVALID_VALUE "
-                " when properties are passed as NULL");
-
-            // Pass invalid semaphore object to wait
-            errNum = clEnqueueWaitSemaphoresKHRptr(queue, 1, NULL, NULL, 0,
-                                                   NULL, NULL);
-            test_failure_error(errNum, CL_INVALID_SEMAPHORE_KHR,
-                               "clEnqueueWaitSemaphoresKHR fails with "
-                               "CL_INVALID_SEMAPHORE_KHR "
-                               "when invalid semaphore object is passed");
-
-            // Pass invalid semaphore object to signal
-            errNum = clEnqueueSignalSemaphoresKHRptr(queue, 1, NULL, NULL, 0,
-                                                     NULL, NULL);
-            test_failure_error(errNum, CL_INVALID_SEMAPHORE_KHR,
-                               "clEnqueueSignalSemaphoresKHR fails with "
-                               "CL_INVALID_SEMAPHORE_KHR"
-                               "when invalid semaphore object is passed");
-
             // Create two semaphore objects
             clVk2Clsemaphore = clCreateSemaphoreWithPropertiesKHRptr(
                 context, sema_props1.data(), &errNum);
@@ -512,13 +489,6 @@ struct ConsistencyExternalSemaphoreTest : public VulkanTestBase
                 errNum,
                 "Unable to create semaphore with valid semaphore properties");
 
-            // Pass invalid object to release call
-            errNum = clReleaseSemaphoreKHRptr(NULL);
-            test_failure_error(errNum, CL_INVALID_SEMAPHORE_KHR,
-                               "clReleaseSemaphoreKHRptr fails with "
-                               "CL_INVALID_SEMAPHORE_KHR when NULL semaphore "
-                               "object is passed");
-
             // Release both semaphore objects
             errNum = clReleaseSemaphoreKHRptr(clVk2Clsemaphore);
             test_error(errNum, "clReleaseSemaphoreKHRptr failed");

From 3618402c3a8104b238e513ac0c9ce656d3db7b58 Mon Sep 17 00:00:00 2001
From: xinjin01 <149181207+xinjin01@users.noreply.github.com>
Date: Tue, 11 Feb 2025 18:35:56 +0000
Subject: [PATCH 12/12] Remove unsupported raw10/raw12 tests (#2232)

Images with a `CL_UNSIGNED_INT_RAW10_EXT` and
`CL_UNSIGNED_INT_RAW12_EXT` data type are unnormalised, so the
normalised tests with theses images are invalid and will be skipped.

Signed-off-by: Gorazd Sumkovski <gorazd.sumkovski@arm.com>
Signed-off-by: Xin Jin <xin.jin@arm.com>
Co-authored-by: Gorazd Sumkovski <gorazd.sumkovski@arm.com>
---
 .../test_cl_ext_image_raw10_raw12.cpp         | 46 +++++++++++++------
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_raw10_raw12.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_raw10_raw12.cpp
index c506528e..e3bd1e48 100644
--- a/test_conformance/images/kernel_read_write/test_cl_ext_image_raw10_raw12.cpp
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_raw10_raw12.cpp
@@ -20,6 +20,7 @@
 
 extern int gTypesToTest;
 extern int gtestTypesToRun;
+extern int gNormalizedModeToUse;
 extern bool gTestImage2DFromBuffer;
 extern cl_mem_flags gMemFlagsToUse;
 
@@ -38,16 +39,20 @@ static int test_image_set(cl_device_id device, cl_context context,
     log_info("---- Supported %s %s formats for this device for "
              "cl_ext_image_raw10_raw12---- \n",
              convert_image_type_to_string(imageType), "read");
-    log_info("  %-7s %-24s %d\n", "CL_R", "CL_UNSIGNED_INT_RAW10_EXT", 0);
-    log_info("  %-7s %-24s %d\n", "CL_R", "CL_UNSIGNED_INT_RAW12_EXT", 0);
+    log_info("  %-7s %-24s %d\n", "CL_R", "CL_UNSIGNED_INT_RAW10_EXT", 1);
+    log_info("  %-7s %-24s %d\n", "CL_R", "CL_UNSIGNED_INT_RAW12_EXT", 1);
     log_info("------------------------------------------- \n");
 
     image_sampler_data imageSampler;
     ImageTestTypes test{ kTestUInt, kUInt, uintFormats, "uint" };
+
     if (gTypesToTest & test.type)
     {
         std::vector<bool> filterFlags(formatList.size(), false);
         imageSampler.filter_mode = CL_FILTER_NEAREST;
+        // `CL_UNSIGNED_INT_RAW10_EXT` and `CL_UNSIGNED_INT_RAW12_EXT` image
+        // channel data types are unnormalised
+        imageSampler.normalized_coords = false;
         ret = test_read_image_formats(device, context, queue, formatList,
                                       filterFlags, &imageSampler,
                                       test.explicitType, imageType);
@@ -60,19 +65,34 @@ int ext_image_raw10_raw12(cl_device_id device, cl_context context,
 {
     int ret = 0;
 
-    if (0 == is_extension_available(device, "cl_ext_image_raw10_raw12"))
+    if (true != gNormalizedModeToUse)
     {
-        log_info("-----------------------------------------------------\n");
-        log_info("This device does not support "
-                 "cl_ext_image_raw10_raw12.\n");
-        log_info("Skipping cl_ext_image_raw10_raw12 "
-                 "image test.\n");
-        log_info("-----------------------------------------------------\n\n");
-        return 0;
+        if (0 == is_extension_available(device, "cl_ext_image_raw10_raw12"))
+        {
+            log_info("-----------------------------------------------------\n");
+            log_info("This device does not support "
+                     "cl_ext_image_raw10_raw12.\n");
+            log_info("Skipping cl_ext_image_raw10_raw12 "
+                     "image test.\n");
+            log_info(
+                "-----------------------------------------------------\n\n");
+            ret = TEST_SKIPPED_ITSELF;
+        }
+        else
+        {
+            gtestTypesToRun = kReadTests;
+            ret +=
+                test_image_set(device, context, queue, CL_MEM_OBJECT_IMAGE2D);
+        }
+    }
+    else
+    {
+        // skip the test if it is forced to be NORMALIZED from the command line
+        // argument i.e. gNormalizedModeToUse is true
+        log_info("cl_ext_image_raw10_raw12 does not support normalized channel "
+                 "components. Skipping the test.\n");
+        ret = TEST_SKIPPED_ITSELF;
     }
-    gtestTypesToRun = kReadTests;
-
-    ret += test_image_set(device, context, queue, CL_MEM_OBJECT_IMAGE2D);
 
     return ret;
 }