From 8272c83c6fb87dc4ac5ad08169fa59f9a07e086b Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Wed, 10 May 2023 10:45:44 +0100
Subject: [PATCH 01/30] math_brute_force: consider all types for extension
 pragmas (#1705)

When generating the kernel code, consider the return type(s) and the
types of all parameters, instead of only the first parameter type.
This fixes a missing extension pragma for certain cases (such as
`nan`).

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 test_conformance/math_brute_force/common.cpp | 42 +++++++++++---------
 1 file changed, 24 insertions(+), 18 deletions(-)
diff --git a/test_conformance/math_brute_force/common.cpp b/test_conformance/math_brute_force/common.cpp
index 71b4defe..83a33516 100644
--- a/test_conformance/math_brute_force/common.cpp
+++ b/test_conformance/math_brute_force/common.cpp
@@ -67,22 +67,28 @@ void EmitDefineUndef(std::ostringstream &kernel, const char *name,
     kernel << "#define " << name << " " << GetUndefValue(type) << '\n';
 }
 
-void EmitEnableExtension(std::ostringstream &kernel, ParameterType type)
+void EmitEnableExtension(std::ostringstream &kernel,
+                         const std::initializer_list<ParameterType> &types)
 {
-    switch (type)
-    {
-        case ParameterType::Double:
-            kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
-            break;
+    bool needsFp64 = false;
 
-        case ParameterType::Float:
-        case ParameterType::Int:
-        case ParameterType::UInt:
-        case ParameterType::Long:
-        case ParameterType::ULong:
-            // No extension required.
-            break;
+    for (const auto &type : types)
+    {
+        switch (type)
+        {
+            case ParameterType::Double: needsFp64 = true; break;
+
+            case ParameterType::Float:
+            case ParameterType::Int:
+            case ParameterType::UInt:
+            case ParameterType::Long:
+            case ParameterType::ULong:
+                // No extension required.
+                break;
+        }
     }
+
+    if (needsFp64) kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
 }
 
 std::string GetBuildOptions(bool relaxed_mode)
@@ -123,7 +129,7 @@ std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
     EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
     EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
     EmitDefineUndef(kernel, "UNDEF1", type1);
-    EmitEnableExtension(kernel, type1);
+    EmitEnableExtension(kernel, { retType, type1 });
 
     // clang-format off
     const char *kernel_nonvec3[] = { R"(
@@ -199,7 +205,7 @@ std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
     EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
     EmitDefineUndef(kernel, "UNDEF1", type1);
     EmitDefineUndef(kernel, "UNDEFR2", retType2);
-    EmitEnableExtension(kernel, type1);
+    EmitEnableExtension(kernel, { retType1, retType2, type1 });
 
     // clang-format off
     const char *kernel_nonvec3[] = { R"(
@@ -282,7 +288,7 @@ std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
     EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
     EmitDefineUndef(kernel, "UNDEF1", type1);
     EmitDefineUndef(kernel, "UNDEF2", type2);
-    EmitEnableExtension(kernel, type1);
+    EmitEnableExtension(kernel, { retType, type1, type2 });
 
     const bool is_vec3 = sizeValues[vector_size_index] == 3;
 
@@ -384,7 +390,7 @@ std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
     EmitDefineUndef(kernel, "UNDEF1", type1);
     EmitDefineUndef(kernel, "UNDEF2", type2);
     EmitDefineUndef(kernel, "UNDEFR2", retType2);
-    EmitEnableExtension(kernel, type1);
+    EmitEnableExtension(kernel, { retType1, retType2, type1, type2 });
 
     // clang-format off
     const char *kernel_nonvec3[] = { R"(
@@ -476,7 +482,7 @@ std::string GetTernaryKernel(const std::string &kernel_name,
     EmitDefineUndef(kernel, "UNDEF1", type1);
     EmitDefineUndef(kernel, "UNDEF2", type2);
     EmitDefineUndef(kernel, "UNDEF3", type3);
-    EmitEnableExtension(kernel, type1);
+    EmitEnableExtension(kernel, { retType, type1, type2, type3 });
 
     // clang-format off
     const char *kernel_nonvec3[] = { R"(

From d223e46f20ec8aed92ae88798fa4e38879dbdd85 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Thu, 11 May 2023 14:12:09 +0100
Subject: [PATCH 02/30] atomics: Remove unused variable in atomic_add_index_bin
 (#1709)

Remove a variable that was set when an OpenCL API call failed, but
never read again.  Instead, return immediately upon failure.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 test_conformance/atomics/test_indexed_cases.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp
index 2bba3e24..7da2dfa7 100644
--- a/test_conformance/atomics/test_indexed_cases.cpp
+++ b/test_conformance/atomics/test_indexed_cases.cpp
@@ -201,7 +201,6 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
     int number_of_bins = number_of_items / divisor;
     int max_counts_per_bin = divisor * 2;
 
-    int fail = 0;
     int err;
 
     clProgramWrapper program;
@@ -345,7 +344,6 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
     {
         log_error("add_index_bin_test FAILED to set kernel arguments: %d\n",
                   err);
-        fail = 1;
         return -1;
     }
 
@@ -354,7 +352,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
     if (err)
     {
         log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
-        fail = 1;
+        return -1;
     }
 
     cl_int *final_bin_assignments =
@@ -372,7 +370,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
     if (err)
     {
         log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
-        fail = 1;
+        return -1;
     }
 
     cl_int *final_bin_counts =
@@ -390,7 +388,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
     {
         log_error("add_index_bin_test FAILED to read back bin_counters: %d\n",
                   err);
-        fail = 1;
+        return -1;
     }
 
     // Verification.

From b23268acf5f407bfa3c53ae7a538f1f7a9588b10 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Sat, 13 May 2023 10:21:13 +0100
Subject: [PATCH 03/30] math_brute_force: don't get build log after
 clCreateKernel (#1722)

The OpenCL specification states that the build log is only for
clBuildProgram, clCompileProgram or clLinkProgram.  Querying it after
clCreateKernel should not give any additional information, so this is
effectively dead code.  In case building failed, any logs would
already have been printed by create_single_kernel_helper.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 test_conformance/math_brute_force/common.cpp | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/test_conformance/math_brute_force/common.cpp b/test_conformance/math_brute_force/common.cpp
index 83a33516..47f493e7 100644
--- a/test_conformance/math_brute_force/common.cpp
+++ b/test_conformance/math_brute_force/common.cpp
@@ -591,14 +591,6 @@ cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id,
         if (!kernel || error != CL_SUCCESS)
         {
             vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
-            size_t log_size;
-            clGetProgramBuildInfo(program, gDevice, CL_PROGRAM_BUILD_LOG, 0,
-                                  nullptr, &log_size);
-            std::string buffer;
-            buffer.resize(log_size + 1);
-            clGetProgramBuildInfo(program, gDevice, CL_PROGRAM_BUILD_LOG,
-                                  log_size, &buffer[0], NULL);
-            vlog_error("Log: %s\n", buffer.c_str());
             return error;
         }
     }

From 06c0c99c2eda7376709a991fc9c41c02aaec0c09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?K=C3=A9vin=20Petit?= <kpet@free.fr>
Date: Sat, 13 May 2023 10:21:34 +0100
Subject: [PATCH 04/30] Enable clCopyImage suite to run in parallel (#1717)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Kévin Petit <kpet@free.fr>
---
 .../images/clCopyImage/test_copy_generic.cpp          | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/test_conformance/images/clCopyImage/test_copy_generic.cpp b/test_conformance/images/clCopyImage/test_copy_generic.cpp
index 3bd1b6ef..3e0b60d9 100644
--- a/test_conformance/images/clCopyImage/test_copy_generic.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_generic.cpp
@@ -289,12 +289,6 @@ cl_mem create_image( cl_context context, cl_command_queue queue, BufferOwningPtr
     return img;
 }
 
-// WARNING -- not thread safe
-BufferOwningPtr<char> srcData;
-BufferOwningPtr<char> dstData;
-BufferOwningPtr<char> srcHost;
-BufferOwningPtr<char> dstHost;
-
 int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                             const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d )
 {
@@ -302,6 +296,11 @@ int test_copy_image_generic( cl_context context, cl_command_queue queue, image_d
 
     clMemWrapper srcImage, dstImage;
 
+    BufferOwningPtr<char> srcData;
+    BufferOwningPtr<char> dstData;
+    BufferOwningPtr<char> srcHost;
+    BufferOwningPtr<char> dstHost;
+
     if( gDebugTrace )
         log_info( " ++ Entering inner test loop...\n" );
 

From 1884042f5ddd520bdcc83c4398ac396883990745 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Tue, 16 May 2023 09:58:04 +0100
Subject: [PATCH 05/30] [NFC] cmake: do not suppress -Wunused-but-set-variable
 globally (#1723)

Only disable `-Wunused-but-set-variable` for tests that do not compile
cleanly with this warning enabled.  This re-enables the warning for
most other tests, so that it can catch any new occurrences.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 CMakeLists.txt                                           | 1 -
 test_conformance/basic/CMakeLists.txt                    | 2 ++
 test_conformance/conversions/CMakeLists.txt              | 2 ++
 test_conformance/device_timer/CMakeLists.txt             | 2 ++
 test_conformance/images/clCopyImage/CMakeLists.txt       | 2 ++
 test_conformance/images/clReadWriteImage/CMakeLists.txt  | 2 ++
 test_conformance/images/kernel_read_write/CMakeLists.txt | 2 +-
 test_conformance/mem_host_flags/CMakeLists.txt           | 2 ++
 test_conformance/non_uniform_work_group/CMakeLists.txt   | 2 ++
 9 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fc072670..4fce58d8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -105,7 +105,6 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang"
         add_cxx_flag_if_supported(-Wall)
         # Suppress warnings that currently trigger on the code base.
         # This list should shrink over time when warnings are fixed.
-        add_cxx_flag_if_supported(-Wno-unused-but-set-variable)
         add_cxx_flag_if_supported(-Wno-sometimes-uninitialized)
         add_cxx_flag_if_supported(-Wno-sign-compare)
     endif()
diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt
index 6d61f0cf..dde3311d 100644
--- a/test_conformance/basic/CMakeLists.txt
+++ b/test_conformance/basic/CMakeLists.txt
@@ -70,4 +70,6 @@ if(APPLE)
     list(APPEND ${MODULE_NAME}_SOURCES test_queue_priority.cpp)
 endif(APPLE)
 
+set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+
 include(../CMakeCommon.txt)
diff --git a/test_conformance/conversions/CMakeLists.txt b/test_conformance/conversions/CMakeLists.txt
index 523b6ead..cc019b26 100644
--- a/test_conformance/conversions/CMakeLists.txt
+++ b/test_conformance/conversions/CMakeLists.txt
@@ -16,4 +16,6 @@ set_source_files_properties(
         COMPILE_FLAGS -march=i686)
 endif(NOT CMAKE_CL_64 AND NOT MSVC AND NOT ANDROID)
 
+set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+
 include(../CMakeCommon.txt)
diff --git a/test_conformance/device_timer/CMakeLists.txt b/test_conformance/device_timer/CMakeLists.txt
index 4af7c7f7..a24d8d24 100644
--- a/test_conformance/device_timer/CMakeLists.txt
+++ b/test_conformance/device_timer/CMakeLists.txt
@@ -5,4 +5,6 @@ set(${MODULE_NAME}_SOURCES
     test_device_timer.cpp
 )
 
+set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+
 include(../CMakeCommon.txt)
diff --git a/test_conformance/images/clCopyImage/CMakeLists.txt b/test_conformance/images/clCopyImage/CMakeLists.txt
index d8aace41..bf06dc68 100644
--- a/test_conformance/images/clCopyImage/CMakeLists.txt
+++ b/test_conformance/images/clCopyImage/CMakeLists.txt
@@ -15,5 +15,7 @@ set(${MODULE_NAME}_SOURCES
     ../common.cpp
 )
 
+set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+
 include(../../CMakeCommon.txt)
 
diff --git a/test_conformance/images/clReadWriteImage/CMakeLists.txt b/test_conformance/images/clReadWriteImage/CMakeLists.txt
index 9308bbfe..bc1600ff 100644
--- a/test_conformance/images/clReadWriteImage/CMakeLists.txt
+++ b/test_conformance/images/clReadWriteImage/CMakeLists.txt
@@ -11,5 +11,7 @@ set(${MODULE_NAME}_SOURCES
     ../common.cpp
 )
 
+set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+
 include(../../CMakeCommon.txt)
 
diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt
index ccd678c1..b5527c74 100644
--- a/test_conformance/images/kernel_read_write/CMakeLists.txt
+++ b/test_conformance/images/kernel_read_write/CMakeLists.txt
@@ -21,7 +21,7 @@ set(${MODULE_NAME}_SOURCES
 
 # Make unused variables not fatal in this module; see
 # https://github.com/KhronosGroup/OpenCL-CTS/issues/1484
-set_gnulike_module_compile_flags("-Wno-error=unused-variable")
+set_gnulike_module_compile_flags("-Wno-error=unused-variable -Wno-unused-but-set-variable")
 
 include(../../CMakeCommon.txt)
 
diff --git a/test_conformance/mem_host_flags/CMakeLists.txt b/test_conformance/mem_host_flags/CMakeLists.txt
index 73a36f0d..4f2b960d 100644
--- a/test_conformance/mem_host_flags/CMakeLists.txt
+++ b/test_conformance/mem_host_flags/CMakeLists.txt
@@ -6,4 +6,6 @@ set(${MODULE_NAME}_SOURCES
     mem_host_image.cpp
 )
 
+set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+
 include(../CMakeCommon.txt)
diff --git a/test_conformance/non_uniform_work_group/CMakeLists.txt b/test_conformance/non_uniform_work_group/CMakeLists.txt
index 30c3a846..f78dd195 100644
--- a/test_conformance/non_uniform_work_group/CMakeLists.txt
+++ b/test_conformance/non_uniform_work_group/CMakeLists.txt
@@ -10,6 +10,8 @@ set(${MODULE_NAME}_SOURCES
     tools.cpp
 )
 
+set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+
 include(../CMakeCommon.txt)
 
 # end of file #

From f31b2f029c9e33b018460666ebf47950fa9d6224 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 16 May 2023 17:43:47 +0200
Subject: [PATCH 06/30] Added cl_half support for test_relationals (#1623)

* Added cl_khr_fp16 support for test_relationals (issue #142, relationals)

* Added cl_khr_fp16 support for any and bitselect test cases (issue #142, relationals)

* Correction related to the automated Travis build for macOS (issue #142, relationals)

* More corrections related to the automated Travis build for macOS (issue #142, relationals)

* Added a few cosmetic corrections (issue #142, test_relationals)

* Added missing clang format

* Added corrections related to order of initialization

* Added corrections due to code review (issue #142, relationals)

* Correction for previous commit

* Added subnormals related condition for test verification (issue #142, relationals)

* Added indexing correction due to code review

* Replaced hardcoded iteration limit (issue #142, relationals)
---
 test_conformance/printf/test_printf.cpp       |  13 +-
 test_conformance/relationals/CMakeLists.txt   |   3 +-
 .../relationals/test_comparisons_double.cpp   | 363 ----------
 .../relationals/test_comparisons_float.cpp    | 362 ----------
 .../relationals/test_comparisons_fp.cpp       | 661 ++++++++++++++++++
 .../relationals/test_comparisons_fp.h         | 227 ++++++
 .../relationals/test_relationals.cpp          | 224 +++---
 7 files changed, 1008 insertions(+), 845 deletions(-)
 delete mode 100644 test_conformance/relationals/test_comparisons_double.cpp
 delete mode 100644 test_conformance/relationals/test_comparisons_float.cpp
 create mode 100644 test_conformance/relationals/test_comparisons_fp.cpp
 create mode 100644 test_conformance/relationals/test_comparisons_fp.h

diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp
index e789e0ca..e43e302f 100644
--- a/test_conformance/printf/test_printf.cpp
+++ b/test_conformance/printf/test_printf.cpp
@@ -268,7 +268,7 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
     };
 
     //Update testname
-    sprintf(testname,"%s%d","test",testId);
+    std::snprintf(testname, sizeof(testname), "%s%d", "test", testId);
 
     if (allTestCase[testId]->_type == TYPE_HALF
         || allTestCase[testId]->_type == TYPE_HALF_LIMITS)
@@ -278,13 +278,18 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
     //Update addrSpaceArgument and addrSpacePAddArgument types, based on FULL_PROFILE/EMBEDDED_PROFILE
     if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE)
     {
-        sprintf(addrSpaceArgument, "%s",allTestCase[testId]->_genParameters[testNum].addrSpaceArgumentTypeQualifier);
+        std::snprintf(addrSpaceArgument, sizeof(addrSpaceArgument), "%s",
+                      allTestCase[testId]
+                          ->_genParameters[testNum]
+                          .addrSpaceArgumentTypeQualifier);
 
-        sprintf(addrSpacePAddArgument, "%s", allTestCase[testId]->_genParameters[testNum].addrSpacePAdd);
+        std::snprintf(
+            addrSpacePAddArgument, sizeof(addrSpacePAddArgument), "%s",
+            allTestCase[testId]->_genParameters[testNum].addrSpacePAdd);
     }
 
     if (strlen(addrSpaceArgument) == 0)
-        sprintf(addrSpaceArgument,"void");
+        std::snprintf(addrSpaceArgument, sizeof(addrSpaceArgument), "void");
 
     // create program based on its type
 
diff --git a/test_conformance/relationals/CMakeLists.txt b/test_conformance/relationals/CMakeLists.txt
index ecaa056c..aa5dd6a1 100644
--- a/test_conformance/relationals/CMakeLists.txt
+++ b/test_conformance/relationals/CMakeLists.txt
@@ -3,8 +3,7 @@ set(MODULE_NAME RELATIONALS)
 set(${MODULE_NAME}_SOURCES
     main.cpp
     test_relationals.cpp
-    test_comparisons_float.cpp
-    test_comparisons_double.cpp
+    test_comparisons_fp.cpp
     test_shuffles.cpp
 )
 
diff --git a/test_conformance/relationals/test_comparisons_double.cpp b/test_conformance/relationals/test_comparisons_double.cpp
deleted file mode 100644
index 3fe1124c..00000000
--- a/test_conformance/relationals/test_comparisons_double.cpp
+++ /dev/null
@@ -1,363 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-
-#define TEST_SIZE 512
-
-const char *equivTestKernelPattern_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, __global long%s *destValues, __global long%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
-"    destValuesB[tid] = sourceA[tid] %s sourceB[tid];\n"
-"\n"
-"}\n";
-
-const char *equivTestKernelPatternLessGreater_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, __global long%s *destValues, __global long%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
-"    destValuesB[tid] = (sourceA[tid] < sourceB[tid]) | (sourceA[tid] > sourceB[tid]);\n"
-"\n"
-"}\n";
-
-
-const char *equivTestKernelPattern_double3 =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, __global long%s *destValues, __global long%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    double3 sampA = vload3(tid, (__global double *)sourceA);\n"
-"    double3 sampB = vload3(tid, (__global double *)sourceB);\n"
-"    vstore3(%s( sampA, sampB ), tid, (__global long *)destValues);\n"
-"    vstore3(( sampA %s sampB ), tid, (__global long *)destValuesB);\n"
-"\n"
-"}\n";
-
-const char *equivTestKernelPatternLessGreater_double3 =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, __global long%s *destValues, __global long%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    double3 sampA = vload3(tid, (__global double *)sourceA);\n"
-"    double3 sampB = vload3(tid, (__global double *)sourceB);\n"
-"    vstore3(%s( sampA, sampB ), tid, (__global long *)destValues);\n"
-"    vstore3(( sampA < sampB ) | (sampA > sampB), tid, (__global long *)destValuesB);\n"
-"\n"
-"}\n";
-
-
-typedef bool (*equivVerifyFn)( double inDataA, double inDataB );
-
-void verify_equiv_values_double( unsigned int vecSize, double *inDataA, double *inDataB, cl_long *outData, equivVerifyFn verifyFn )
-{
-    unsigned int i;
-    cl_long trueResult;
-    bool result;
-
-    trueResult = ( vecSize == 1 ) ? 1 : -1;
-    for( i = 0; i < vecSize; i++ )
-    {
-        result = verifyFn( inDataA[ i ], inDataB[ i ] );
-        outData[ i ] = result ? trueResult : 0;
-    }
-}
-
-void generate_equiv_test_data_double( double *outData, unsigned int vecSize, bool alpha, MTdata d )
-{
-    unsigned int i;
-
-    generate_random_data( kDouble, vecSize * TEST_SIZE, d, outData );
-
-    // Fill the first few vectors with NAN in each vector element (or the second set if we're alpha, so we can test either case)
-    if( alpha )
-        outData += vecSize * vecSize;
-    for( i = 0; i < vecSize; i++ )
-    {
-        outData[ 0 ] = NAN;
-        outData += vecSize + 1;
-    }
-    // Make sure the third set is filled regardless, to test the case where both have NANs
-    if( !alpha )
-        outData += vecSize * vecSize;
-    for( i = 0; i < vecSize; i++ )
-    {
-        outData[ 0 ] = NAN;
-        outData += vecSize + 1;
-    }
-}
-
-int test_equiv_kernel_double(cl_context context, cl_command_queue queue, const char *fnName, const char *opName,
-                             unsigned int vecSize, equivVerifyFn verifyFn, MTdata d )
-{
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[4];
-    double inDataA[TEST_SIZE * 16], inDataB[ TEST_SIZE * 16 ];
-    cl_long outData[TEST_SIZE * 16], expected[16];
-    int error, i, j;
-    size_t threads[1], localThreads[1];
-    char kernelSource[10240];
-    char *programPtr;
-    char sizeName[4];
-
-
-    /* Create the source */
-    if( vecSize == 1 )
-        sizeName[ 0 ] = 0;
-    else
-        sprintf( sizeName, "%d", vecSize );
-
-    if(DENSE_PACK_VECS && vecSize == 3) {
-        if (strcmp(fnName, "islessgreater")) {
-            sprintf( kernelSource, equivTestKernelPattern_double3, sizeName, sizeName, sizeName, sizeName, fnName, opName );
-        } else {
-            sprintf( kernelSource, equivTestKernelPatternLessGreater_double3, sizeName, sizeName, sizeName, sizeName, fnName );
-        }
-    } else {
-        if (strcmp(fnName, "islessgreater")) {
-            sprintf( kernelSource, equivTestKernelPattern_double, sizeName, sizeName, sizeName, sizeName, fnName, opName );
-        } else {
-            sprintf( kernelSource, equivTestKernelPatternLessGreater_double, sizeName, sizeName, sizeName, sizeName, fnName );
-        }
-    }
-
-    /* Create kernels */
-    programPtr = kernelSource;
-    if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
-    {
-        return -1;
-    }
-
-    /* Generate some streams */
-    generate_equiv_test_data_double( inDataA, vecSize, true, d );
-    generate_equiv_test_data_double( inDataB, vecSize, false, d );
-
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_double) * vecSize * TEST_SIZE,
-                                &inDataA, &error);
-    if( streams[0] == NULL )
-    {
-        print_error( error, "Creating input array A failed!\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_double) * vecSize * TEST_SIZE,
-                                &inDataB, &error);
-    if( streams[1] == NULL )
-    {
-        print_error( error, "Creating input array A failed!\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_long ) * vecSize * TEST_SIZE, NULL, &error);
-    if( streams[2] == NULL )
-    {
-        print_error( error, "Creating output array failed!\n");
-        return -1;
-    }
-    streams[3] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_long ) * vecSize * TEST_SIZE, NULL, &error);
-    if( streams[3] == NULL )
-    {
-        print_error( error, "Creating output array failed!\n");
-        return -1;
-    }
-
-
-    /* Assign streams and execute */
-    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg( kernel, 3, sizeof( streams[3] ), &streams[3] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-
-
-    /* Run the kernel */
-    threads[0] = TEST_SIZE;
-
-    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
-    test_error( error, "Unable to get work group size to use" );
-
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Unable to execute test kernel" );
-
-    /* Now get the results */
-    error = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof( cl_long ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL );
-    test_error( error, "Unable to read output array!" );
-
-    /* And verify! */
-    for( i = 0; i < TEST_SIZE; i++ )
-    {
-        verify_equiv_values_double( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn);
-
-        for( j = 0; j < (int)vecSize; j++ )
-        {
-            if( expected[ j ] != outData[ i * vecSize + j ] )
-            {
-                log_error( "ERROR: Data sample %d:%d at size %d does not validate! Expected %lld, got %lld, source %f,%f\n",
-                          i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] );
-                return -1;
-            }
-        }
-    }
-
-    /* Now get the results */
-    error = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof( cl_long ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL );
-    test_error( error, "Unable to read output array!" );
-
-    /* And verify! */
-    for( i = 0; i < TEST_SIZE; i++ )
-    {
-        verify_equiv_values_double( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn);
-
-        for( j = 0; j < (int)vecSize; j++ )
-        {
-            if( expected[ j ] != outData[ i * vecSize + j ] )
-            {
-                log_error( "ERROR: Data sample %d:%d at size %d does not validate! Expected %lld, got %lld, source %f,%f\n",
-                          i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] );
-                return -1;
-            }
-        }
-    }
-
-    return 0;
-}
-
-int test_equiv_kernel_set_double(cl_device_id device, cl_context context, cl_command_queue queue, const char *fnName, const char *opName, equivVerifyFn verifyFn, MTdata d )
-{
-    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
-    unsigned int index;
-    int retVal = 0;
-
-    if (!is_extension_available(device, "cl_khr_fp64")) {
-        log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
-        return 0;
-    }
-    log_info("Testing doubles.\n");
-
-    for( index = 0; vecSizes[ index ] != 0; index++ )
-    {
-        // Test!
-        if( test_equiv_kernel_double(context, queue, fnName, opName, vecSizes[ index ], verifyFn, d ) != 0 )
-        {
-            log_error( "   Vector double%d FAILED\n", vecSizes[ index ] );
-            retVal = -1;
-        }
-    }
-
-    return retVal;
-}
-
-bool isequal_verify_fn_double( double valueA, double valueB )
-{
-    if( isnan( valueA ) || isnan( valueB ) )
-        return false;
-    return valueA == valueB;
-}
-
-int test_relational_isequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed(gRandomSeed);
-    return test_equiv_kernel_set_double( device, context, queue, "isequal", "==", isequal_verify_fn_double, seed );
-}
-
-bool isnotequal_verify_fn_double( double valueA, double valueB )
-{
-    if( isnan( valueA ) || isnan( valueB ) )
-        return true;
-    return valueA != valueB;
-}
-
-int test_relational_isnotequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed(gRandomSeed);
-    return test_equiv_kernel_set_double( device, context, queue, "isnotequal", "!=", isnotequal_verify_fn_double, seed );
-}
-
-bool isgreater_verify_fn_double( double valueA, double valueB )
-{
-    if( isnan( valueA ) || isnan( valueB ) )
-        return false;
-    return valueA > valueB;
-}
-
-int test_relational_isgreater_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed(gRandomSeed);
-    return test_equiv_kernel_set_double( device, context, queue, "isgreater", ">", isgreater_verify_fn_double, seed );
-}
-
-bool isgreaterequal_verify_fn_double( double valueA, double valueB )
-{
-    if( isnan( valueA ) || isnan( valueB ) )
-        return false;
-    return valueA >= valueB;
-}
-
-int test_relational_isgreaterequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed(gRandomSeed);
-    return test_equiv_kernel_set_double( device, context, queue, "isgreaterequal", ">=", isgreaterequal_verify_fn_double, seed );
-}
-
-bool isless_verify_fn_double( double valueA, double valueB )
-{
-    if( isnan( valueA ) || isnan( valueB ) )
-        return false;
-    return valueA < valueB;
-}
-
-int test_relational_isless_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed(gRandomSeed);
-    return test_equiv_kernel_set_double( device, context, queue, "isless", "<", isless_verify_fn_double, seed );
-}
-
-bool islessequal_verify_fn_double( double valueA, double valueB )
-{
-    if( isnan( valueA ) || isnan( valueB ) )
-        return false;
-    return valueA <= valueB;
-}
-
-int test_relational_islessequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed(gRandomSeed);
-    return test_equiv_kernel_set_double( device, context, queue, "islessequal", "<=", islessequal_verify_fn_double, seed );
-}
-
-bool islessgreater_verify_fn_double( double valueA, double valueB )
-{
-    if( isnan( valueA ) || isnan( valueB ) )
-        return false;
-    return ( valueA < valueB ) || ( valueA > valueB );
-}
-
-int test_relational_islessgreater_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed(gRandomSeed);
-    return test_equiv_kernel_set_double( device, context, queue, "islessgreater", "<>", islessgreater_verify_fn_double, seed );
-}
-
-
diff --git a/test_conformance/relationals/test_comparisons_float.cpp b/test_conformance/relationals/test_comparisons_float.cpp
deleted file mode 100644
index 274cd71b..00000000
--- a/test_conformance/relationals/test_comparisons_float.cpp
+++ /dev/null
@@ -1,362 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-
-#define TEST_SIZE 512
-
-const char *equivTestKernelPattern_float =
-"__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, __global int%s *destValues, __global int%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
-"    destValuesB[tid] = sourceA[tid] %s sourceB[tid];\n"
-"\n"
-"}\n";
-
-const char *equivTestKernelPatternLessGreater_float =
-"__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, __global int%s *destValues, __global int%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
-"    destValuesB[tid] = (sourceA[tid] < sourceB[tid]) | (sourceA[tid] > sourceB[tid]);\n"
-"\n"
-"}\n";
-
-
-const char *equivTestKernelPattern_float3 =
-"__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, __global int%s *destValues, __global int%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    float3 sampA = vload3(tid, (__global float *)sourceA);\n"
-"    float3 sampB = vload3(tid, (__global float *)sourceB);\n"
-"    vstore3(%s( sampA, sampB ), tid, (__global int *)destValues);\n"
-"    vstore3(( sampA %s sampB ), tid, (__global int *)destValuesB);\n"
-"\n"
-"}\n";
-
-const char *equivTestKernelPatternLessGreater_float3 =
-"__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, __global int%s *destValues, __global int%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    float3 sampA = vload3(tid, (__global float *)sourceA);\n"
-"    float3 sampB = vload3(tid, (__global float *)sourceB);\n"
-"    vstore3(%s( sampA, sampB ), tid, (__global int *)destValues);\n"
-"    vstore3(( sampA < sampB ) | (sampA > sampB), tid, (__global int *)destValuesB);\n"
-"\n"
-"}\n";
-
-typedef bool (*equivVerifyFn)( float inDataA, float inDataB );
-
-int IsFloatInfinity(float x)
-{
-    return isinf(x);
-}
-
-int IsFloatNaN(float x)
-{
-    return isnan(x);
-}
-
-void verify_equiv_values_float( unsigned int vecSize, float *inDataA, float *inDataB, int *outData, equivVerifyFn verifyFn )
-{
-    unsigned int i;
-    int trueResult;
-    bool result;
-
-    trueResult = ( vecSize == 1 ) ? 1 : -1;
-    for( i = 0; i < vecSize; i++ )
-    {
-        result = verifyFn( inDataA[ i ], inDataB[ i ] );
-        outData[ i ] = result ? trueResult : 0;
-    }
-}
-
-void generate_equiv_test_data_float( float *outData, unsigned int vecSize, bool alpha, MTdata d )
-{
-    unsigned int i;
-
-    generate_random_data( kFloat, vecSize * TEST_SIZE, d, outData );
-
-    // Fill the first few vectors with NAN in each vector element (or the second set if we're alpha, so we can test either case)
-    if( alpha )
-        outData += vecSize * vecSize;
-    for( i = 0; i < vecSize; i++ )
-    {
-        outData[ 0 ] = NAN;
-        outData += vecSize + 1;
-    }
-    // Make sure the third set is filled regardless, to test the case where both have NANs
-    if( !alpha )
-        outData += vecSize * vecSize;
-    for( i = 0; i < vecSize; i++ )
-    {
-        outData[ 0 ] = NAN;
-        outData += vecSize + 1;
-    }
-}
-
-int test_equiv_kernel_float(cl_context context, cl_command_queue queue, const char *fnName, const char *opName,
-                       unsigned int vecSize, equivVerifyFn verifyFn, MTdata d )
-{
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[4];
-    float inDataA[TEST_SIZE * 16], inDataB[ TEST_SIZE * 16 ];
-    int outData[TEST_SIZE * 16], expected[16];
-    int error, i, j;
-    size_t threads[1], localThreads[1];
-    char kernelSource[10240];
-    char *programPtr;
-    char sizeName[4];
-
-
-    /* Create the source */
-    if( vecSize == 1 )
-        sizeName[ 0 ] = 0;
-    else
-        sprintf( sizeName, "%d", vecSize );
-
-
-    if(DENSE_PACK_VECS && vecSize == 3) {
-  if (strcmp(fnName, "islessgreater")) {
-            sprintf( kernelSource, equivTestKernelPattern_float3, sizeName, sizeName, sizeName, sizeName, fnName, opName );
-        } else {
-            sprintf( kernelSource, equivTestKernelPatternLessGreater_float3, sizeName, sizeName, sizeName, sizeName, fnName );
-        }
-    } else {
-        if (strcmp(fnName, "islessgreater")) {
-          sprintf( kernelSource, equivTestKernelPattern_float, sizeName, sizeName, sizeName, sizeName, fnName, opName );
-  } else {
-    sprintf( kernelSource, equivTestKernelPatternLessGreater_float, sizeName, sizeName, sizeName, sizeName, fnName );
-  }
-    }
-
-    /* Create kernels */
-    programPtr = kernelSource;
-    if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
-    {
-        return -1;
-    }
-
-    /* Generate some streams */
-    generate_equiv_test_data_float( inDataA, vecSize, true, d );
-    generate_equiv_test_data_float( inDataB, vecSize, false, d );
-
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_float) * vecSize * TEST_SIZE,
-                                &inDataA, &error);
-    if( streams[0] == NULL )
-    {
-        print_error( error, "Creating input array A failed!\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_float) * vecSize * TEST_SIZE,
-                                &inDataB, &error);
-    if( streams[1] == NULL )
-    {
-        print_error( error, "Creating input array A failed!\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_int ) * vecSize * TEST_SIZE, NULL, &error);
-    if( streams[2] == NULL )
-    {
-        print_error( error, "Creating output array failed!\n");
-        return -1;
-    }
-  streams[3] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_int ) * vecSize * TEST_SIZE, NULL, &error);
-    if( streams[3] == NULL )
-    {
-        print_error( error, "Creating output array failed!\n");
-        return -1;
-    }
-
-
-    /* Assign streams and execute */
-    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-  error = clSetKernelArg( kernel, 3, sizeof( streams[3] ), &streams[3] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-
-
-    /* Run the kernel */
-    threads[0] = TEST_SIZE;
-
-    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
-    test_error( error, "Unable to get work group size to use" );
-
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Unable to execute test kernel" );
-
-  /* Now get the results */
-    error = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof( int ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL );
-    test_error( error, "Unable to read output array!" );
-
-  /* And verify! */
-  for( i = 0; i < TEST_SIZE; i++ )
-  {
-        verify_equiv_values_float( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn);
-
-        for( j = 0; j < (int)vecSize; j++ )
-        {
-            if( expected[ j ] != outData[ i * vecSize + j ] )
-            {
-                log_error( "ERROR: Data sample %d:%d at size %d does not validate! Expected %d, got %d, source %f,%f\n",
-                  i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] );
-                return -1;
-            }
-        }
-  }
-
-  /* Now get the results */
-    error = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof( int ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL );
-    test_error( error, "Unable to read output array!" );
-
-  /* And verify! */
-    int fail = 0;
-    for( i = 0; i < TEST_SIZE; i++ )
-    {
-        verify_equiv_values_float( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn);
-
-        for( j = 0; j < (int)vecSize; j++ )
-        {
-            if( expected[ j ] != outData[ i * vecSize + j ] )
-            {
-                if (gInfNanSupport == 0)
-                {
-                    if (IsFloatNaN(inDataA[i*vecSize + j]) || IsFloatNaN (inDataB[i*vecSize + j]))
-                    {
-                        fail = 0;
-                    }
-                    else
-                        fail = 1;
-                }
-                if (fail)
-                {
-                    log_error( "ERROR: Data sample %d:%d at size %d does not validate! Expected %d, got %d, source %f,%f\n",
-                      i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] );
-                    return -1;
-                }
-            }
-        }
-  }
-
-  return 0;
-}
-
-int test_equiv_kernel_set_float(cl_context context, cl_command_queue queue, const char *fnName, const char *opName, equivVerifyFn verifyFn, MTdata d )
-{
-    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
-    unsigned int index;
-    int retVal = 0;
-
-    for( index = 0; vecSizes[ index ] != 0; index++ )
-    {
-        // Test!
-        if( test_equiv_kernel_float(context, queue, fnName, opName, vecSizes[ index ], verifyFn, d ) != 0 )
-        {
-            log_error( "   Vector float%d FAILED\n", vecSizes[ index ] );
-            retVal = -1;
-        }
-    }
-
-    return retVal;
-}
-
-bool isequal_verify_fn_float( float valueA, float valueB )
-{
-    return valueA == valueB;
-}
-
-int test_relational_isequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed( gRandomSeed );
-    return test_equiv_kernel_set_float( context, queue, "isequal", "==", isequal_verify_fn_float, seed );
-}
-
-bool isnotequal_verify_fn_float( float valueA, float valueB )
-{
-    return valueA != valueB;
-}
-
-int test_relational_isnotequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed( gRandomSeed );
-    return test_equiv_kernel_set_float( context, queue, "isnotequal", "!=", isnotequal_verify_fn_float, seed );
-}
-
-bool isgreater_verify_fn_float( float valueA, float valueB )
-{
-    return valueA > valueB;
-}
-
-int test_relational_isgreater_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed( gRandomSeed );
-    return test_equiv_kernel_set_float( context, queue, "isgreater", ">", isgreater_verify_fn_float, seed );
-}
-
-bool isgreaterequal_verify_fn_float( float valueA, float valueB )
-{
-    return valueA >= valueB;
-}
-
-int test_relational_isgreaterequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed( gRandomSeed );
-    return test_equiv_kernel_set_float( context, queue, "isgreaterequal", ">=", isgreaterequal_verify_fn_float, seed );
-}
-
-bool isless_verify_fn_float( float valueA, float valueB )
-{
-    return valueA < valueB;
-}
-
-int test_relational_isless_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed( gRandomSeed );
-    return test_equiv_kernel_set_float( context, queue, "isless", "<", isless_verify_fn_float, seed );
-}
-
-bool islessequal_verify_fn_float( float valueA, float valueB )
-{
-    return valueA <= valueB;
-}
-
-int test_relational_islessequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed( gRandomSeed );
-    return test_equiv_kernel_set_float( context, queue, "islessequal", "<=", islessequal_verify_fn_float, seed );
-}
-
-bool islessgreater_verify_fn_float( float valueA, float valueB )
-{
-    return ( valueA < valueB ) || ( valueA > valueB );
-}
-
-int test_relational_islessgreater_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed( gRandomSeed );
-    return test_equiv_kernel_set_float( context, queue, "islessgreater", "<>", islessgreater_verify_fn_float, seed );
-}
-
-
diff --git a/test_conformance/relationals/test_comparisons_fp.cpp b/test_conformance/relationals/test_comparisons_fp.cpp
new file mode 100644
index 00000000..580b7422
--- /dev/null
+++ b/test_conformance/relationals/test_comparisons_fp.cpp
@@ -0,0 +1,661 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <iostream>
+#include <map>
+#include <memory>
+#include <stdexcept>
+#include <vector>
+
+#include <CL/cl_half.h>
+
+#include "test_comparisons_fp.h"
+
+#define TEST_SIZE 512
+
+static char ftype[32] = { 0 };      // scalar fp type name
+static char ftype_vec[32] = { 0 };  // fp type name plus vector width suffix
+static char itype[32] = { 0 };      // scalar integer result type name
+static char itype_vec[32] = { 0 };  // integer type name plus width suffix
+static char extension[128] = { 0 }; // extension pragma line, or empty
+// The fragment arrays below point at these buffers; fill them before joining.
+// clang-format off
+// Kept unformatted for readability; %s slots take function/operator names.
+const char* equivTestKernPat[] = {
+extension,
+"__kernel void sample_test(__global ", ftype_vec, " *sourceA, __global ", ftype_vec,
+" *sourceB, __global ", itype_vec, " *destValues, __global ", itype_vec, " *destValuesB)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"    destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
+"    destValuesB[tid] = sourceA[tid] %s sourceB[tid];\n"
+"}\n"};
+// islessgreater variant: operator side is (a < b) | (a > b), single %s slot.
+const char* equivTestKernPatLessGreater[] = {
+extension,
+"__kernel void sample_test(__global ", ftype_vec, " *sourceA, __global ", ftype_vec,
+" *sourceB, __global ", itype_vec, " *destValues, __global ", itype_vec, " *destValuesB)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"    destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
+"    destValuesB[tid] = (sourceA[tid] < sourceB[tid]) | (sourceA[tid] > sourceB[tid]);\n"
+"}\n"};
+// 3-element variant: operands go through vload3/vstore3 on scalar pointers.
+const char* equivTestKerPat_3[] = {
+extension,
+"__kernel void sample_test(__global ", ftype_vec, " *sourceA, __global ", ftype_vec,
+" *sourceB, __global ", itype_vec, " *destValues, __global ", itype_vec, " *destValuesB)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"    ",ftype_vec," sampA = vload3(tid, (__global ",ftype," *)sourceA);\n"
+"    ",ftype_vec," sampB = vload3(tid, (__global ",ftype," *)sourceB);\n"
+"    vstore3(%s( sampA, sampB ), tid, (__global ",itype," *)destValues);\n"
+"    vstore3(( sampA %s sampB ), tid, (__global ",itype," *)destValuesB);\n"
+"}\n"};
+// 3-element islessgreater variant: vload3/vstore3 plus the (<) | (>) form.
+const char* equivTestKerPatLessGreater_3[] = {
+extension,
+"__kernel void sample_test(__global ", ftype_vec, " *sourceA, __global ", ftype_vec,
+" *sourceB, __global ", itype_vec, " *destValues, __global ", itype_vec, " *destValuesB)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"    ", ftype_vec, " sampA = vload3(tid, (__global ", ftype, " *)sourceA);\n"
+"    ", ftype_vec, " sampB = vload3(tid, (__global ", ftype, " *)sourceB);\n"
+"    vstore3(%s( sampA, sampB ), tid, (__global ", itype, " *)destValues);\n"
+"    vstore3(( sampA < sampB ) | (sampA > sampB), tid, (__global ", itype, " *)destValuesB);\n"
+"}\n"
+};
+// clang-format on
+
+
+std::string concat_kernel(const char* sstr[], int num)
+{
+    std::string joined; // fragments are appended in array order
+    for (int idx = 0; idx < num; ++idx) joined.append(sstr[idx]);
+    return joined;
+}
+
+template <typename... Args>
+std::string string_format(const std::string& format, Args... args)
+{
+    // Measuring pass: the returned length excludes the terminating '\0'.
+    const int length = std::snprintf(nullptr, 0, format.c_str(), args...);
+    if (length < 0)
+    {
+        throw std::runtime_error("Error during formatting.");
+    }
+    // Formatting pass: leave room for the '\0' snprintf writes, then drop it.
+    std::string text(static_cast<size_t>(length) + 1, '\0');
+    std::snprintf(&text[0], text.size(), format.c_str(), args...);
+    text.resize(static_cast<size_t>(length));
+    return text;
+}
+
+template <typename T, typename F> bool verify(const T& A, const T& B)
+{
+    return F{}(A, B); // F is value-initialized, e.g. std::equal_to<T>
+}
+
+RelationalsFPTest::RelationalsFPTest(cl_context context, cl_device_id device,
+                                     cl_command_queue queue, const char* fn,
+                                     const char* op)
+    : context(context), device(device), queue(queue), fnName(fn), opName(op),
+      halfFlushDenormsToZero(0)
+{
+    // OpenCL C integer type of the same width as each fp type; used to name
+    // the kernel's result buffers. Hardcoded because the C++ standard does
+    // not guarantee that typeid().name() is human readable.
+    eqTypeNames = { { kHalf, "short" },
+                    { kFloat, "int" },
+                    { kDouble, "long" } };
+}
+
+template <typename T>
+void RelationalsFPTest::generate_equiv_test_data(T* outData,
+                                                 unsigned int vecSize,
+                                                 bool alpha,
+                                                 const RelTestParams<T>& param,
+                                                 const MTdata& d)
+{
+    const unsigned int setSize = vecSize * vecSize;
+    const unsigned int nanStride = vecSize + 1;
+    generate_random_data(param.dataType, vecSize * TEST_SIZE, d, outData);
+
+    // Stores param.nan at vecSize positions spaced vecSize + 1 elements apart
+    // (a different lane in each successive vector) and returns the position
+    // reached after the last step.
+    auto plantNans = [&](T* cursor) -> T* {
+        for (unsigned int n = 0; n < vecSize; ++n, cursor += nanStride)
+            cursor[0] = param.nan;
+        return cursor;
+    };
+
+    // With alpha set the first NaN run starts one set (vecSize * vecSize
+    // elements) in, otherwise at offset 0.
+    T* next = plantNans(alpha ? outData + setSize : outData);
+
+    // The second run lands at the same offset for both alpha values, which
+    // covers the case where both inputs hold NaN.
+    plantNans(alpha ? next : next + setSize);
+}
+
+template <typename T, typename U>
+void RelationalsFPTest::verify_equiv_values(unsigned int vecSize,
+                                            const T* const inDataA,
+                                            const T* const inDataB,
+                                            U* const outData,
+                                            const VerifyFunc<T>& verifyFn)
+{
+    // "True" is stored as 1 when vecSize is 1 and as -1 otherwise; false is 0.
+    const int trueResult = (vecSize == 1) ? 1 : -1;
+    for (unsigned int lane = 0; lane < vecSize; ++lane)
+    {
+        const bool holds = verifyFn(inDataA[lane], inDataB[lane]);
+        if (holds)
+            outData[lane] = trueResult;
+        else
+            outData[lane] = 0;
+    }
+}
+
+// Builds and runs the `sample_test` kernel for one vector width, then checks
+// the built-in relational function (destValues) and the equivalent operator
+// expression (destValuesB) against results computed on the host.
+//
+// vecSize - vector width of the kernel operands; 1 selects the scalar type.
+// param   - supplies the data type tag, NaN value and host verifier.
+// d       - random generator state passed on to the input data generator.
+//
+// Returns 0 when all elements validate; failures detected here return -1.
+template <typename T>
+int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize,
+                                         const RelTestParams<T>& param,
+                                         const MTdata& d)
+{
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper streams[4];
+    T inDataA[TEST_SIZE * 16], inDataB[TEST_SIZE * 16];
+
+    // U is the signed integer type with the same size as T (16, 32 or 64
+    // bits); it is the element type of both result buffers.
+    typedef typename std::conditional<
+        (sizeof(T) == sizeof(std::int16_t)), std::int16_t,
+        typename std::conditional<(sizeof(T) == sizeof(std::int32_t)),
+                                  std::int32_t, std::int64_t>::type>::type U;
+
+    U outData[TEST_SIZE * 16], expected[16];
+    int error, i, j;
+    size_t threads[1], localThreads[1];
+    std::string kernelSource;
+    char sizeName[4];
+
+    /* Create the source */
+    if (vecSize == 1)
+        sizeName[0] = 0;
+    else
+        std::snprintf(sizeName, sizeof(sizeName), "%u", vecSize);
+
+    // Stop here rather than build a kernel with an empty integer type name.
+    const auto eqType = eqTypeNames.find(param.dataType);
+    if (eqType == eqTypeNames.end())
+    {
+        log_error("RelationalsFPTest::test_equiv_kernel: unsupported fp data "
+                  "type\n");
+        return -1;
+    }
+
+    const char* const fpName = get_explicit_type_name(param.dataType);
+    std::snprintf(ftype, sizeof(ftype), "%s", fpName);
+    std::snprintf(ftype_vec, sizeof(ftype_vec), "%s%s", fpName, sizeName);
+
+    const char* const intName = eqType->second.c_str();
+    std::snprintf(itype, sizeof(itype), "%s", intName);
+    std::snprintf(itype_vec, sizeof(itype_vec), "%s%s", intName, sizeName);
+
+    if (std::is_same<T, double>::value)
+        strcpy(extension, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n");
+    else if (std::is_same<T, cl_half>::value)
+        strcpy(extension, "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n");
+    else
+        extension[0] = '\0';
+
+    if (DENSE_PACK_VECS && vecSize == 3)
+    {
+        if (strcmp(fnName.c_str(), "islessgreater"))
+        {
+            auto str =
+                concat_kernel(equivTestKerPat_3,
+                              sizeof(equivTestKerPat_3) / sizeof(const char*));
+            kernelSource = string_format(str, fnName.c_str(), opName.c_str());
+        }
+        else
+        {
+            auto str = concat_kernel(equivTestKerPatLessGreater_3,
+                                     sizeof(equivTestKerPatLessGreater_3)
+                                         / sizeof(const char*));
+            kernelSource = string_format(str, fnName.c_str());
+        }
+    }
+    else
+    {
+        if (strcmp(fnName.c_str(), "islessgreater"))
+        {
+            auto str =
+                concat_kernel(equivTestKernPat,
+                              sizeof(equivTestKernPat) / sizeof(const char*));
+            kernelSource = string_format(str, fnName.c_str(), opName.c_str());
+        }
+        else
+        {
+            auto str = concat_kernel(equivTestKernPatLessGreater,
+                                     sizeof(equivTestKernPatLessGreater)
+                                         / sizeof(const char*));
+            kernelSource = string_format(str, fnName.c_str());
+        }
+    }
+
+    /* Create kernels */
+    const char* programPtr = kernelSource.c_str();
+    if (create_single_kernel_helper(context, &program, &kernel, 1,
+                                    (const char**)&programPtr, "sample_test"))
+    {
+        return -1;
+    }
+
+    /* Generate some streams */
+    generate_equiv_test_data<T>(inDataA, vecSize, true, param, d);
+    generate_equiv_test_data<T>(inDataB, vecSize, false, param, d);
+
+    streams[0] =
+        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+                       sizeof(T) * vecSize * TEST_SIZE, &inDataA, &error);
+    if (streams[0] == NULL)
+    {
+        print_error(error, "Creating input array A failed!\n");
+        return -1;
+    }
+    streams[1] =
+        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+                       sizeof(T) * vecSize * TEST_SIZE, &inDataB, &error);
+    if (streams[1] == NULL)
+    {
+        print_error(error, "Creating input array B failed!\n");
+        return -1;
+    }
+    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                sizeof(U) * vecSize * TEST_SIZE, NULL, &error);
+    if (streams[2] == NULL)
+    {
+        print_error(error, "Creating output array failed!\n");
+        return -1;
+    }
+    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                sizeof(U) * vecSize * TEST_SIZE, NULL, &error);
+    if (streams[3] == NULL)
+    {
+        print_error(error, "Creating output array failed!\n");
+        return -1;
+    }
+
+    /* Assign streams and execute */
+    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
+    test_error(error, "Unable to set indexed kernel arguments");
+    error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
+    test_error(error, "Unable to set indexed kernel arguments");
+    error = clSetKernelArg(kernel, 2, sizeof(streams[2]), &streams[2]);
+    test_error(error, "Unable to set indexed kernel arguments");
+    error = clSetKernelArg(kernel, 3, sizeof(streams[3]), &streams[3]);
+    test_error(error, "Unable to set indexed kernel arguments");
+
+    /* Run the kernel */
+    threads[0] = TEST_SIZE;
+
+    error = get_max_common_work_group_size(context, kernel, threads[0],
+                                           &localThreads[0]);
+    test_error(error, "Unable to get work group size to use");
+
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+                                   localThreads, 0, NULL, NULL);
+    test_error(error, "Unable to execute test kernel");
+
+    /* Now get the results */
+    error = clEnqueueReadBuffer(queue, streams[2], true, 0,
+                                sizeof(U) * TEST_SIZE * vecSize, outData, 0,
+                                NULL, NULL);
+    test_error(error, "Unable to read output array!");
+
+    auto verror_msg = [](const int& i, const int& j, const unsigned& vs,
+                         const U& e, const U& o, const T& iA, const T& iB) {
+        std::stringstream sstr;
+        sstr << "ERROR: Data sample " << i << ":" << j << " at size " << vs
+             << " does not validate! Expected " << e << ", got " << o
+             << ", source " << iA << ":" << iB << std::endl;
+        // Pass the text as an argument so it is never parsed as a format.
+        log_error("%s", sstr.str().c_str());
+    };
+
+    /* And verify! */
+    for (i = 0; i < TEST_SIZE; i++)
+    {
+        verify_equiv_values<T, U>(vecSize, &inDataA[i * vecSize],
+                                  &inDataB[i * vecSize], expected,
+                                  param.verifyFn);
+
+        for (j = 0; j < (int)vecSize; j++)
+        {
+            if (expected[j] != outData[i * vecSize + j])
+            {
+                bool fail = true;
+                if (std::is_same<T, cl_half>::value)
+                {
+                    bool in_denorm = IsHalfSubnormal(inDataA[i * vecSize + j])
+                        || IsHalfSubnormal(inDataB[i * vecSize + j]);
+
+                    // Tolerated when half denormals are flushed to zero.
+                    fail = !(halfFlushDenormsToZero && in_denorm);
+                }
+
+                if (fail)
+                {
+                    verror_msg(
+                        i, j, vecSize, expected[j], outData[i * vecSize + j],
+                        inDataA[i * vecSize + j], inDataB[i * vecSize + j]);
+                    return -1;
+                }
+            }
+        }
+    }
+
+    /* Now get the results */
+    error = clEnqueueReadBuffer(queue, streams[3], true, 0,
+                                sizeof(U) * TEST_SIZE * vecSize, outData, 0,
+                                NULL, NULL);
+    test_error(error, "Unable to read output array!");
+
+    /* And verify! */
+    for (i = 0; i < TEST_SIZE; i++)
+    {
+        verify_equiv_values<T, U>(vecSize, &inDataA[i * vecSize],
+                                  &inDataB[i * vecSize], expected,
+                                  param.verifyFn);
+
+        for (j = 0; j < (int)vecSize; j++)
+        {
+            if (expected[j] != outData[i * vecSize + j])
+            {
+                // Every mismatch fails unless one of the exemptions below
+                // applies to this element.
+                bool fail = true;
+                if (std::is_same<T, float>::value)
+                {
+                    // NaN operands are exempt only when gInfNanSupport is 0
+                    // (presumably: the device lacks Inf/NaN support).
+                    if (gInfNanSupport == 0)
+                        fail = !(isnan(inDataA[i * vecSize + j])
+                                 || isnan(inDataB[i * vecSize + j]));
+                }
+                else if (std::is_same<T, cl_half>::value)
+                {
+                    bool in_denorm = IsHalfSubnormal(inDataA[i * vecSize + j])
+                        || IsHalfSubnormal(inDataB[i * vecSize + j]);
+
+                    fail = !(halfFlushDenormsToZero && in_denorm);
+                }
+
+                if (fail)
+                {
+                    verror_msg(
+                        i, j, vecSize, expected[j], outData[i * vecSize + j],
+                        inDataA[i * vecSize + j], inDataB[i * vecSize + j]);
+                    return -1;
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+template <typename T>
+int RelationalsFPTest::test_relational(int numElements,
+                                       const RelTestParams<T>& param)
+{
+    // Vector widths to run; test_equiv_kernel treats 1 as the scalar type.
+    const unsigned int widths[] = { 1, 2, 3, 4, 8, 16 };
+    RandomSeed seed(gRandomSeed);
+    int status = 0;
+
+    // A failing width is logged and remembered, but later widths still run.
+    for (const unsigned int width : widths)
+    {
+        if (test_equiv_kernel<T>(width, param, seed) == 0) continue;
+
+        log_error("   Vector %s%d FAILED\n", ftype, width);
+        status = -1;
+    }
+
+    return status;
+}
+
+cl_int RelationalsFPTest::SetUp(int elements)
+{
+    // The half configuration is only queried when cl_khr_fp16 is reported.
+    if (!is_extension_available(device, "cl_khr_fp16")) return CL_SUCCESS;
+
+    cl_device_fp_config halfConfig = 0;
+    const cl_int status =
+        clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG, sizeof(halfConfig),
+                        &halfConfig, NULL);
+    test_error(status, "Unable to get device CL_DEVICE_HALF_FP_CONFIG");
+    // A device without CL_FP_DENORM is recorded as flushing half denormals.
+    halfFlushDenormsToZero = ((halfConfig & CL_FP_DENORM) == 0);
+    log_info("Supports half precision denormals: %s\n",
+             halfFlushDenormsToZero ? "NO" : "YES");
+    return CL_SUCCESS;
+}
+
+cl_int RelationalsFPTest::Run()
+{
+    // Route every parameter set to the instantiation matching its type tag.
+    // NOTE(review): test_error presumably returns from Run() on failure.
+    for (auto&& param : params)
+    {
+        cl_int status = CL_SUCCESS;
+        const auto type = param->dataType;
+
+        if (type == kHalf)
+            status = test_relational<cl_half>(
+                num_elements, *((RelTestParams<cl_half>*)param.get()));
+        else if (type == kFloat)
+            status = test_relational<float>(
+                num_elements, *((RelTestParams<float>*)param.get()));
+        else if (type == kDouble)
+            status = test_relational<double>(
+                num_elements, *((RelTestParams<double>*)param.get()));
+        else
+        {
+            test_error(-1, "RelationalsFPTest::Run: incorrect fp type");
+        }
+
+        test_error(status, "RelationalsFPTest::Run: test_relational failed");
+    }
+    return CL_SUCCESS;
+}
+
+cl_int IsEqualFPTest::SetUp(int elements)
+{
+    // float is always covered; half and double depend on device extensions.
+    const bool hasFp16 = is_extension_available(device, "cl_khr_fp16");
+    const bool hasFp64 = is_extension_available(device, "cl_khr_fp64");
+    num_elements = elements;
+    if (hasFp16)
+        params.emplace_back(new RelTestParams<cl_half>(
+            &verify<cl_half, half_equals_to>, kHalf, HALF_NAN));
+    params.emplace_back(new RelTestParams<float>(
+        &verify<float, std::equal_to<float>>, kFloat, NAN));
+    if (hasFp64)
+        params.emplace_back(new RelTestParams<double>(
+            &verify<double, std::equal_to<double>>, kDouble, NAN));
+    return RelationalsFPTest::SetUp(elements);
+}
+
+// Queues the isnotequal parameter sets, then finishes with the base SetUp.
+cl_int IsNotEqualFPTest::SetUp(int elements)
+{
+    num_elements = elements;
+    if (is_extension_available(device, "cl_khr_fp16"))
+        params.emplace_back(new RelTestParams<cl_half>(
+            &verify<cl_half, half_not_equals_to>, kHalf, HALF_NAN));
+    // float is not gated on an extension; half and double are.
+    params.emplace_back(new RelTestParams<float>(
+        &verify<float, std::not_equal_to<float>>, kFloat, NAN));
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+        params.emplace_back(new RelTestParams<double>(
+            &verify<double, std::not_equal_to<double>>, kDouble, NAN));
+    return RelationalsFPTest::SetUp(elements);
+}
+
+// Queues the isgreater parameter sets, then finishes with the base SetUp.
+cl_int IsGreaterFPTest::SetUp(int elements)
+{
+    num_elements = elements;
+    if (is_extension_available(device, "cl_khr_fp16"))
+        params.emplace_back(new RelTestParams<cl_half>(
+            &verify<cl_half, half_greater>, kHalf, HALF_NAN));
+    // float is not gated on an extension; half and double are.
+    params.emplace_back(new RelTestParams<float>(
+        &verify<float, std::greater<float>>, kFloat, NAN));
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+        params.emplace_back(new RelTestParams<double>(
+            &verify<double, std::greater<double>>, kDouble, NAN));
+    return RelationalsFPTest::SetUp(elements);
+}
+
+// Queues the isgreaterequal parameter sets, then finishes with the base SetUp.
+cl_int IsGreaterEqualFPTest::SetUp(int elements)
+{
+    num_elements = elements;
+    if (is_extension_available(device, "cl_khr_fp16"))
+        params.emplace_back(new RelTestParams<cl_half>(
+            &verify<cl_half, half_greater_equal>, kHalf, HALF_NAN));
+    // float is not gated on an extension; half and double are.
+    params.emplace_back(new RelTestParams<float>(
+        &verify<float, std::greater_equal<float>>, kFloat, NAN));
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+        params.emplace_back(new RelTestParams<double>(
+            &verify<double, std::greater_equal<double>>, kDouble, NAN));
+    return RelationalsFPTest::SetUp(elements);
+}
+
+// Queues the isless parameter sets, then finishes with the base SetUp.
+cl_int IsLessFPTest::SetUp(int elements)
+{
+    num_elements = elements;
+    if (is_extension_available(device, "cl_khr_fp16"))
+        params.emplace_back(new RelTestParams<cl_half>(
+            &verify<cl_half, half_less>, kHalf, HALF_NAN));
+    // float is not gated on an extension; half and double are.
+    params.emplace_back(new RelTestParams<float>(
+        &verify<float, std::less<float>>, kFloat, NAN));
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+        params.emplace_back(new RelTestParams<double>(
+            &verify<double, std::less<double>>, kDouble, NAN));
+    return RelationalsFPTest::SetUp(elements);
+}
+
+// Queues the islessequal parameter sets, then finishes with the base SetUp.
+cl_int IsLessEqualFPTest::SetUp(int elements)
+{
+    num_elements = elements;
+    if (is_extension_available(device, "cl_khr_fp16"))
+        params.emplace_back(new RelTestParams<cl_half>(
+            &verify<cl_half, half_less_equal>, kHalf, HALF_NAN));
+    // float is not gated on an extension; half and double are.
+    params.emplace_back(new RelTestParams<float>(
+        &verify<float, std::less_equal<float>>, kFloat, NAN));
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+        params.emplace_back(new RelTestParams<double>(
+            &verify<double, std::less_equal<double>>, kDouble, NAN));
+    return RelationalsFPTest::SetUp(elements);
+}
+
+// Queues the islessgreater parameter sets, then finishes with the base SetUp.
+cl_int IsLessGreaterFPTest::SetUp(int elements)
+{
+    num_elements = elements;
+    if (is_extension_available(device, "cl_khr_fp16"))
+        params.emplace_back(new RelTestParams<cl_half>(
+            &verify<cl_half, half_less_greater>, kHalf, HALF_NAN));
+    // float is not gated on an extension; half and double are.
+    params.emplace_back(new RelTestParams<float>(
+        &verify<float, less_greater<float>>, kFloat, NAN));
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+        params.emplace_back(new RelTestParams<double>(
+            &verify<double, less_greater<double>>, kDouble, NAN));
+    return RelationalsFPTest::SetUp(elements);
+}
+
+int test_relational_isequal(cl_device_id device, cl_context context,
+                            cl_command_queue queue, int numElements)
+{ // Runs IsEqualFPTest through MakeAndRunTest (construct, SetUp, Run).
+    return MakeAndRunTest<IsEqualFPTest>(device, context, queue, numElements);
+}
+
+int test_relational_isnotequal(cl_device_id device, cl_context context,
+                               cl_command_queue queue, int numElements)
+{ // Runs IsNotEqualFPTest through MakeAndRunTest (construct, SetUp, Run).
+    return MakeAndRunTest<IsNotEqualFPTest>(device, context, queue,
+                                            numElements);
+}
+
+int test_relational_isgreater(cl_device_id device, cl_context context,
+                              cl_command_queue queue, int numElements)
+{ // Runs IsGreaterFPTest through MakeAndRunTest (construct, SetUp, Run).
+    return MakeAndRunTest<IsGreaterFPTest>(device, context, queue, numElements);
+}
+
+int test_relational_isgreaterequal(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int numElements)
+{ // Runs IsGreaterEqualFPTest through MakeAndRunTest (construct, SetUp, Run).
+    return MakeAndRunTest<IsGreaterEqualFPTest>(device, context, queue,
+                                                numElements);
+}
+
+int test_relational_isless(cl_device_id device, cl_context context,
+                           cl_command_queue queue, int numElements)
+{ // Runs IsLessFPTest through MakeAndRunTest (construct, SetUp, Run).
+    return MakeAndRunTest<IsLessFPTest>(device, context, queue, numElements);
+}
+
+int test_relational_islessequal(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int numElements)
+{ // Runs IsLessEqualFPTest through MakeAndRunTest (construct, SetUp, Run).
+    return MakeAndRunTest<IsLessEqualFPTest>(device, context, queue,
+                                             numElements);
+}
+
+int test_relational_islessgreater(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int numElements)
+{ // Runs IsLessGreaterFPTest through MakeAndRunTest (construct, SetUp, Run).
+    return MakeAndRunTest<IsLessGreaterFPTest>(device, context, queue,
+                                               numElements);
+}
diff --git a/test_conformance/relationals/test_comparisons_fp.h b/test_conformance/relationals/test_comparisons_fp.h
new file mode 100644
index 00000000..7faca1c5
--- /dev/null
+++ b/test_conformance/relationals/test_comparisons_fp.h
@@ -0,0 +1,227 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef _TEST_COMPARISONS_FP_H
+#define _TEST_COMPARISONS_FP_H
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <CL/cl_half.h>
+
+#include "testBase.h"
+
+#define HALF_NAN 0x7e00 // binary16 NaN: exponent all ones, mantissa MSB set
+template <typename T> using VerifyFunc = bool (*)(const T &, const T &); // predicate
+
+struct RelTestBase // common base so RelTestParams<T> of any T share a container
+{
+    explicit RelTestBase(const ExplicitTypes &dt): dataType(dt) {}
+    virtual ~RelTestBase() = default; // deleted via unique_ptr<RelTestBase>
+    ExplicitTypes dataType; // tag naming the RelTestParams<T> behind this base
+};
+// Per-type test parameters: a predicate pointer plus that type's NaN value.
+template <typename T> struct RelTestParams : public RelTestBase
+{
+    RelTestParams(const VerifyFunc<T> &vfn, const ExplicitTypes &dt,
+                  const T &nan_)
+        : RelTestBase(dt), verifyFn(vfn), nan(nan_)
+    {}
+    VerifyFunc<T> verifyFn; // bool (*)(const T &, const T &)
+    T nan; // e.g. HALF_NAN for cl_half, NAN for float and double
+};
+
+// Shared fixture for the floating-point relational tests declared below.
+struct RelationalsFPTest
+{
+    RelationalsFPTest(cl_context context, cl_device_id device,
+                      cl_command_queue queue, const char *fn, const char *op);
+
+    virtual cl_int SetUp(int elements); // derived overrides fill params first
+
+    // Test body returning an OpenCL error code
+    virtual cl_int Run();
+
+    template <typename T>
+    void generate_equiv_test_data(T *, unsigned int, bool,
+                                  const RelTestParams<T> &, const MTdata &);
+
+    template <typename T, typename U>
+    void verify_equiv_values(unsigned int, const T *const, const T *const,
+                             U *const, const VerifyFunc<T> &);
+
+    template <typename T>
+    int test_equiv_kernel(unsigned int vecSize, const RelTestParams<T> &param,
+                          const MTdata &d);
+
+    template <typename T>
+    int test_relational(int numElements, const RelTestParams<T> &param);
+
+protected:
+    cl_context context;
+    cl_device_id device;
+    cl_command_queue queue;
+    // NOTE(review): presumably the builtin name and operator text - confirm
+    std::string fnName;
+    std::string opName;
+    std::vector<std::unique_ptr<RelTestBase>> params; // one per tested fp type
+    std::map<ExplicitTypes, std::string> eqTypeNames;
+    size_t num_elements; // stored by the derived SetUp, read by Run
+    // Set in SetUp: non-zero when CL_DEVICE_HALF_FP_CONFIG lacks CL_FP_DENORM.
+    int halfFlushDenormsToZero;
+};
+
+// Fixture for "isequal"; the base receives that name together with "==".
+struct IsEqualFPTest : public RelationalsFPTest
+{
+    IsEqualFPTest(cl_device_id d, cl_context c, cl_command_queue q)
+        : RelationalsFPTest(c, d, q, "isequal", "==")
+    {} // (c, d, q): the base constructor takes the context first
+    cl_int SetUp(int elements) override;
+    // Halves are converted to float first so NaN/inf compare as fp values.
+    struct half_equals_to
+    {
+        bool operator()(const cl_half &lhs, const cl_half &rhs) const
+        {
+            return cl_half_to_float(lhs) == cl_half_to_float(rhs);
+        }
+    };
+};
+
+// Fixture for "isnotequal"; the base receives that name together with "!=".
+struct IsNotEqualFPTest : public RelationalsFPTest
+{
+    IsNotEqualFPTest(cl_device_id d, cl_context c, cl_command_queue q)
+        : RelationalsFPTest(c, d, q, "isnotequal", "!=")
+    {} // (c, d, q): the base constructor takes the context first
+    cl_int SetUp(int elements) override;
+    // Halves are converted to float first so NaN/inf compare as fp values.
+    struct half_not_equals_to
+    {
+        bool operator()(const cl_half &lhs, const cl_half &rhs) const
+        {
+            return cl_half_to_float(lhs) != cl_half_to_float(rhs);
+        }
+    };
+};
+
+// Fixture for "isgreater"; the base receives that name together with ">".
+struct IsGreaterFPTest : public RelationalsFPTest
+{
+    IsGreaterFPTest(cl_device_id d, cl_context c, cl_command_queue q)
+        : RelationalsFPTest(c, d, q, "isgreater", ">")
+    {} // (c, d, q): the base constructor takes the context first
+    cl_int SetUp(int elements) override;
+    struct half_greater // predicate on the float values of two halves
+    {
+        bool operator()(const cl_half &lhs, const cl_half &rhs) const
+        {
+            return cl_half_to_float(lhs) > cl_half_to_float(rhs);
+        }
+    };
+};
+
+// Fixture for "isgreaterequal"; the base receives that name along with ">=".
+struct IsGreaterEqualFPTest : public RelationalsFPTest
+{
+    IsGreaterEqualFPTest(cl_device_id d, cl_context c, cl_command_queue q)
+        : RelationalsFPTest(c, d, q, "isgreaterequal", ">=")
+    {} // (c, d, q): the base constructor takes the context first
+    cl_int SetUp(int elements) override;
+    struct half_greater_equal // predicate on the float values of two halves
+    {
+        bool operator()(const cl_half &lhs, const cl_half &rhs) const
+        {
+            return cl_half_to_float(lhs) >= cl_half_to_float(rhs);
+        }
+    };
+};
+
+// Fixture for "isless"; the base receives that name together with "<".
+struct IsLessFPTest : public RelationalsFPTest
+{
+    IsLessFPTest(cl_device_id d, cl_context c, cl_command_queue q)
+        : RelationalsFPTest(c, d, q, "isless", "<")
+    {} // (c, d, q): the base constructor takes the context first
+    cl_int SetUp(int elements) override;
+    struct half_less // predicate on the float values of two halves
+    {
+        bool operator()(const cl_half &lhs, const cl_half &rhs) const
+        {
+            return cl_half_to_float(lhs) < cl_half_to_float(rhs);
+        }
+    };
+};
+
+// Fixture for "islessequal"; the base receives that name together with "<=".
+struct IsLessEqualFPTest : public RelationalsFPTest
+{
+    IsLessEqualFPTest(cl_device_id d, cl_context c, cl_command_queue q)
+        : RelationalsFPTest(c, d, q, "islessequal", "<=")
+    {} // (c, d, q): the base constructor takes the context first
+    cl_int SetUp(int elements) override;
+    struct half_less_equal // predicate on the float values of two halves
+    {
+        bool operator()(const cl_half &lhs, const cl_half &rhs) const
+        {
+            return cl_half_to_float(lhs) <= cl_half_to_float(rhs);
+        }
+    };
+};
+
+// Fixture for "islessgreater"; the base receives that name along with "<>".
+struct IsLessGreaterFPTest : public RelationalsFPTest
+{
+    IsLessGreaterFPTest(cl_device_id d, cl_context c, cl_command_queue q)
+        : RelationalsFPTest(c, d, q, "islessgreater", "<>")
+    {} // (c, d, q): the base constructor takes the context first
+    cl_int SetUp(int elements) override;
+    struct half_less_greater // same test as less_greater, on converted halves
+    {
+        bool operator()(const cl_half &lhs, const cl_half &rhs) const
+        {
+            float flhs = cl_half_to_float(lhs), frhs = cl_half_to_float(rhs);
+            return (flhs < frhs) || (flhs > frhs);
+        }
+    };
+    // True only when one operand is strictly below or strictly above the other.
+    template <typename T> struct less_greater
+    {
+        bool operator()(const T &lhs, const T &rhs) const
+        {
+            return (lhs < rhs) || (lhs > rhs);
+        }
+    };
+};
+
+// Builds fixture T, then calls SetUp and Run on it, checking each result.
+template <class T>
+int MakeAndRunTest(cl_device_id device, cl_context context,
+                   cl_command_queue queue, int num_elements)
+{
+    auto test_fixture = T(device, context, queue);
+    // NOTE(review): test_error_ret presumably returns TEST_FAIL on error.
+    cl_int error = test_fixture.SetUp(num_elements);
+    test_error_ret(error, "Error in test initialization", TEST_FAIL);
+
+    error = test_fixture.Run();
+    test_error_ret(error, "Test Failed", TEST_FAIL);
+    return TEST_PASS;
+}
+
+#endif // _TEST_COMPARISONS_FP_H
diff --git a/test_conformance/relationals/test_relationals.cpp b/test_conformance/relationals/test_relationals.cpp
index 5a874af7..d744fb2a 100644
--- a/test_conformance/relationals/test_relationals.cpp
+++ b/test_conformance/relationals/test_relationals.cpp
@@ -18,8 +18,11 @@
 #include "harness/typeWrappers.h"
 #include "harness/testHarness.h"
 
+// clang-format off
+
 const char *anyAllTestKernelPattern =
 "%s\n" // optional pragma
+"%s\n" // optional pragma
 "__kernel void sample_test(__global %s%s *sourceA, __global int *destValues)\n"
 "{\n"
 "    int  tid = get_global_id(0);\n"
@@ -29,6 +32,7 @@ const char *anyAllTestKernelPattern =
 
 const char *anyAllTestKernelPatternVload =
 "%s\n" // optional pragma
+"%s\n" // optional pragma
 "__kernel void sample_test(__global %s%s *sourceA, __global int *destValues)\n"
 "{\n"
 "    int  tid = get_global_id(0);\n"
@@ -36,6 +40,8 @@ const char *anyAllTestKernelPatternVload =
 "\n"
 "}\n";
 
+// clang-format on
+
 #define TEST_SIZE 512
 
 typedef int (*anyAllVerifyFn)( ExplicitType vecType, unsigned int vecSize, void *inData );
@@ -67,14 +73,22 @@ int test_any_all_kernel(cl_context context, cl_command_queue queue,
              get_explicit_type_name( vecType ), sizeName);
     if(DENSE_PACK_VECS && vecSize == 3) {
         // anyAllTestKernelPatternVload
-        sprintf( kernelSource, anyAllTestKernelPatternVload,
-                vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( vecType ), sizeName, fnName,
-                get_explicit_type_name(vecType));
+        sprintf(
+            kernelSource, anyAllTestKernelPatternVload,
+            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
+                               : "",
+            vecType == kHalf ? "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"
+                             : "",
+            get_explicit_type_name(vecType), sizeName, fnName,
+            get_explicit_type_name(vecType));
     } else {
-        sprintf( kernelSource, anyAllTestKernelPattern,
-                vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( vecType ), sizeName, fnName );
+        sprintf(
+            kernelSource, anyAllTestKernelPattern,
+            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
+                               : "",
+            vecType == kHalf ? "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"
+                             : "",
+            get_explicit_type_name(vecType), sizeName, fnName);
     }
     /* Create kernels */
     programPtr = kernelSource;
@@ -282,8 +296,11 @@ int test_relational_all(cl_device_id device, cl_context context, cl_command_queu
     return retVal;
 }
 
+// clang-format off
+
 const char *selectTestKernelPattern =
 "%s\n" // optional pragma
+"%s\n" // optional pragma
 "__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *sourceC, __global %s%s *destValues)\n"
 "{\n"
 "    int  tid = get_global_id(0);\n"
@@ -294,6 +311,7 @@ const char *selectTestKernelPattern =
 
 const char *selectTestKernelPatternVload =
 "%s\n" // optional pragma
+"%s\n" // optional pragma
 "__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *sourceC, __global %s%s *destValues)\n"
 "{\n"
 "    int  tid = get_global_id(0);\n"
@@ -302,6 +320,8 @@ const char *selectTestKernelPatternVload =
 "\n"
 "}\n";
 
+// clang-format on
+
 typedef void (*selectVerifyFn)( ExplicitType vecType, ExplicitType testVecType, unsigned int vecSize, void *inDataA, void *inDataB, void *inDataTest, void *outData );
 
 int test_select_kernel(cl_context context, cl_command_queue queue, const char *fnName,
@@ -335,26 +355,34 @@ int test_select_kernel(cl_context context, cl_command_queue queue, const char *f
 
     if(DENSE_PACK_VECS && vecSize == 3) {
         // anyAllTestKernelPatternVload
-        sprintf( kernelSource, selectTestKernelPatternVload,
-                (vecType == kDouble || testVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( vecType ), sizeName,
-                get_explicit_type_name( vecType ), sizeName,
-                get_explicit_type_name( testVecType ), sizeName,
-                get_explicit_type_name( vecType ), outSizeName,
-                get_explicit_type_name( vecType ), sizeName,
-                fnName,
-                get_explicit_type_name( vecType ),
-                get_explicit_type_name( vecType ),
-                get_explicit_type_name( vecType ),
-                get_explicit_type_name( testVecType ) );
+        sprintf(kernelSource, selectTestKernelPatternVload,
+                (vecType == kDouble || testVecType == kDouble)
+                    ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
+                    : "",
+                (vecType == kHalf || testVecType == kHalf)
+                    ? "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"
+                    : "",
+                get_explicit_type_name(vecType), sizeName,
+                get_explicit_type_name(vecType), sizeName,
+                get_explicit_type_name(testVecType), sizeName,
+                get_explicit_type_name(vecType), outSizeName,
+                get_explicit_type_name(vecType), sizeName, fnName,
+                get_explicit_type_name(vecType),
+                get_explicit_type_name(vecType),
+                get_explicit_type_name(vecType),
+                get_explicit_type_name(testVecType));
     } else {
-        sprintf( kernelSource, selectTestKernelPattern,
-                (vecType == kDouble || testVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( vecType ), sizeName,
-                get_explicit_type_name( vecType ), sizeName,
-                get_explicit_type_name( testVecType ), sizeName,
-                get_explicit_type_name( vecType ), outSizeName,
-                fnName );
+        sprintf(kernelSource, selectTestKernelPattern,
+                (vecType == kDouble || testVecType == kDouble)
+                    ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
+                    : "",
+                (vecType == kHalf || testVecType == kHalf)
+                    ? "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"
+                    : "",
+                get_explicit_type_name(vecType), sizeName,
+                get_explicit_type_name(vecType), sizeName,
+                get_explicit_type_name(testVecType), sizeName,
+                get_explicit_type_name(vecType), outSizeName, fnName);
     }
 
     /* Create kernels */
@@ -500,14 +528,17 @@ void bitselect_verify_fn( ExplicitType vecType, ExplicitType testVecType, unsign
 
 int test_relational_bitselect(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
 {
-    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
+    constexpr ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort,
+                                         kInt,  kUInt,  kLong,  kULong,
+                                         kHalf, kFloat, kDouble };
+    constexpr auto vecTypeSize = sizeof(vecType) / sizeof(ExplicitType);
     unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
     unsigned int index, typeIndex;
     int retVal = 0;
     RandomSeed seed( gRandomSeed );
 
 
-    for( typeIndex = 0; typeIndex < 10; typeIndex++ )
+    for (typeIndex = 0; typeIndex < vecTypeSize; typeIndex++)
     {
         if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) && !gHasLong)
             continue;
@@ -522,6 +553,19 @@ int test_relational_bitselect(cl_device_id device, cl_context context, cl_comman
             else
                 log_info("Testing doubles.\n");
         }
+
+        if (vecType[typeIndex] == kHalf)
+        {
+            if (!is_extension_available(device, "cl_khr_fp16"))
+            {
+                log_info("Extension cl_khr_fp16 not supported; skipping half "
+                         "tests.\n");
+                continue;
+            }
+            else
+                log_info("Testing halfs.\n");
+        }
+
         for( index = 0; vecSizes[ index ] != 0; index++ )
         {
             // Test!
@@ -584,14 +628,18 @@ void select_signed_verify_fn( ExplicitType vecType, ExplicitType testVecType, un
 
 int test_relational_select_signed(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
 {
-    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
+    constexpr ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort,
+                                         kInt,  kUInt,  kLong,  kULong,
+                                         kHalf, kFloat, kDouble };
+    constexpr auto vecTypeSize = sizeof(vecType) / sizeof(ExplicitType);
+
     ExplicitType testVecType[] = { kChar, kShort, kInt, kLong, kNumExplicitTypes };
     unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
     unsigned int index, typeIndex, testTypeIndex;
     int retVal = 0;
     RandomSeed seed( gRandomSeed );
 
-    for( typeIndex = 0; typeIndex < 10; typeIndex++ )
+    for (typeIndex = 0; typeIndex < vecTypeSize; typeIndex++)
     {
         if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) && !gHasLong)
             continue;
@@ -604,6 +652,19 @@ int test_relational_select_signed(cl_device_id device, cl_context context, cl_co
                 log_info("Testing doubles.\n");
             }
         }
+        if (vecType[typeIndex] == kHalf)
+        {
+            if (!is_extension_available(device, "cl_khr_fp16"))
+            {
+                log_info("Extension cl_khr_fp16 not supported; skipping half "
+                         "tests.\n");
+                continue;
+            }
+            else
+            {
+                log_info("Testing halfs.\n");
+            }
+        }
         for( testTypeIndex = 0; testVecType[ testTypeIndex ] != kNumExplicitTypes; testTypeIndex++ )
         {
             if( testVecType[ testTypeIndex ] != vecType[ typeIndex ] )
@@ -673,7 +734,11 @@ void select_unsigned_verify_fn( ExplicitType vecType, ExplicitType testVecType,
 
 int test_relational_select_unsigned(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
 {
-    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
+    constexpr ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort,
+                                         kInt,  kUInt,  kLong,  kULong,
+                                         kHalf, kFloat, kDouble };
+    constexpr auto vecTypeSize = sizeof(vecType) / sizeof(ExplicitType);
+
     ExplicitType testVecType[] = { kUChar, kUShort, kUInt, kULong, kNumExplicitTypes };
     unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
     unsigned int index, typeIndex, testTypeIndex;
@@ -681,7 +746,7 @@ int test_relational_select_unsigned(cl_device_id device, cl_context context, cl_
     RandomSeed seed(gRandomSeed);
 
 
-    for( typeIndex = 0; typeIndex < 10; typeIndex++ )
+    for (typeIndex = 0; typeIndex < vecTypeSize; typeIndex++)
     {
         if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) && !gHasLong)
             continue;
@@ -694,6 +759,19 @@ int test_relational_select_unsigned(cl_device_id device, cl_context context, cl_
                 log_info("Testing doubles.\n");
             }
         }
+        if (vecType[typeIndex] == kHalf)
+        {
+            if (!is_extension_available(device, "cl_khr_fp16"))
+            {
+                log_info("Extension cl_khr_fp16 not supported; skipping half "
+                         "tests.\n");
+                continue;
+            }
+            else
+            {
+                log_info("Testing halfs.\n");
+            }
+        }
         for( testTypeIndex = 0; testVecType[ testTypeIndex ] != kNumExplicitTypes; testTypeIndex++ )
         {
             if( testVecType[ testTypeIndex ] != vecType[ typeIndex ] )
@@ -714,85 +792,3 @@ int test_relational_select_unsigned(cl_device_id device, cl_context context, cl_
 
     return retVal;
 }
-
-
-
-extern int test_relational_isequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isnotequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isgreater_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isgreaterequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isless_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_islessequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_islessgreater_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isnotequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isgreater_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isgreaterequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isless_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_islessequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_islessgreater_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-
-
-int test_relational_isequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    int err = 0;
-    err |= test_relational_isequal_float( device, context, queue, numElements );
-    err |= test_relational_isequal_double( device, context, queue, numElements );
-    return err;
-}
-
-
-int test_relational_isnotequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    int err = 0;
-    err |= test_relational_isnotequal_float( device, context, queue, numElements );
-    err |= test_relational_isnotequal_double( device, context, queue, numElements );
-    return err;
-}
-
-
-int test_relational_isgreater(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    int err = 0;
-    err |= test_relational_isgreater_float( device, context, queue, numElements );
-    err |= test_relational_isgreater_double( device, context, queue, numElements );
-    return err;
-}
-
-
-int test_relational_isgreaterequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    int err = 0;
-    err |= test_relational_isgreaterequal_float( device, context, queue, numElements );
-    err |= test_relational_isgreaterequal_double( device, context, queue, numElements );
-    return err;
-}
-
-
-int test_relational_isless(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    int err = 0;
-    err |= test_relational_isless_float( device, context, queue, numElements );
-    err |= test_relational_isless_double( device, context, queue, numElements );
-    return err;
-}
-
-
-int test_relational_islessequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    int err = 0;
-    err |= test_relational_islessequal_float( device, context, queue, numElements );
-    err |= test_relational_islessequal_double( device, context, queue, numElements );
-    return err;
-}
-
-
-int test_relational_islessgreater(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    int err = 0;
-    err |= test_relational_islessgreater_float( device, context, queue, numElements );
-    err |= test_relational_islessgreater_double( device, context, queue, numElements );
-    return err;
-}
-
-

From 0447b7a2c80f40ff716300dd4bca172722749e81 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?=
 <jmartinezcaamao@gmail.com>
Date: Tue, 16 May 2023 17:44:15 +0200
Subject: [PATCH 07/30] Avoid integer overflows when computing abs_diff (#1689)

After compiling OpenCL CTS with -fsanitize=undefined I stumbled upon
some signed integer overflows.

test_absdiff.cpp:103:24: runtime error: signed integer overflow:
  419625160 - -1937831252 cannot be represented in type 'int'
test_absdiff.cpp:101:28: runtime error: signed integer overflow:
  1277901399 - -1294103363 cannot be represented in type 'int'
test_absdiff.cpp:140:24: runtime error: signed integer overflow:
  8945130301981949496 - -2872789959208163723 cannot be represented in type
  'long int'
test_absdiff.cpp:138:29: runtime error: signed integer overflow:
  5488544718097069860 - -5558131619970145206 cannot be represented in type
  'long int'

The result from these operations was used to compare against the kernel result.

This patch replaces the manual abs_diff computations with a helper function
that handles the signed integer overflow case.
---
 test_conformance/integer_ops/test_absdiff.cpp | 43 ++++++++-----------
 1 file changed, 19 insertions(+), 24 deletions(-)

diff --git a/test_conformance/integer_ops/test_absdiff.cpp b/test_conformance/integer_ops/test_absdiff.cpp
index 7459bd2d..710b9c4e 100644
--- a/test_conformance/integer_ops/test_absdiff.cpp
+++ b/test_conformance/integer_ops/test_absdiff.cpp
@@ -22,6 +22,17 @@
 
 #include "procs.h"
 
+template <class Integer>
+static typename std::make_unsigned<Integer>::type abs_diff(Integer a, Integer b)
+{
+    // Work in the unsigned counterpart, where wraparound is well defined, so
+    // the distance between a and b cannot overflow the way a - b could.
+    using Magnitude = typename std::make_unsigned<Integer>::type;
+    const Magnitude larger = static_cast<Magnitude>(a < b ? b : a);
+    const Magnitude smaller = static_cast<Magnitude>(a < b ? a : b);
+    return static_cast<Magnitude>(larger - smaller);
+}
+
 static int verify_absdiff_char( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
 {
     const cl_char *inA = (const cl_char *)p;
@@ -30,9 +41,7 @@ static int verify_absdiff_char( const void *p, const void *q, const void *r, siz
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_uchar r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_uchar r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         { log_info( "%ld) Failure for absdiff( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
     }
@@ -47,9 +56,7 @@ static int verify_absdiff_uchar( const void *p, const void *q, const void *r, si
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_uchar r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_uchar r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         { log_info( "%ld) Failure for absdiff( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
     }
@@ -64,9 +71,7 @@ static int verify_absdiff_short( const void *p, const void *q, const void *r, si
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_ushort r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_ushort r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         { log_info( "%ld) Failure for absdiff( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
     }
@@ -81,9 +86,7 @@ static int verify_absdiff_ushort( const void *p, const void *q, const void *r, s
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_ushort r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_ushort r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         { log_info( "%ld) Failure for absdiff( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
     }
@@ -98,9 +101,7 @@ static int verify_absdiff_int( const void *p, const void *q, const void *r, size
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_uint r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_uint r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         {
             log_info( "%ld) Failure for absdiff( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] );
@@ -118,9 +119,7 @@ static int verify_absdiff_uint( const void *p, const void *q, const void *r, siz
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_uint r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_uint r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         { log_info( "%ld) Failure for absdiff( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
     }
@@ -135,9 +134,7 @@ static int verify_absdiff_long( const void *p, const void *q, const void *r, siz
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_ulong r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_ulong r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         { log_info( "%ld) Failure for absdiff( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
     }
@@ -152,9 +149,7 @@ static int verify_absdiff_ulong( const void *p, const void *q, const void *r, si
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_ulong r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_ulong r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         { log_info( "%ld) Failure for absdiff( (ulong%s) 0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
     }

From 32688a47b30bf2168f613cb2ce7ee0a40c52fe0b Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 16 May 2023 17:44:42 +0200
Subject: [PATCH 08/30] Complementation and modernization of commonfns tests 
 (#1694)

* Unified common functions tests in preparation for adding cl_khr_fp16 support

* Renamed base structure, plus a few cosmetic corrections

* Added corrections due to code review

* Removed comment separators

* Added review-related corrections
---
 test_conformance/commonfns/CMakeLists.txt     |  14 +-
 test_conformance/commonfns/main.cpp           |  36 +-
 test_conformance/commonfns/procs.h            |   9 +-
 test_conformance/commonfns/test_base.h        | 193 +++++
 test_conformance/commonfns/test_binary_fn.cpp | 398 +++++-----
 test_conformance/commonfns/test_clamp.cpp     | 409 +++++------
 test_conformance/commonfns/test_degrees.cpp   | 470 ------------
 test_conformance/commonfns/test_fmax.cpp      | 233 ------
 test_conformance/commonfns/test_fmaxf.cpp     | 244 -------
 test_conformance/commonfns/test_fmin.cpp      | 238 ------
 test_conformance/commonfns/test_fminf.cpp     | 236 ------
 test_conformance/commonfns/test_max.cpp       |  60 --
 test_conformance/commonfns/test_maxf.cpp      |  64 --
 test_conformance/commonfns/test_min.cpp       |  56 --
 test_conformance/commonfns/test_minf.cpp      |  70 --
 test_conformance/commonfns/test_mix.cpp       | 390 ++++++----
 test_conformance/commonfns/test_radians.cpp   | 468 ------------
 test_conformance/commonfns/test_sign.cpp      | 437 -----------
 .../commonfns/test_smoothstep.cpp             | 501 ++++++-------
 .../commonfns/test_smoothstepf.cpp            | 259 -------
 test_conformance/commonfns/test_step.cpp      | 682 ++++++------------
 test_conformance/commonfns/test_stepf.cpp     | 546 --------------
 test_conformance/commonfns/test_unary_fn.cpp  | 365 ++++++++++
 23 files changed, 1693 insertions(+), 4685 deletions(-)
 create mode 100644 test_conformance/commonfns/test_base.h
 delete mode 100644 test_conformance/commonfns/test_degrees.cpp
 delete mode 100644 test_conformance/commonfns/test_fmax.cpp
 delete mode 100644 test_conformance/commonfns/test_fmaxf.cpp
 delete mode 100644 test_conformance/commonfns/test_fmin.cpp
 delete mode 100644 test_conformance/commonfns/test_fminf.cpp
 delete mode 100644 test_conformance/commonfns/test_max.cpp
 delete mode 100644 test_conformance/commonfns/test_maxf.cpp
 delete mode 100644 test_conformance/commonfns/test_min.cpp
 delete mode 100644 test_conformance/commonfns/test_minf.cpp
 delete mode 100644 test_conformance/commonfns/test_radians.cpp
 delete mode 100644 test_conformance/commonfns/test_sign.cpp
 delete mode 100644 test_conformance/commonfns/test_smoothstepf.cpp
 delete mode 100644 test_conformance/commonfns/test_stepf.cpp
 create mode 100644 test_conformance/commonfns/test_unary_fn.cpp

diff --git a/test_conformance/commonfns/CMakeLists.txt b/test_conformance/commonfns/CMakeLists.txt
index 5aa29250..bea20cf5 100644
--- a/test_conformance/commonfns/CMakeLists.txt
+++ b/test_conformance/commonfns/CMakeLists.txt
@@ -3,22 +3,10 @@ set(MODULE_NAME COMMONFNS)
 set(${MODULE_NAME}_SOURCES
     main.cpp
     test_clamp.cpp
-    test_degrees.cpp
-    test_max.cpp
-    test_maxf.cpp
-    test_min.cpp
-    test_minf.cpp
+    test_unary_fn.cpp
     test_mix.cpp
-    test_radians.cpp
     test_step.cpp
-    test_stepf.cpp
     test_smoothstep.cpp
-    test_smoothstepf.cpp
-    test_sign.cpp
-    test_fmax.cpp
-    test_fmin.cpp
-    test_fmaxf.cpp
-    test_fminf.cpp
     test_binary_fn.cpp
 )
 
diff --git a/test_conformance/commonfns/main.cpp b/test_conformance/commonfns/main.cpp
index b8364d5a..3e4b0b8e 100644
--- a/test_conformance/commonfns/main.cpp
+++ b/test_conformance/commonfns/main.cpp
@@ -13,11 +13,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
 
 #include <stdio.h>
 #include <string.h>
 #include "procs.h"
+#include "test_base.h"
+
+std::map<size_t, std::string> BaseFunctionTest::type2name;
 
 int g_arrVecSizes[kVectorSizeCount + kStrangeVectorSizeCount];
 int g_arrStrangeVectorSizes[kStrangeVectorSizeCount] = {3};
@@ -32,25 +34,13 @@ static void initVecSizes() {
     }
 }
 
-
 test_definition test_list[] = {
-    ADD_TEST( clamp ),
-    ADD_TEST( degrees ),
-    ADD_TEST( fmax ),
-    ADD_TEST( fmaxf ),
-    ADD_TEST( fmin ),
-    ADD_TEST( fminf ),
-    ADD_TEST( max ),
-    ADD_TEST( maxf ),
-    ADD_TEST( min ),
-    ADD_TEST( minf ),
-    ADD_TEST( mix ),
-    ADD_TEST( radians ),
-    ADD_TEST( step ),
-    ADD_TEST( stepf ),
-    ADD_TEST( smoothstep ),
-    ADD_TEST( smoothstepf ),
-    ADD_TEST( sign ),
+    ADD_TEST(clamp),      ADD_TEST(degrees),     ADD_TEST(fmax),
+    ADD_TEST(fmaxf),      ADD_TEST(fmin),        ADD_TEST(fminf),
+    ADD_TEST(max),        ADD_TEST(maxf),        ADD_TEST(min),
+    ADD_TEST(minf),       ADD_TEST(mix),         ADD_TEST(mixf),
+    ADD_TEST(radians),    ADD_TEST(step),        ADD_TEST(stepf),
+    ADD_TEST(smoothstep), ADD_TEST(smoothstepf), ADD_TEST(sign),
 };
 
 const int test_num = ARRAY_SIZE( test_list );
@@ -58,6 +48,14 @@ const int test_num = ARRAY_SIZE( test_list );
 int main(int argc, const char *argv[])
 {
     initVecSizes();
+
+    if (BaseFunctionTest::type2name.empty())
+    {
+        BaseFunctionTest::type2name[sizeof(half)] = "half";
+        BaseFunctionTest::type2name[sizeof(float)] = "float";
+        BaseFunctionTest::type2name[sizeof(double)] = "double";
+    }
+
     return runTestHarness(argc, argv, test_num, test_list, false, 0);
 }
 
diff --git a/test_conformance/commonfns/procs.h b/test_conformance/commonfns/procs.h
index dada94f9..c1115ee7 100644
--- a/test_conformance/commonfns/procs.h
+++ b/test_conformance/commonfns/procs.h
@@ -37,6 +37,8 @@ extern int        test_maxf(cl_device_id device, cl_context context, cl_command_
 extern int        test_min(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 extern int        test_minf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 extern int        test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_mixf(cl_device_id device, cl_context context,
+                     cl_command_queue queue, int num_elements);
 extern int        test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 extern int        test_step(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 extern int        test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
@@ -44,11 +46,4 @@ extern int        test_smoothstep(cl_device_id device, cl_context context, cl_co
 extern int        test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 extern int        test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 
-typedef int     (*binary_verify_float_fn)( float *x, float *y, float *out, int numElements, int vecSize );
-typedef int     (*binary_verify_double_fn)( double *x, double *y, double *out, int numElements, int vecSize );
-
-extern int      test_binary_fn( cl_device_id device, cl_context context, cl_command_queue queue, int n_elems,
-                           const char *fnName, bool vectorSecondParam,
-                           binary_verify_float_fn floatVerifyFn, binary_verify_double_fn doubleVerifyFn );
-
 
diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h
new file mode 100644
index 00000000..44291042
--- /dev/null
+++ b/test_conformance/commonfns/test_base.h
@@ -0,0 +1,193 @@
+// Copyright (c) 2023 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef TEST_COMMONFNS_BASE_H
+#define TEST_COMMONFNS_BASE_H
+
+#include <vector>
+#include <map>
+#include <memory>
+
+#include <CL/cl_half.h>
+#include <CL/cl_ext.h>
+
+#include "harness/deviceInfo.h"
+#include "harness/testHarness.h"
+#include "harness/typeWrappers.h"
+
+
+template <typename T>
+using VerifyFuncBinary = int (*)(const T *const, const T *const, const T *const,
+                                 const int num, const int vs, const int vp);
+
+
+template <typename T>
+using VerifyFuncUnary = int (*)(const T *const, const T *const, const int num);
+
+
+using half = cl_half;
+
+
+struct BaseFunctionTest
+{
+    BaseFunctionTest(cl_device_id device, cl_context context,
+                     cl_command_queue queue, int num_elems, const char *fn,
+                     bool vsp)
+        : device(device), context(context), queue(queue), num_elems(num_elems),
+          fnName(fn), vecParam(vsp)
+    {}
+
+    // Test body returning an OpenCL error code
+    virtual cl_int Run() = 0;
+
+    cl_device_id device;
+    cl_context context;
+    cl_command_queue queue;
+
+    int num_elems;
+    std::string fnName;
+    bool vecParam;
+
+    static std::map<size_t, std::string> type2name;
+};
+
+
+struct MinTest : BaseFunctionTest
+{
+    MinTest(cl_device_id device, cl_context context, cl_command_queue queue,
+            int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct MaxTest : BaseFunctionTest
+{
+    MaxTest(cl_device_id device, cl_context context, cl_command_queue queue,
+            int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct ClampTest : BaseFunctionTest
+{
+    ClampTest(cl_device_id device, cl_context context, cl_command_queue queue,
+              int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct DegreesTest : BaseFunctionTest
+{
+    DegreesTest(cl_device_id device, cl_context context, cl_command_queue queue,
+                int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct RadiansTest : BaseFunctionTest
+{
+    RadiansTest(cl_device_id device, cl_context context, cl_command_queue queue,
+                int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct SignTest : BaseFunctionTest
+{
+    SignTest(cl_device_id device, cl_context context, cl_command_queue queue,
+             int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct SmoothstepTest : BaseFunctionTest
+{
+    SmoothstepTest(cl_device_id device, cl_context context,
+                   cl_command_queue queue, int num_elems, const char *fn,
+                   bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct StepTest : BaseFunctionTest
+{
+    StepTest(cl_device_id device, cl_context context, cl_command_queue queue,
+             int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct MixTest : BaseFunctionTest
+{
+    MixTest(cl_device_id device, cl_context context, cl_command_queue queue,
+            int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+template <typename... Args>
+std::string string_format(const std::string &format, Args... args)
+{
+    int sformat = std::snprintf(nullptr, 0, format.c_str(), args...) + 1;
+    if (sformat <= 0)
+        throw std::runtime_error("string_format: string processing error.");
+    auto format_size = static_cast<size_t>(sformat);
+    std::unique_ptr<char[]> buffer(new char[format_size]);
+    std::snprintf(buffer.get(), format_size, format.c_str(), args...);
+    return std::string(buffer.get(), buffer.get() + format_size - 1);
+}
+
+
+template <class T>
+int MakeAndRunTest(cl_device_id device, cl_context context,
+                   cl_command_queue queue, int num_elements,
+                   const char *fn = "", bool vsp = false)
+{
+    auto test_fixture = T(device, context, queue, num_elements, fn, vsp);
+
+    cl_int error = test_fixture.Run();
+    test_error_ret(error, "Test Failed", TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+#endif // TEST_COMMONFNS_BASE_H
diff --git a/test_conformance/commonfns/test_binary_fn.cpp b/test_conformance/commonfns/test_binary_fn.cpp
index b40bf1f6..1eb12f73 100644
--- a/test_conformance/commonfns/test_binary_fn.cpp
+++ b/test_conformance/commonfns/test_binary_fn.cpp
@@ -13,14 +13,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
 
 #include <stdio.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <vector>
+
+#include "harness/deviceInfo.h"
+#include "harness/typeWrappers.h"
 
 #include "procs.h"
+#include "test_base.h"
 
 const char *binary_fn_code_pattern =
 "%s\n" /* optional pragma */
@@ -49,216 +53,286 @@ const char *binary_fn_code_pattern_v3_scalar =
 "    vstore3(%s(vload3(tid,x), y[tid] ), tid, dst);\n"
 "}\n";
 
-int test_binary_fn( cl_device_id device, cl_context context, cl_command_queue queue, int n_elems,
-                    const char *fnName, bool vectorSecondParam,
-                    binary_verify_float_fn floatVerifyFn, binary_verify_double_fn doubleVerifyFn )
+
+template <typename T>
+int test_binary_fn(cl_device_id device, cl_context context,
+                   cl_command_queue queue, int n_elems,
+                   const std::string& fnName, bool vecSecParam,
+                   VerifyFuncBinary<T> verifyFn)
 {
-    cl_mem      streams[6];
-    cl_float      *input_ptr[2], *output_ptr;
-    cl_double     *input_ptr_double[2], *output_ptr_double=NULL;
-    cl_program  *program;
-    cl_kernel   *kernel;
-    size_t threads[1];
-    int num_elements;
-    int err;
-    int i, j;
-    MTdata d;
+    clMemWrapper streams[3];
+    std::vector<T> input_ptr[2], output_ptr;
 
-      program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount*2);
-      kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount*2);
+    std::vector<clProgramWrapper> programs;
+    std::vector<clKernelWrapper> kernels;
+    int err, i, j;
+    MTdataHolder d = MTdataHolder(gRandomSeed);
 
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
+    assert(BaseFunctionTest::type2name.find(sizeof(T))
+           != BaseFunctionTest::type2name.end());
+    auto tname = BaseFunctionTest::type2name[sizeof(T)];
 
-    int test_double = 0;
-    if(is_extension_available( device, "cl_khr_fp64" ))
-    {
-        log_info("Testing doubles.\n");
-        test_double = 1;
-    }
+    programs.resize(kTotalVecCount);
+    kernels.resize(kTotalVecCount);
 
-    for( i = 0; i < 2; i++ )
-    {
-        input_ptr[i] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-        if (test_double) input_ptr_double[i] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    }
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    if (test_double) output_ptr_double = (cl_double*)malloc(sizeof(cl_double) * num_elements);
+    int num_elements = n_elems * (1 << (kTotalVecCount - 1));
+
+    for (i = 0; i < 2; i++) input_ptr[i].resize(num_elements);
+    output_ptr.resize(num_elements);
 
     for( i = 0; i < 3; i++ )
     {
-        streams[i] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, &err);
+        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                    sizeof(T) * num_elements, NULL, &err);
         test_error( err, "clCreateBuffer failed");
     }
 
-    if (test_double)
-        for( i = 3; i < 6; i++ )
-        {
-            streams[i] =
-                clCreateBuffer(context, CL_MEM_READ_WRITE,
-                               sizeof(cl_double) * num_elements, NULL, &err);
-            test_error(err, "clCreateBuffer failed");
-        }
-
-    d = init_genrand( gRandomSeed );
-    for( j = 0; j < num_elements; j++ )
+    std::string pragma_str;
+    if (std::is_same<T, float>::value)
     {
-        input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d);
-        input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d);
-        if (test_double)
+        for (j = 0; j < num_elements; j++)
         {
-            input_ptr_double[0][j] = get_random_double(-0x20000000, 0x20000000, d);
-            input_ptr_double[1][j] = get_random_double(-0x20000000, 0x20000000, d);
+            input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d);
+            input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d);
         }
     }
-    free_mtdata(d);     d = NULL;
-
-    for( i = 0; i < 2; i++ )
+    else if (std::is_same<T, double>::value)
     {
-        err = clEnqueueWriteBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( cl_float ) * num_elements, input_ptr[ i ], 0, NULL, NULL );
-        test_error( err, "Unable to write input buffer" );
-
-        if (test_double)
+        pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+        for (j = 0; j < num_elements; j++)
         {
-          err = clEnqueueWriteBuffer( queue, streams[ 3 + i ], CL_TRUE, 0, sizeof( cl_double ) * num_elements, input_ptr_double[ i ], 0, NULL, NULL );
-          test_error( err, "Unable to write input buffer" );
+            input_ptr[0][j] = get_random_double(-0x20000000, 0x20000000, d);
+            input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d);
         }
     }
 
-    for( i = 0; i < kTotalVecCount; i++ )
+    for (i = 0; i < 2; i++)
     {
-        char programSrc[ 10240 ];
-        char vecSizeNames[][ 3 ] = { "", "2", "4", "8", "16", "3" };
+        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
+                                   sizeof(T) * num_elements,
+                                   &input_ptr[i].front(), 0, NULL, NULL);
+        test_error(err, "Unable to write input buffer");
+    }
 
-        if(i >= kVectorSizeCount) {
-            // do vec3 print
+    char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
 
-            if(vectorSecondParam) {
-            sprintf( programSrc,binary_fn_code_pattern_v3, "", "float", "float", "float", fnName );
-        } else  {
-            sprintf( programSrc,binary_fn_code_pattern_v3_scalar, "", "float", "float", "float", fnName );
+    for (i = 0; i < kTotalVecCount; i++)
+    {
+        std::string kernelSource;
+        if (i >= kVectorSizeCount)
+        {
+            if (vecSecParam)
+            {
+                std::string str = binary_fn_code_pattern_v3;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str(), fnName.c_str());
+            }
+            else
+            {
+                std::string str = binary_fn_code_pattern_v3_scalar;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str(), fnName.c_str());
             }
-        } else  {
-            // do regular
-            sprintf( programSrc, binary_fn_code_pattern, "", "float", vecSizeNames[ i ], "float", vectorSecondParam ? vecSizeNames[ i ] : "", "float", vecSizeNames[ i ], fnName );
         }
-        const char *ptr = programSrc;
-        err = create_single_kernel_helper( context, &program[ i ], &kernel[ i ], 1, &ptr, "test_fn" );
-        test_error( err, "Unable to create kernel" );
-
-        if (test_double)
+        else
         {
-        if(i >= kVectorSizeCount) {
-        if(vectorSecondParam) {
-            sprintf( programSrc, binary_fn_code_pattern_v3, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable",
-            "double",  "double",  "double",  fnName );
-        } else {
+            // do regular
+            std::string str = binary_fn_code_pattern;
+            kernelSource = string_format(
+                str, pragma_str.c_str(), tname.c_str(), vecSizeNames[i],
+                tname.c_str(), vecSecParam ? vecSizeNames[i] : "",
+                tname.c_str(), vecSizeNames[i], fnName.c_str());
+        }
+        const char* programPtr = kernelSource.c_str();
+        err = create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
+                                          (const char**)&programPtr, "test_fn");
+        test_error(err, "Unable to create kernel");
 
-        sprintf( programSrc, binary_fn_code_pattern_v3_scalar, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable",
-                 "double",  "double",  "double",  fnName );
-        }
-        } else {
-        sprintf( programSrc, binary_fn_code_pattern, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable",
-            "double", vecSizeNames[ i ], "double", vectorSecondParam ? vecSizeNames[ i ] : "", "double", vecSizeNames[ i ], fnName );
-        }
-            ptr = programSrc;
-            err = create_single_kernel_helper( context, &program[ kTotalVecCount + i ], &kernel[ kTotalVecCount + i ], 1, &ptr, "test_fn" );
-            test_error( err, "Unable to create kernel" );
-        }
-    }
-
-    for( i = 0; i < kTotalVecCount; i++ )
-    {
         for( j = 0; j < 3; j++ )
         {
-            err = clSetKernelArg( kernel[ i ], j, sizeof( streams[ j ] ), &streams[ j ] );
+            err =
+                clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
             test_error( err, "Unable to set kernel argument" );
         }
 
-        threads[0] = (size_t)n_elems;
+        size_t threads = (size_t)n_elems;
 
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+        err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
+                                     0, NULL, NULL);
         test_error( err, "Unable to execute kernel" );
 
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
+        err = clEnqueueReadBuffer(queue, streams[2], true, 0,
+                                  sizeof(T) * num_elements, &output_ptr[0], 0,
+                                  NULL, NULL);
         test_error( err, "Unable to read results" );
 
-
-
-        if( floatVerifyFn( input_ptr[0], input_ptr[1], output_ptr, n_elems, ((g_arrVecSizes[i])) ) )
+        if (verifyFn((T*)&input_ptr[0].front(), (T*)&input_ptr[1].front(),
+                     &output_ptr[0], n_elems, g_arrVecSizes[i],
+                     vecSecParam ? 1 : 0))
         {
-            log_error(" float%d%s test failed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", float");
+            log_error("%s %s%d%s test failed\n", fnName.c_str(), tname.c_str(),
+                      ((g_arrVecSizes[i])),
+                      vecSecParam ? "" : std::string(", " + tname).c_str());
             err = -1;
         }
         else
         {
-            log_info(" float%d%s test passed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", float");
+            log_info("%s %s%d%s test passed\n", fnName.c_str(), tname.c_str(),
+                     ((g_arrVecSizes[i])),
+                     vecSecParam ? "" : std::string(", " + tname).c_str());
             err = 0;
         }
 
         if (err)
             break;
     }
-
-    if (test_double)
-    {
-        for( i = 0; i < kTotalVecCount; i++ )
-        {
-            for( j = 0; j < 3; j++ )
-            {
-                err = clSetKernelArg( kernel[ kTotalVecCount + i ], j, sizeof( streams[ 3 + j ] ), &streams[ 3 + j ] );
-                test_error( err, "Unable to set kernel argument" );
-            }
-
-            threads[0] = (size_t)n_elems;
-
-            err = clEnqueueNDRangeKernel( queue, kernel[kTotalVecCount + i], 1, NULL, threads, NULL, 0, NULL, NULL );
-            test_error( err, "Unable to execute kernel" );
-
-            err = clEnqueueReadBuffer( queue, streams[5], CL_TRUE, 0, sizeof(cl_double)*num_elements, (void *)output_ptr_double, 0, NULL, NULL );
-            test_error( err, "Unable to read results" );
-
-            if( doubleVerifyFn( input_ptr_double[0], input_ptr_double[1], output_ptr_double, n_elems, ((g_arrVecSizes[i]))))
-            {
-                log_error(" double%d%s test failed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", double");
-                err = -1;
-            }
-            else
-            {
-                log_info(" double%d%s test passed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", double");
-                err = 0;
-            }
-
-            if (err)
-            break;
-        }
-    }
-
-
-    for( i = 0; i < ((test_double) ? 6 : 3); i++ )
-    {
-        clReleaseMemObject(streams[i]);
-    }
-    for (i=0; i < ((test_double) ? kTotalVecCount * 2 : kTotalVecCount) ; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-      free(program);
-      free(kernel);
-
-    if (test_double)
-    {
-        free(input_ptr_double[0]);
-        free(input_ptr_double[1]);
-        free(output_ptr_double);
-    }
-
     return err;
 }
 
+namespace {
 
+template <typename T>
+int max_verify(const T* const x, const T* const y, const T* const out,
+               int numElements, int vecSize, int vecParam)
+{
+    for (int i = 0; i < numElements; i++)
+    {
+        for (int j = 0; j < vecSize; j++)
+        {
+            int k = i * vecSize + j;
+            int l = (k * vecParam + i * (1 - vecParam));
+            T v = (x[k] < y[l]) ? y[l] : x[k];
+            if (v != out[k])
+            {
+                log_error(
+                    "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is "
+                    "vector %d, element %d, for vector size %d)\n",
+                    k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize);
+                return -1;
+            }
+        }
+    }
+    return 0;
+}
+
+template <typename T>
+int min_verify(const T* const x, const T* const y, const T* const out,
+               int numElements, int vecSize, int vecParam)
+{
+    for (int i = 0; i < numElements; i++)
+    {
+        for (int j = 0; j < vecSize; j++)
+        {
+            int k = i * vecSize + j;
+            int l = (k * vecParam + i * (1 - vecParam));
+            T v = (x[k] > y[l]) ? y[l] : x[k];
+            if (v != out[k])
+            {
+                log_error(
+                    "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is "
+                    "vector %d, element %d, for vector size %d)\n",
+                    k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize);
+                return -1;
+            }
+        }
+    }
+    return 0;
+}
+
+}
+
+cl_int MaxTest::Run()
+{
+    cl_int error = CL_SUCCESS;
+
+    error = test_binary_fn<float>(device, context, queue, num_elems,
+                                  fnName.c_str(), vecParam, max_verify<float>);
+    test_error(error, "MaxTest::Run<float> failed");
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+    {
+        error = test_binary_fn<double>(device, context, queue, num_elems,
+                                       fnName.c_str(), vecParam,
+                                       max_verify<double>);
+        test_error(error, "MaxTest::Run<double> failed");
+    }
+
+    return error;
+}
+
+cl_int MinTest::Run()
+{
+    cl_int error = CL_SUCCESS;
+
+    error = test_binary_fn<float>(device, context, queue, num_elems,
+                                  fnName.c_str(), vecParam, min_verify<float>);
+    test_error(error, "MinTest::Run<float> failed");
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+    {
+        error = test_binary_fn<double>(device, context, queue, num_elems,
+                                       fnName.c_str(), vecParam,
+                                       min_verify<double>);
+        test_error(error, "MinTest::Run<double> failed");
+    }
+
+    return error;
+}
+
+int test_min(cl_device_id device, cl_context context, cl_command_queue queue,
+             int n_elems)
+{
+    return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "min",
+                                   true);
+}
+
+int test_minf(cl_device_id device, cl_context context, cl_command_queue queue,
+              int n_elems)
+{
+    return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "min",
+                                   false);
+}
+
+int test_fmin(cl_device_id device, cl_context context, cl_command_queue queue,
+              int n_elems)
+{
+    return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "fmin",
+                                   true);
+}
+
+int test_fminf(cl_device_id device, cl_context context, cl_command_queue queue,
+               int n_elems)
+{
+    return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "fmin",
+                                   false);
+}
+
+int test_max(cl_device_id device, cl_context context, cl_command_queue queue,
+             int n_elems)
+{
+    return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "max",
+                                   true);
+}
+
+int test_maxf(cl_device_id device, cl_context context, cl_command_queue queue,
+              int n_elems)
+{
+    return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "max",
+                                   false);
+}
+
+int test_fmax(cl_device_id device, cl_context context, cl_command_queue queue,
+              int n_elems)
+{
+    return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "fmax",
+                                   true);
+}
+
+int test_fmaxf(cl_device_id device, cl_context context, cl_command_queue queue,
+               int n_elems)
+{
+    return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "fmax",
+                                   false);
+}
diff --git a/test_conformance/commonfns/test_clamp.cpp b/test_conformance/commonfns/test_clamp.cpp
index bbb83645..0e96fb60 100644
--- a/test_conformance/commonfns/test_clamp.cpp
+++ b/test_conformance/commonfns/test_clamp.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,303 +13,252 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
 
 #include <stdio.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <vector>
+
+#include "harness/deviceInfo.h"
+#include "harness/typeWrappers.h"
 
 #include "procs.h"
+#include "test_base.h"
+
 
 #ifndef M_PI
-#define M_PI    3.14159265358979323846264338327950288
+#define M_PI 3.14159265358979323846264338327950288
 #endif
 
-#define CLAMP_KERNEL( type )                        \
-    const char *clamp_##type##_kernel_code =                \
-    EMIT_PRAGMA_DIRECTIVE                        \
-    "__kernel void test_clamp(__global " #type " *x, __global " #type " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \
-    "{\n"                                \
-    "    int  tid = get_global_id(0);\n"                \
-    "\n"                                \
-    "    dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n"    \
-    "}\n";
 
-#define CLAMP_KERNEL_V( type, size)                    \
-    const char *clamp_##type##size##_kernel_code =            \
-    EMIT_PRAGMA_DIRECTIVE                        \
-    "__kernel void test_clamp(__global " #type #size " *x, __global " #type #size " *minval, __global " #type #size " *maxval, __global " #type #size " *dst)\n" \
-    "{\n"                                \
-    "    int  tid = get_global_id(0);\n"                \
-    "\n"                                \
-    "    dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n"    \
-    "}\n";
+#define CLAMP_KERNEL(type)                                                     \
+    const char *clamp_##type##_kernel_code = EMIT_PRAGMA_DIRECTIVE             \
+        "__kernel void test_clamp(__global " #type " *x, __global " #type      \
+        " *minval, __global " #type " *maxval, __global " #type " *dst)\n"     \
+        "{\n"                                                                  \
+        "    int  tid = get_global_id(0);\n"                                   \
+        "\n"                                                                   \
+        "    dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n"            \
+        "}\n";
+
+#define CLAMP_KERNEL_V(type, size)                                             \
+    const char *clamp_##type##size##_kernel_code = EMIT_PRAGMA_DIRECTIVE       \
+        "__kernel void test_clamp(__global " #type #size                       \
+        " *x, __global " #type #size " *minval, __global " #type #size         \
+        " *maxval, __global " #type #size " *dst)\n"                           \
+        "{\n"                                                                  \
+        "    int  tid = get_global_id(0);\n"                                   \
+        "\n"                                                                   \
+        "    dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n"            \
+        "}\n";
+
+#define CLAMP_KERNEL_V3(type, size)                                            \
+    const char *clamp_##type##size##_kernel_code = EMIT_PRAGMA_DIRECTIVE       \
+        "__kernel void test_clamp(__global " #type " *x, __global " #type      \
+        " *minval, __global " #type " *maxval, __global " #type " *dst)\n"     \
+        "{\n"                                                                  \
+        "    int  tid = get_global_id(0);\n"                                   \
+        "\n"                                                                   \
+        "    vstore3(clamp(vload3(tid, x), vload3(tid,minval), "               \
+        "vload3(tid,maxval)), tid, dst);\n"                                    \
+        "}\n";
 
-#define CLAMP_KERNEL_V3( type, size)                    \
-    const char *clamp_##type##size##_kernel_code =            \
-    EMIT_PRAGMA_DIRECTIVE                        \
-    "__kernel void test_clamp(__global " #type " *x, __global " #type " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \
-    "{\n"                                \
-    "    int  tid = get_global_id(0);\n"                \
-    "\n"                                \
-    "    vstore3(clamp(vload3(tid, x), vload3(tid,minval), vload3(tid,maxval)), tid, dst);\n"    \
-    "}\n";
 
 #define EMIT_PRAGMA_DIRECTIVE " "
-CLAMP_KERNEL( float )
-CLAMP_KERNEL_V( float, 2 )
-CLAMP_KERNEL_V( float, 4 )
-CLAMP_KERNEL_V( float, 8 )
-CLAMP_KERNEL_V( float, 16 )
-CLAMP_KERNEL_V3( float, 3)
+CLAMP_KERNEL(float)
+CLAMP_KERNEL_V(float, 2)
+CLAMP_KERNEL_V(float, 4)
+CLAMP_KERNEL_V(float, 8)
+CLAMP_KERNEL_V(float, 16)
+CLAMP_KERNEL_V3(float, 3)
 #undef EMIT_PRAGMA_DIRECTIVE
 
 #define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-CLAMP_KERNEL( double )
-CLAMP_KERNEL_V( double, 2 )
-CLAMP_KERNEL_V( double, 4 )
-CLAMP_KERNEL_V( double, 8 )
-CLAMP_KERNEL_V( double, 16 )
-CLAMP_KERNEL_V3( double, 3 )
+CLAMP_KERNEL(double)
+CLAMP_KERNEL_V(double, 2)
+CLAMP_KERNEL_V(double, 4)
+CLAMP_KERNEL_V(double, 8)
+CLAMP_KERNEL_V(double, 16)
+CLAMP_KERNEL_V3(double, 3)
 #undef EMIT_PRAGMA_DIRECTIVE
 
-const char *clamp_float_codes[] = { clamp_float_kernel_code, clamp_float2_kernel_code, clamp_float4_kernel_code, clamp_float8_kernel_code, clamp_float16_kernel_code, clamp_float3_kernel_code };
-const char *clamp_double_codes[] = { clamp_double_kernel_code, clamp_double2_kernel_code, clamp_double4_kernel_code, clamp_double8_kernel_code, clamp_double16_kernel_code, clamp_double3_kernel_code };
+const char *clamp_float_codes[] = {
+    clamp_float_kernel_code,   clamp_float2_kernel_code,
+    clamp_float4_kernel_code,  clamp_float8_kernel_code,
+    clamp_float16_kernel_code, clamp_float3_kernel_code
+};
+const char *clamp_double_codes[] = {
+    clamp_double_kernel_code,   clamp_double2_kernel_code,
+    clamp_double4_kernel_code,  clamp_double8_kernel_code,
+    clamp_double16_kernel_code, clamp_double3_kernel_code
+};
 
-static int verify_clamp(float *x, float *minval, float *maxval, float *outptr, int n)
+namespace {
+
+// Checks outptr[i] against the host reference min(max(x, minval), maxval).
+template <typename T>
+int verify_clamp(const T *const x, const T *const minval, const T *const maxval,
+                 const T *const outptr, int n)
 {
-    float       t;
-    int         i;
-
-    for (i=0; i<n; i++)
+    // Log the first mismatching element and return -1; 0 means all n matched.
+    for (int i = 0; i < n; ++i)
     {
-        t = fminf( fmaxf( x[ i ], minval[ i ] ), maxval[ i ] );
+        const T t = std::min(std::max(x[i], minval[i]), maxval[i]);
         if (t != outptr[i])
         {
-            log_error( "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", i, x[i], minval[i], maxval[i], t, outptr[i] );
+            log_error(
+                "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", i,
+                x[i], minval[i], maxval[i], t, outptr[i]);
             return -1;
         }
     }
 
     return 0;
 }
-
-static int verify_clamp_double(double *x, double *minval, double *maxval, double *outptr, int n)
-{
-    double       t;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        t = fmin( fmax( x[ i ], minval[ i ] ), maxval[ i ] );
-        if (t != outptr[i])
-        {
-            log_error( "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", i, x[i], minval[i], maxval[i], t, outptr[i] );
-            return -1;
-        }
-    }
-
-    return 0;
 }
 
-int
-test_clamp(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+
+template <typename T>
+int test_clamp_fn(cl_device_id device, cl_context context,
+                  cl_command_queue queue, int n_elems)
 {
-    cl_mem      streams[8];
-    cl_float      *input_ptr[3], *output_ptr;
-    cl_double     *input_ptr_double[3], *output_ptr_double = NULL;
-    cl_program  *program;
-    cl_kernel   *kernel;
-    size_t threads[1];
-    int num_elements;
-    int err;
-    int i, j;
-    MTdata d;
+    clMemWrapper streams[4];
+    std::vector<T> input_ptr[3], output_ptr;
 
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount*2);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount*2);
+    std::vector<clProgramWrapper> programs;
+    std::vector<clKernelWrapper> kernels;
 
-    num_elements = n_elems * (1 << (kVectorSizeCount-1));
+    int err, i, j;
+    MTdataHolder d = MTdataHolder(gRandomSeed);
 
-    int test_double = 0;
-    if(is_extension_available( device, "cl_khr_fp64" )) {
-    log_info("Testing doubles.\n");
-      test_double = 1;
+    assert(BaseFunctionTest::type2name.find(sizeof(T))
+           != BaseFunctionTest::type2name.end());
+    auto tname = BaseFunctionTest::type2name[sizeof(T)];
+
+    programs.resize(kTotalVecCount);
+    kernels.resize(kTotalVecCount);
+
+    int num_elements = n_elems * (1 << (kVectorSizeCount - 1));
+
+    for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements);
+    output_ptr.resize(num_elements);
+
+    for (i = 0; i < 4; i++)
+    {
+        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                    sizeof(T) * num_elements, NULL, &err);
+        test_error(err, "clCreateBuffer failed");
     }
 
-
-    // why does this go from 0 to 2?? -- Oh, I see, there are four function
-    // arguments to the function, and 3 of them are inputs?
-    for( i = 0; i < 3; i++ )
+    if (std::is_same<T, float>::value)
     {
-        input_ptr[i] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-        if (test_double) input_ptr_double[i] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    }
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    if (test_double) output_ptr_double = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-
-    // why does this go from 0 to 3?
-    for( i = 0; i < 4; i++ )
-    {
-        streams[i] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, NULL);
-        if (!streams[0])
+        for (j = 0; j < num_elements; j++)
         {
-            log_error("clCreateBuffer failed\n");
-            return -1;
+            input_ptr[0][j] = get_random_float(-0x200000, 0x200000, d);
+            input_ptr[1][j] = get_random_float(-0x200000, 0x200000, d);
+            input_ptr[2][j] = get_random_float(input_ptr[1][j], 0x200000, d);
         }
     }
-    if (test_double)
-    for( i = 4; i < 8; i++ )
+    else if (std::is_same<T, double>::value)
+    {
+        for (j = 0; j < num_elements; j++)
         {
-            streams[i] =
-                clCreateBuffer(context, CL_MEM_READ_WRITE,
-                               sizeof(cl_double) * num_elements, NULL, NULL);
-            if (!streams[0])
-            {
-            log_error("clCreateBuffer failed\n");
-            return -1;
-            }
-        }
-
-    d = init_genrand( gRandomSeed );
-    for( j = 0; j < num_elements; j++ )
-    {
-        input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d);
-        input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d);
-        input_ptr[2][j] = get_random_float(input_ptr[1][j], 0x20000000, d);
-
-        if (test_double) {
-        input_ptr_double[0][j] = get_random_double(-0x20000000, 0x20000000, d);
-        input_ptr_double[1][j] = get_random_double(-0x20000000, 0x20000000, d);
-        input_ptr_double[2][j] = get_random_double(input_ptr_double[1][j], 0x20000000, d);
-        }
-    }
-    free_mtdata(d); d = NULL;
-
-    for( i = 0; i < 3; i++ )
-    {
-        err = clEnqueueWriteBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( cl_float ) * num_elements, input_ptr[ i ], 0, NULL, NULL );
-        test_error( err, "Unable to write input buffer" );
-
-        if (test_double) {
-        err = clEnqueueWriteBuffer( queue, streams[ 4 + i ], CL_TRUE, 0, sizeof( cl_double ) * num_elements, input_ptr_double[ i ], 0, NULL, NULL );
-        test_error( err, "Unable to write input buffer" );
+            input_ptr[0][j] = get_random_double(-0x20000000, 0x20000000, d);
+            input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d);
+            input_ptr[2][j] = get_random_double(input_ptr[1][j], 0x20000000, d);
         }
     }
 
-    for( i = 0; i < kTotalVecCount; i++ )
+    for (i = 0; i < 3; i++)
     {
-        err = create_single_kernel_helper( context, &program[ i ], &kernel[ i ], 1, &clamp_float_codes[ i ], "test_clamp" );
-        test_error( err, "Unable to create kernel" );
+        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
+                                   sizeof(T) * num_elements,
+                                   &input_ptr[i].front(), 0, NULL, NULL);
+        test_error(err, "Unable to write input buffer");
+    }
 
-        log_info("Just made a program for float, i=%d, size=%d, in slot %d\n", i, g_arrVecSizes[i], i);
+    for (i = 0; i < kTotalVecCount; i++)
+    {
+        if (std::is_same<T, float>::value)
+        {
+            err = create_single_kernel_helper(
+                context, &programs[i], &kernels[i], 1, &clamp_float_codes[i],
+                "test_clamp");
+            test_error(err, "Unable to create kernel");
+        }
+        else if (std::is_same<T, double>::value)
+        {
+            err = create_single_kernel_helper(
+                context, &programs[i], &kernels[i], 1, &clamp_double_codes[i],
+                "test_clamp");
+            test_error(err, "Unable to create kernel");
+        }
+
+        log_info("Just made a program for %s, i=%d, size=%d, in slot %d\n",
+                 tname.c_str(), i, g_arrVecSizes[i], i);
         fflush(stdout);
 
-        if (test_double) {
-        err = create_single_kernel_helper( context, &program[ kTotalVecCount + i ], &kernel[ kTotalVecCount + i ], 1, &clamp_double_codes[ i ], "test_clamp" );
-        log_info("Just made a program for double, i=%d, size=%d, in slot %d\n", i, g_arrVecSizes[i], kTotalVecCount+i);
-        fflush(stdout);
-        test_error( err, "Unable to create kernel" );
-        }
-    }
-
-    for( i = 0; i < kTotalVecCount; i++ )
-    {
-        for( j = 0; j < 4; j++ )
+        for (j = 0; j < 4; j++)
         {
-            err = clSetKernelArg( kernel[ i ], j, sizeof( streams[ j ] ), &streams[ j ] );
-            test_error( err, "Unable to set kernel argument" );
+            err =
+                clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
+            test_error(err, "Unable to set kernel argument");
         }
 
-        threads[0] = (size_t)n_elems;
+        size_t threads = (size_t)n_elems;
 
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        test_error( err, "Unable to execute kernel" );
+        err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
+                                     0, NULL, NULL);
+        test_error(err, "Unable to execute kernel");
 
-        err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        test_error( err, "Unable to read results" );
+        err = clEnqueueReadBuffer(queue, streams[3], true, 0,
+                                  sizeof(T) * num_elements, &output_ptr[0], 0,
+                                  NULL, NULL);
+        test_error(err, "Unable to read results");
 
-        if (verify_clamp(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems*((g_arrVecSizes[i]))))
+        // Validate only the leading n_elems * g_arrVecSizes[i] results.
+        if (verify_clamp<T>(input_ptr[0].data(), input_ptr[1].data(),
+                            input_ptr[2].data(), output_ptr.data(),
+                            n_elems * ((g_arrVecSizes[i]))))
         {
-            log_error("CLAMP float%d test failed\n", ((g_arrVecSizes[i])));
+            log_error("CLAMP %s%d test failed\n", tname.c_str(),
+                      ((g_arrVecSizes[i])));
             err = -1;
         }
         else
         {
-            log_info("CLAMP float%d test passed\n", ((g_arrVecSizes[i])));
+            log_info("CLAMP %s%d test passed\n", tname.c_str(),
+                     ((g_arrVecSizes[i])));
             err = 0;
         }
 
-
-
-        if (err)
-        break;
-    }
-
-    // If the device supports double precision then test that
-    if (test_double)
-    {
-        for( ; i < 2*kTotalVecCount; i++ )
-        {
-
-            log_info("Start of test_double loop, i is %d\n", i);
-            for( j = 0; j < 4; j++ )
-            {
-                err = clSetKernelArg( kernel[i], j, sizeof( streams[j+4] ), &streams[j+4] );
-                test_error( err, "Unable to set kernel argument" );
-            }
-
-            threads[0] = (size_t)n_elems;
-
-            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-            test_error( err, "Unable to execute kernel" );
-
-            err = clEnqueueReadBuffer( queue, streams[7], CL_TRUE, 0, sizeof(cl_double)*num_elements, (void *)output_ptr_double, 0, NULL, NULL );
-            test_error( err, "Unable to read results" );
-
-            if (verify_clamp_double(input_ptr_double[0], input_ptr_double[1], input_ptr_double[2], output_ptr_double, n_elems*g_arrVecSizes[(i-kTotalVecCount)]))
-            {
-                log_error("CLAMP double%d test failed\n", g_arrVecSizes[(i-kTotalVecCount)]);
-                err = -1;
-            }
-            else
-            {
-                log_info("CLAMP double%d test passed\n", g_arrVecSizes[(i-kTotalVecCount)]);
-                err = 0;
-            }
-
-            if (err)
-            break;
-        }
-    }
-
-
-    for( i = 0; i < ((test_double) ? 8 : 4); i++ )
-    {
-        clReleaseMemObject(streams[i]);
-    }
-    for (i=0; i < ((test_double) ? kTotalVecCount * 2-1 : kTotalVecCount); i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
-    free(program);
-    free(kernel);
-    if (test_double) {
-        free(input_ptr_double[0]);
-        free(input_ptr_double[1]);
-        free(input_ptr_double[2]);
-        free(output_ptr_double);
+        if (err) break;
     }
 
     return err;
 }
 
 
+cl_int ClampTest::Run()
+{
+    // The float variant always runs.
+    cl_int error = test_clamp_fn<float>(device, context, queue, num_elems);
+    test_error(error, "ClampTest::Run<float> failed");
+
+    // The double variant runs only when the device reports cl_khr_fp64.
+    if (is_extension_available(device, "cl_khr_fp64"))
+    {
+        error = test_clamp_fn<double>(device, context, queue, num_elems);
+        test_error(error, "ClampTest::Run<double> failed");
+    }
+
+    return error;
+}
+
+// Entry point for the clamp test; all work is delegated to ClampTest.
+int test_clamp(cl_device_id device, cl_context context, cl_command_queue queue,
+               int n_elems)
+{
+    return MakeAndRunTest<ClampTest>(device, context, queue, n_elems);
+}
diff --git a/test_conformance/commonfns/test_degrees.cpp b/test_conformance/commonfns/test_degrees.cpp
deleted file mode 100644
index 17311ba8..00000000
--- a/test_conformance/commonfns/test_degrees.cpp
+++ /dev/null
@@ -1,470 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-#ifndef M_PI
-#define M_PI    3.14159265358979323846264338327950288
-#endif
-
-static int test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
-
-
-const char *degrees_kernel_code =
-"__kernel void test_degrees(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees2_kernel_code =
-"__kernel void test_degrees2(__global float2 *src, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees4_kernel_code =
-"__kernel void test_degrees4(__global float4 *src, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees8_kernel_code =
-"__kernel void test_degrees8(__global float8 *src, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees16_kernel_code =
-"__kernel void test_degrees16(__global float16 *src, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees3_kernel_code =
-"__kernel void test_degrees3(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(degrees(vload3(tid,src)),tid,dst);\n"
-"}\n";
-
-
-#define MAX_ERR  2.0f
-
-static int
-verify_degrees(float *inptr, float *outptr, int n)
-{
-    float error, max_error = 0.0f;
-    double   r, max_val = NAN;
-    int     i, j, max_index = 0;
-
-    for (i=0,j=0; i<n; i++,j++)
-    {
-        r = (180.0 / M_PI) * inptr[i];
-        error = Ulp_Error( outptr[i], r );
-        if( fabsf(error) > max_error)
-        {
-            max_error = error;
-            max_index = i;
-            max_val = r;
-            if( fabsf(error) > MAX_ERR)
-            {
-                log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
-                return 1;
-            }
-        }
-    }
-
-    log_info( "degrees: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
-
-    return 0;
-}
-
-int
-test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[2];
-    cl_float     *input_ptr[1], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    size_t threads[1];
-    int          num_elements;
-    int          err;
-    int          i;
-    MTdata        d;
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
-    }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &degrees_kernel_code, "test_degrees" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &degrees2_kernel_code, "test_degrees2" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &degrees4_kernel_code, "test_degrees4" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &degrees8_kernel_code, "test_degrees8" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &degrees16_kernel_code, "test_degrees16" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &degrees3_kernel_code, "test_degrees3" );
-    if (err)
-        return -1;
-
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    for (i=0; i < kTotalVecCount; i++)
-    {
-
-        // Line below is troublesome...
-        threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        cl_uint dead = 0xdeaddead;
-        memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
-        err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        if (verify_degrees(input_ptr[0], output_ptr, n_elems*(i+1)))
-        {
-            log_error("DEGREES float%d test failed\n",((g_arrVecSizes[i])));
-            err = -1;
-        }
-        else
-        {
-            log_info("DEGREES float%d test passed\n", ((g_arrVecSizes[i])));
-        }
-
-        if (err)
-            break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    for (i=0; i < kTotalVecCount; i++) {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(output_ptr);
-
-    if( err )
-        return err;
-
-    if( ! is_extension_available( device, "cl_khr_fp64" ) )
-    {
-        log_info( "Skipping double -- cl_khr_fp64 is not supported by this device.\n" );
-        return 0;
-    }
-
-    return test_degrees_double( device, context, queue, n_elems);
-}
-
-#pragma mark -
-
-const char *degrees_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees2_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees2_double(__global double2 *src, __global double2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees4_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees4_double(__global double4 *src, __global double4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees8_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees8_double(__global double8 *src, __global double8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees16_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees16_double(__global double16 *src, __global double16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees3_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees3_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(degrees(vload3(tid,src)),tid,dst);\n"
-"}\n";
-
-
-#define MAX_ERR  2.0f
-
-static int
-verify_degrees_double(double *inptr, double *outptr, int n)
-{
-    float error, max_error = 0.0f;
-    double   r, max_val = NAN;
-    int     i, j, max_index = 0;
-
-    for (i=0,j=0; i<n; i++,j++)
-    {
-        r = (180.0L / 3.14159265358979323846264338327950288L) * inptr[i];
-        error = Ulp_Error_Double( outptr[i], r );
-        if( fabsf(error) > max_error)
-        {
-            max_error = error;
-            max_index = i;
-            max_val = r;
-            if( fabsf(error) > MAX_ERR)
-            {
-                log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
-                return 1;
-            }
-        }
-    }
-
-    log_info( "degreesd: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
-
-    return 0;
-}
-
-static int
-test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[2];
-    cl_double    *input_ptr[1], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    size_t threads[1];
-    int          num_elements;
-    int          err;
-    int          i;
-    MTdata        d;
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    // TODO: line below is clearly wrong
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-
-    input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-        p[i] = get_random_double((-100000. * M_PI), (100000. * M_PI) ,d);
-
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &degrees_kernel_code_double, "test_degrees_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &degrees2_kernel_code_double, "test_degrees2_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &degrees4_kernel_code_double, "test_degrees4_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &degrees8_kernel_code_double, "test_degrees8_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &degrees16_kernel_code_double, "test_degrees16_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &degrees3_kernel_code_double, "test_degrees3_double" );
-    if (err)
-        return -1;
-
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    for (i=0; i < kTotalVecCount; i++)
-    {
-
-        // Line below is troublesome...
-        threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        cl_uint dead = 0xdeaddead;
-        memset_pattern4(output_ptr, &dead, sizeof(cl_double)*num_elements);
-        err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        if (verify_degrees_double(input_ptr[0], output_ptr, n_elems*(i+1)))
-        {
-            log_error("DEGREES double%d test failed\n",((g_arrVecSizes[i])));
-            err = -1;
-        }
-        else
-        {
-            log_info("DEGREES double%d test passed\n", ((g_arrVecSizes[i])));
-        }
-
-        if (err)
-            break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    for (i=0; i < kTotalVecCount; i++) {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(output_ptr);
-
-    return err;
-}
-
-
-
diff --git a/test_conformance/commonfns/test_fmax.cpp b/test_conformance/commonfns/test_fmax.cpp
deleted file mode 100644
index 9dab8107..00000000
--- a/test_conformance/commonfns/test_fmax.cpp
+++ /dev/null
@@ -1,233 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *fmax_kernel_code =
-    "__kernel void test_fmax(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax2_kernel_code =
-    "__kernel void test_fmax2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax4_kernel_code =
-    "__kernel void test_fmax4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax8_kernel_code =
-    "__kernel void test_fmax8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax16_kernel_code =
-    "__kernel void test_fmax16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-
-static const char *fmax3_kernel_code =
-    "__kernel void test_fmax3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    vstore3(fmax(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
-    "}\n";
-
-static int
-verify_fmax(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = (inptrA[i] >= inptrB[i]) ? inptrA[i] : inptrB[i];
-        if (r != outptr[i])
-        return -1;
-    }
-
-    return 0;
-}
-
-int
-test_fmax(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[3];
-    cl_float     *input_ptr[2], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata d;
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    d = init_genrand( gRandomSeed );
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x20000000, 0x20000000, d);
-    }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x20000000, 0x20000000,d );
-    }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &fmax_kernel_code, "test_fmax" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &fmax2_kernel_code, "test_fmax2" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &fmax4_kernel_code, "test_fmax4" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &fmax8_kernel_code, "test_fmax8" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &fmax16_kernel_code, "test_fmax16" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &fmax3_kernel_code, "test_fmax3" );
-    if (err)
-    return -1;
-
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    threads[0] = (size_t)n_elems;
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        if (verify_fmax(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i]))))
-        {
-            log_error("FMAX float%d test failed\n", (g_arrVecSizes[i]));
-            err = -1;
-        }
-        else
-        {
-            log_info("FMAX float%d test passed\n", (g_arrVecSizes[i]));
-            err = 0;
-        }
-
-        if (err)
-        break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
-    return err;
-}
-
-
diff --git a/test_conformance/commonfns/test_fmaxf.cpp b/test_conformance/commonfns/test_fmaxf.cpp
deleted file mode 100644
index e0bc95ed..00000000
--- a/test_conformance/commonfns/test_fmaxf.cpp
+++ /dev/null
@@ -1,244 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *fmax_kernel_code =
-    "__kernel void test_fmax(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax2_kernel_code =
-    "__kernel void test_fmax2(__global float2 *srcA, __global float *srcB, __global float2 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax4_kernel_code =
-    "__kernel void test_fmax4(__global float4 *srcA, __global float *srcB, __global float4 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax8_kernel_code =
-    "__kernel void test_fmax8(__global float8 *srcA, __global float *srcB, __global float8 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax16_kernel_code =
-    "__kernel void test_fmax16(__global float16 *srcA, __global float *srcB, __global float16 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax3_kernel_code =
-    "__kernel void test_fmax3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    vstore3(fmax(vload3(tid,srcA), srcB[tid]),tid,dst);\n"
-    "}\n";
-
-static int
-verify_fmax(float *inptrA, float *inptrB, float *outptr, int n, int veclen)
-{
-    float       r;
-    int         i, j;
-
-    for (i=0; i<n; ) {
-        int ii = i/veclen;
-        for (j=0; j<veclen && i<n; ++j, ++i) {
-            r = (inptrA[i] >= inptrB[ii]) ? inptrA[i] : inptrB[ii];
-            if (r != outptr[i]) {
-                log_info("Verify noted discrepancy at %d (of %d) (vec %d, pos %d)\n",
-                         i,n,ii,j);
-                log_info("SHould be %f, is %f\n", r, outptr[i]);
-                log_info("Taking max of (%f,%f)\n", inptrA[i], inptrB[i]);
-                return -1;
-            }
-        }
-    }
-
-    return 0;
-}
-
-int
-test_fmaxf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[3];
-    cl_float    *input_ptr[2], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata d;
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-        {
-            log_error("clCreateBuffer failed\n");
-            return -1;
-        }
-        streams[1] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, NULL);
-        if (!streams[1])
-        {
-            log_error("clCreateBuffer failed\n");
-            return -1;
-        }
-        streams[2] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, NULL);
-        if (!streams[2])
-        {
-            log_error("clCreateBuffer failed\n");
-            return -1;
-        }
-
-    d = init_genrand( gRandomSeed );
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-        {
-            p[i] = get_random_float(-0x20000000, 0x20000000, d);
-        }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-        {
-            p[i] = get_random_float(-0x20000000, 0x20000000, d);
-        }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements,
-                                (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-        {
-            log_error("clWriteArray failed\n");
-            return -1;
-        }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements,
-                                (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-        {
-            log_error("clWriteArray failed\n");
-            return -1;
-        }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &fmax_kernel_code, "test_fmax" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &fmax2_kernel_code, "test_fmax2" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &fmax4_kernel_code, "test_fmax4" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &fmax8_kernel_code, "test_fmax8" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &fmax16_kernel_code, "test_fmax16" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &fmax3_kernel_code, "test_fmax3" );
-    if (err)
-        return -1;
-
-    for (i=0; i < kTotalVecCount; i++)
-        {
-            err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-            err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-            err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-            if (err != CL_SUCCESS)
-                {
-                    log_error("clSetKernelArgs failed\n");
-                    return -1;
-                }
-        }
-
-    threads[0] = (size_t)n_elems;
-    for (i=0; i < kTotalVecCount; i++)
-        {
-            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-            if (err != CL_SUCCESS)
-                {
-                    log_error("clEnqueueNDRangeKernel failed\n");
-                    return -1;
-                }
-
-            err = clEnqueueReadBuffer(queue, streams[2], true, 0, sizeof(cl_float)*num_elements,
-                                      output_ptr, 0, NULL, NULL);
-            if (err != CL_SUCCESS)
-                {
-                    log_error("clEnqueueReadBuffer failed\n");
-                    return -1;
-                }
-
-            if (verify_fmax(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i])), (g_arrVecSizes[i])))
-                {
-                    log_error("FMAX float%d,float test failed\n", (g_arrVecSizes[i]));
-                    err = -1;
-                }
-            else
-                {
-                    log_info("FMAX float%d,float test passed\n", (g_arrVecSizes[i]));
-                    err = 0;
-                }
-
-            if (err)
-                break;
-        }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i < kTotalVecCount; i++)
-        {
-            clReleaseKernel(kernel[i]);
-            clReleaseProgram(program[i]);
-        }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
-    return err;
-}
-
-
diff --git a/test_conformance/commonfns/test_fmin.cpp b/test_conformance/commonfns/test_fmin.cpp
deleted file mode 100644
index 0a2925f0..00000000
--- a/test_conformance/commonfns/test_fmin.cpp
+++ /dev/null
@@ -1,238 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *fmin_kernel_code =
-    "__kernel void test_fmin(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin2_kernel_code =
-    "__kernel void test_fmin2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin4_kernel_code =
-    "__kernel void test_fmin4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin8_kernel_code =
-    "__kernel void test_fmin8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin16_kernel_code =
-    "__kernel void test_fmin16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-
-static const char *fmin3_kernel_code =
-    "__kernel void test_fmin3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    vstore3(fmin(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
-    "}\n";
-
-int
-verify_fmin(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = (inptrA[i] > inptrB[i]) ? inptrB[i] : inptrA[i];
-        if (r != outptr[i])
-        return -1;
-    }
-
-    return 0;
-}
-
-int
-test_fmin(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[3];
-    cl_float    *input_ptr[2], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    size_t threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata d;
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    num_elements = n_elems * (1 << (kTotalVecCount-1));;
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    d = init_genrand( gRandomSeed );
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x20000000, 0x20000000, d);
-    }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x20000000, 0x20000000, d);
-    }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements,
-                (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements,
-                (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &fmin_kernel_code, "test_fmin" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &fmin2_kernel_code, "test_fmin2" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &fmin4_kernel_code, "test_fmin4" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &fmin8_kernel_code, "test_fmin8" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &fmin16_kernel_code, "test_fmin16" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &fmin3_kernel_code, "test_fmin3" );
-    if (err)
-    return -1;
-
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    threads[0] = (size_t)n_elems;
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        if (verify_fmin(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i]))))
-        {
-            log_error("FMIN float%d test failed\n", (g_arrVecSizes[i]));
-            err = -1;
-        }
-        else
-        {
-            log_info("FMIN float%d test passed\n", (g_arrVecSizes[i]));
-            err = 0;
-        }
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
-    return err;
-}
-
-
diff --git a/test_conformance/commonfns/test_fminf.cpp b/test_conformance/commonfns/test_fminf.cpp
deleted file mode 100644
index 189d58a1..00000000
--- a/test_conformance/commonfns/test_fminf.cpp
+++ /dev/null
@@ -1,236 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *fmin_kernel_code =
-    "__kernel void test_fmin(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin2_kernel_code =
-    "__kernel void test_fmin2(__global float2 *srcA, __global float *srcB, __global float2 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin4_kernel_code =
-    "__kernel void test_fmin4(__global float4 *srcA, __global float *srcB, __global float4 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin8_kernel_code =
-    "__kernel void test_fmin8(__global float8 *srcA, __global float *srcB, __global float8 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin16_kernel_code =
-    "__kernel void test_fmin16(__global float16 *srcA, __global float *srcB, __global float16 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin3_kernel_code =
-    "__kernel void test_fmin3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    vstore3(fmin(vload3(tid,srcA), srcB[tid]),tid,dst);\n"
-    "}\n";
-
-static int
-verify_fmin(float *inptrA, float *inptrB, float *outptr, int n, int veclen)
-{
-    float       r;
-    int         i, j;
-
-    for (i=0; i<n; ) {
-    int ii = i/veclen;
-    for (j=0; j<veclen && i<n; ++j, ++i) {
-        r = (inptrA[i] > inptrB[ii]) ? inptrB[ii] : inptrA[i];
-        if (r != outptr[i])
-        return -1;
-    }
-    }
-
-    return 0;
-}
-
-int
-test_fminf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[3];
-    cl_float     *input_ptr[2], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata      d;
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    d = init_genrand( gRandomSeed );
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x20000000, 0x20000000, d);
-    }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x20000000, 0x20000000, d);
-    }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements,
-                (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements,
-                (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &fmin_kernel_code, "test_fmin" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &fmin2_kernel_code, "test_fmin2" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &fmin4_kernel_code, "test_fmin4" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &fmin8_kernel_code, "test_fmin8" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &fmin16_kernel_code, "test_fmin16" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &fmin3_kernel_code, "test_fmin3" );
-    if (err)
-    return -1;
-
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    threads[0] = (size_t)n_elems;
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        if (verify_fmin(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i])), (g_arrVecSizes[i])))
-        {
-            log_error("fmin float%d,float test failed\n", (g_arrVecSizes[i]));
-            err = -1;
-        }
-        else
-        {
-            log_info("fmin float%d,float test passed\n", (g_arrVecSizes[i]));
-            err = 0;
-        }
-
-        if (err)
-        break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
-    return err;
-}
-
-
diff --git a/test_conformance/commonfns/test_max.cpp b/test_conformance/commonfns/test_max.cpp
deleted file mode 100644
index 9f3b80ec..00000000
--- a/test_conformance/commonfns/test_max.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static int max_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
-{
-    for( int i = 0; i < numElements * vecSize; i++ )
-    {
-        float v = ( x[ i ] < y[ i ] ) ? y[ i ] : x[ i ];
-        if( v != out[ i ] )
-        {
-            log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n",
-                i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize);
-            return -1;
-        }
-    }
-    return 0;
-}
-
-static int max_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
-{
-    for( int i = 0; i < numElements * vecSize; i++ )
-    {
-        double v = ( x[ i ] < y[ i ] ) ? y[ i ] : x[ i ];
-        if( v != out[ i ] )
-        {
-            log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n",
-                i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize);
-            return -1;
-        }
-    }
-    return 0;
-}
-
-int test_max(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    return test_binary_fn( device, context, queue, n_elems, "max", true, max_verify_float, max_verify_double );
-}
-
-
diff --git a/test_conformance/commonfns/test_maxf.cpp b/test_conformance/commonfns/test_maxf.cpp
deleted file mode 100644
index f96df7ea..00000000
--- a/test_conformance/commonfns/test_maxf.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static int max_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
-{
-    for( int i = 0; i < numElements; i++ )
-    {
-        for( int j = 0; j < vecSize; j++ )
-        {
-            float v = ( x[ i * vecSize + j ] < y[ i ] ) ? y[ i ] : x[ i * vecSize + j ];
-            if( v != out[ i * vecSize + j ] )
-            {
-                log_error( "Failure for vector size %d at position %d, element %d:\n\t max(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v,  out[ i * vecSize + j ] );
-                return -1;
-            }
-        }
-    }
-    return 0;
-}
-
-static int max_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
-{
-    for( int i = 0; i < numElements; i++ )
-    {
-        for( int j = 0; j < vecSize; j++ )
-        {
-            double v = ( x[ i * vecSize + j ] < y[ i ] ) ? y[ i ] : x[ i * vecSize + j ];
-            if(    v != out[ i * vecSize + j ] )
-            {
-                log_error( "Failure for vector size %d at position %d, element %d:\n\t max(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v,  out[ i * vecSize + j ] );
-                return -1;
-            }
-        }
-    }
-    return 0;
-}
-
-int test_maxf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    return test_binary_fn( device, context, queue, n_elems, "max", false, max_verify_float, max_verify_double );
-}
-
-
diff --git a/test_conformance/commonfns/test_min.cpp b/test_conformance/commonfns/test_min.cpp
deleted file mode 100644
index 707e24b6..00000000
--- a/test_conformance/commonfns/test_min.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static int min_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
-{
-    for( int i = 0; i < numElements * vecSize; i++ )
-    {
-        float v = ( y[ i ] < x[ i ] ) ? y[ i ] : x[ i ];
-        if( v != out[ i ] ) {
-      log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize);
-            return -1;
-    }
-    }
-    return 0;
-}
-
-static int min_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
-{
-    for( int i = 0; i < numElements * vecSize; i++ )
-    {
-        double v = ( y[ i ] < x[ i ] ) ? y[ i ] : x[ i ];
-        if( v != out[ i ] ) {
-      log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize);
-            return -1;
-    }
-    }
-    return 0;
-}
-
-int test_min(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    return test_binary_fn( device, context, queue, n_elems, "min", true, min_verify_float, min_verify_double );
-}
-
-
diff --git a/test_conformance/commonfns/test_minf.cpp b/test_conformance/commonfns/test_minf.cpp
deleted file mode 100644
index 71b1fbe0..00000000
--- a/test_conformance/commonfns/test_minf.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-#include "harness/errorHelpers.h"
-
-static int min_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
-{
-    for( int i = 0; i < numElements; i++ )
-    {
-        for( int j = 0; j < vecSize; j++ )
-        {
-            float v = ( y[ i ] < x[ i * vecSize + j ] ) ? y[ i ] : x[ i * vecSize + j ];
-            if( v != out[ i * vecSize + j ] )
-            {
-                log_error( "Failure for vector size %d at position %d, element %d:\n\t min(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v,  out[ i * vecSize + j ] );
-                return -1;
-            }
-        }
-    }
-    return 0;
-}
-
-static int min_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
-{
-    int maxFail = 1;
-    int numFails = 0;
-    for( int i = 0; i < numElements; i++ )
-    {
-        for( int j = 0; j < vecSize; j++ )
-        {
-            double v = ( y[ i ] < x[ i * vecSize + j ] ) ? y[ i ] : x[ i * vecSize + j ];
-            if(    v != out[ i * vecSize + j ] )
-            {
-                log_error( "Failure for vector size %d at position %d, element %d:\n\t min(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v,  out[ i * vecSize + j ] );
-                ++numFails;
-                if(numFails >= maxFail) {
-                return -1;
-            }
-        }
-    }
-    }
-    return 0;
-}
-
-int test_minf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    return test_binary_fn( device, context, queue, n_elems, "min", false, min_verify_float, min_verify_double );
-}
-
-
diff --git a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp
index 5dedce3f..92c10100 100644
--- a/test_conformance/commonfns/test_mix.cpp
+++ b/test_conformance/commonfns/test_mix.cpp
@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
+// Copyright (c) 2023 The Khronos Group Inc.
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,179 +13,265 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
-
 #include <stdio.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 
 #include "procs.h"
+#include "test_base.h"
+
+// Generic kernel: pragma, then (type name, width suffix) for x, y, a, dst.
+const char *mix_fn_code_pattern =
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s%s *x, __global %s%s *y, __global %s%s "
+    "*a, __global %s%s *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = mix(x[tid], y[tid], a[tid]);\n"
+    "}\n";
+// 3-wide kernel, vector weight via vload3: pragma, then the type name x4.
+const char *mix_fn_code_pattern_v3 =
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s *x, __global %s *y, __global %s *a, "
+    "__global %s *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    vstore3(mix(vload3(tid, x), vload3(tid, y), vload3(tid, a)), tid, "
+    "dst);\n"
+    "}\n";
+// 3-wide kernel, scalar weight a[tid]: pragma, then the type name x4.
+const char *mix_fn_code_pattern_v3_scalar =
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s *x, __global %s *y, __global %s *a, "
+    "__global %s *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    vstore3(mix(vload3(tid, x), vload3(tid, y), a[tid]), tid, dst);\n"
+    "}\n";
 
-const char *mix_kernel_code =
-"__kernel void test_mix(__global float *srcA, __global float *srcB, __global float *srcC, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = mix(srcA[tid], srcB[tid], srcC[tid]);\n"
-"}\n";
 
 #define MAX_ERR 1e-3
 
-float
-verify_mix(float *inptrA, float *inptrB, float *inptrC, float *outptr, int n)
+namespace {
+
+
+template <typename T>
+int verify_mix(const T *const inptrX, const T *const inptrY,
+               const T *const inptrA, const T *const outptr, const int n,
+               const int veclen, const bool vecParam)
 {
-    float       r, delta, max_err = 0.0f;
-    int         i;
+    T r;
+    float delta = 0.0f;
+    int i;
 
-    for (i=0; i<n; i++)
+    if (vecParam)
     {
-        r = inptrA[i] + ((inptrB[i] - inptrA[i]) * inptrC[i]);
-        delta = fabsf(r - outptr[i]) / r;
-        if(delta > max_err) max_err = delta;
-    }
-    return max_err;
-}
-
-int
-test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem            streams[4];
-    cl_float        *input_ptr[3], *output_ptr, *p;
-    cl_program        program;
-    cl_kernel        kernel;
-    size_t    threads[1];
-    float            max_err;
-    int                err;
-    int                i;
-    MTdata          d;
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[3])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] =  (float) genrand_real1(d);
-    }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = (float) genrand_real1(d);
-    }
-    p = input_ptr[2];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = (float) genrand_real1(d);
-    }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[2], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program, &kernel, 1, &mix_kernel_code, "test_mix" );
-    test_error( err, "Unable to create test kernel" );
-
-  err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
-  err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
-  err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2] );
-  err |= clSetKernelArg(kernel, 3, sizeof streams[3], &streams[3] );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-    threads[0] = (size_t)num_elements;
-    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueNDRangeKernel failed\n");
-        return -1;
-    }
-
-    err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueReadBuffer failed\n");
-        return -1;
-    }
-
-    max_err = verify_mix(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
-    if (max_err > MAX_ERR)
-    {
-        log_error("MIX test failed %g max err\n", max_err);
-        err = -1;
+        for (i = 0; i < n * veclen; i++) // every lane has its own weight
+        {
+            r = inptrX[i] + ((inptrY[i] - inptrX[i]) * inptrA[i]);
+            delta = fabs(double(r - outptr[i])) / r; // error relative to r
+            if (delta > MAX_ERR)
+            {
+                log_error(
+                    "%d) verification error: mix(%a, %a, %a) = *%a vs. %a\n", i,
+                    inptrX[i], inptrY[i], inptrA[i], r, outptr[i]);
+                return -1; // stop at the first mismatch
+            }
+        }
     }
     else
     {
-        log_info("MIX test passed %g max err\n", max_err);
-        err = 0;
+        // Scalar weight: inptrA holds one value per vector, shared by lanes.
+        for (int vec = 0; vec < n; ++vec)
+        {
+            int vi = vec * veclen; // flat index of this vector's first lane
+            for (int j = 0; j < veclen; ++j, ++vi)
+            {
+                r = inptrX[vi] + ((inptrY[vi] - inptrX[vi]) * inptrA[vec]);
+                delta = fabs(double(r - outptr[vi])) / r;
+                if (delta > MAX_ERR)
+                {
+                    log_error("{%d, element %d}) verification error: mix(%a, "
+                              "%a, %a) = *%a vs. %a\n",
+                              vec, j, inptrX[vi], inptrY[vi], inptrA[vec], r,
+                              outptr[vi]);
+                    return -1;
+                }
+            }
+        }
     }
 
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseMemObject(streams[3]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
+    return 0;
+}
+} // namespace
+
+// Runs mix() on T for every vector width and verifies results on the host.
+template <typename T>
+int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue,
+                int n_elems, bool vecParam)
+{
+    clMemWrapper streams[4]; // x, y, a, dst
+    std::vector<T> input_ptr[3], output_ptr;
+
+    std::vector<clProgramWrapper> programs;
+    std::vector<clKernelWrapper> kernels;
+
+    int err, i;
+    MTdataHolder d = MTdataHolder(gRandomSeed);
+    // Kernel type name is looked up by sizeof(T) in type2name.
+    assert(BaseFunctionTest::type2name.find(sizeof(T))
+           != BaseFunctionTest::type2name.end());
+    auto tname = BaseFunctionTest::type2name[sizeof(T)];
+
+    programs.resize(kTotalVecCount);
+    kernels.resize(kTotalVecCount);
+
+    int num_elements = n_elems * (1 << (kTotalVecCount - 1));
+
+    // Host arrays and device buffers each hold num_elements values of T.
+    for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements);
+    output_ptr.resize(num_elements);
+
+    for (i = 0; i < 4; i++)
+    {
+        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                    sizeof(T) * num_elements, NULL, &err);
+        test_error(err, "clCreateBuffer failed");
+    }
+    // Random inputs from genrand_real1 (presumably in [0, 1] -- confirm).
+    for (i = 0; i < num_elements; i++)
+    {
+        input_ptr[0][i] = (T)genrand_real1(d);
+        input_ptr[1][i] = (T)genrand_real1(d);
+        input_ptr[2][i] = (T)genrand_real1(d);
+    }
+    // Only double kernels get the cl_khr_fp64 pragma; otherwise it is empty.
+    std::string pragma_str;
+    if (std::is_same<T, double>::value)
+    {
+        pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+    }
+
+    for (i = 0; i < 3; i++)
+    {
+        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
+                                   sizeof(T) * num_elements,
+                                   &input_ptr[i].front(), 0, NULL, NULL);
+        test_error(err, "Unable to write input buffer");
+    }
+
+    char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
+    // One build/run/verify pass per vector width index i.
+    for (i = 0; i < kTotalVecCount; i++)
+    {
+        std::string kernelSource;
+        if (i >= kVectorSizeCount) // uses the vload3/vstore3 kernels
+        {
+            if (vecParam)
+            {
+                std::string str = mix_fn_code_pattern_v3;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str(), tname.c_str());
+            }
+            else
+            {
+                std::string str = mix_fn_code_pattern_v3_scalar;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str(), tname.c_str());
+            }
+        }
+        else
+        {
+            // regular path
+            std::string str = mix_fn_code_pattern;
+            kernelSource =
+                string_format(str, pragma_str.c_str(), tname.c_str(),
+                              vecSizeNames[i], tname.c_str(), vecSizeNames[i],
+                              tname.c_str(), vecParam ? vecSizeNames[i] : "",
+                              tname.c_str(), vecSizeNames[i]);
+        }
+        const char *programPtr = kernelSource.c_str();
+        err =
+            create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
+                                        (const char **)&programPtr, "test_fn");
+        test_error(err, "Unable to create kernel");
+        // Kernel arguments 0..3 are x, y, a and dst, matching streams[0..3].
+        for (int j = 0; j < 4; j++)
+        {
+            err =
+                clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
+            test_error(err, "Unable to set kernel argument");
+        }
+        // One work-item per vector: the kernels index whole vectors by tid.
+        size_t threads = (size_t)n_elems;
+
+        err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
+                                     0, NULL, NULL);
+        test_error(err, "Unable to execute kernel");
+
+        err = clEnqueueReadBuffer(queue, streams[3], true, 0,
+                                  sizeof(T) * num_elements, &output_ptr[0], 0,
+                                  NULL, NULL);
+        test_error(err, "Unable to read results");
+        // NOTE(review): dst is never reset, so stale results could still pass.
+        if (verify_mix(&input_ptr[0].front(), &input_ptr[1].front(),
+                       &input_ptr[2].front(), &output_ptr.front(), n_elems,
+                       g_arrVecSizes[i], vecParam))
+        {
+            log_error("mix %s%d%s test failed\n", tname.c_str(),
+                      ((g_arrVecSizes[i])),
+                      vecParam ? "" : std::string(", " + tname).c_str());
+            err = -1;
+        }
+        else
+        {
+            log_info("mix %s%d%s test passed\n", tname.c_str(),
+                     ((g_arrVecSizes[i])),
+                     vecParam ? "" : std::string(", " + tname).c_str());
+            err = 0;
+        }
+
+        if (err) break;
+    }
 
     return err;
 }
 
 
+// Runs the float mix() test, then the double one if cl_khr_fp64 is available.
+cl_int MixTest::Run()
+{
+    cl_int status =
+        test_mix_fn<float>(device, context, queue, num_elems, vecParam);
+    test_error(status, "MixTest::Run<float> failed");
+
+    const bool hasFp64 = is_extension_available(device, "cl_khr_fp64");
+    if (hasFp64)
+    {
+        status =
+            test_mix_fn<double>(device, context, queue, num_elems, vecParam);
+        test_error(status, "MixTest::Run<double> failed");
+    }
+    return status;
+}
 
 
+int test_mix(cl_device_id device, cl_context context, cl_command_queue queue,
+             int n_elems)
+{
+    return MakeAndRunTest<MixTest>(device, context, queue, n_elems, "mix",
+                                   true); // presumably vecParam; confirm
+}
 
+
+int test_mixf(cl_device_id device, cl_context context, cl_command_queue queue,
+              int n_elems)
+{
+    return MakeAndRunTest<MixTest>(device, context, queue, n_elems, "mix",
+                                   false); // presumably vecParam; confirm
+}
diff --git a/test_conformance/commonfns/test_radians.cpp b/test_conformance/commonfns/test_radians.cpp
deleted file mode 100644
index 2eb0500f..00000000
--- a/test_conformance/commonfns/test_radians.cpp
+++ /dev/null
@@ -1,468 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-#ifndef M_PI
-#define M_PI    3.14159265358979323846264338327950288
-#endif
-
-static int test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
-
-
-const char *radians_kernel_code =
-"__kernel void test_radians(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians2_kernel_code =
-"__kernel void test_radians2(__global float2 *src, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians4_kernel_code =
-"__kernel void test_radians4(__global float4 *src, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians8_kernel_code =
-"__kernel void test_radians8(__global float8 *src, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians16_kernel_code =
-"__kernel void test_radians16(__global float16 *src, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians3_kernel_code =
-"__kernel void test_radians3(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(radians(vload3(tid,src)),tid,dst);\n"
-"}\n";
-
-
-#define MAX_ERR  2.0f
-
-static float
-verify_radians(float *inptr, float *outptr, int n)
-{
-    float error, max_error = 0.0f;
-    double   r, max_val = NAN;
-    int     i, j, max_index = 0;
-
-    for (i=0,j=0; i<n; i++,j++)
-    {
-        r = (M_PI / 180.0) * inptr[i];
-        error = Ulp_Error( outptr[i], r );
-        if( fabsf(error) > max_error)
-        {
-            max_error = error;
-            max_index = i;
-            max_val = r;
-            if( fabsf(error) > MAX_ERR)
-            {
-                log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
-                return 1;
-            }
-        }
-    }
-
-    log_info( "radians: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
-
-    return 0;
-}
-
-
-int
-test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[2];
-    cl_float     *input_ptr[1], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    size_t       threads[1];
-    int          num_elements;
-    int          err;
-    int          i;
-    MTdata       d;
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
-    }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &radians_kernel_code, "test_radians" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &radians2_kernel_code, "test_radians2" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &radians4_kernel_code, "test_radians4" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &radians8_kernel_code, "test_radians8" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &radians16_kernel_code, "test_radians16" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &radians3_kernel_code, "test_radians3" );
-    if (err)
-        return -1;
-
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        cl_uint dead = 0xdeaddead;
-        memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
-        err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        if (verify_radians(input_ptr[0], output_ptr, n_elems*(i+1)))
-        {
-            log_error("RADIANS float%d test failed\n",((g_arrVecSizes[i])));
-            err = -1;
-        }
-        else
-        {
-            log_info("RADIANS float%d test passed\n", ((g_arrVecSizes[i])));
-        }
-
-        if (err)
-            break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    for (i=0; i < kTotalVecCount; i++) {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(output_ptr);
-    if( err )
-        return err;
-
-    if( ! is_extension_available( device, "cl_khr_fp64" ) )
-    {
-        log_info( "Skipping double -- cl_khr_fp64 is not supported by this device.\n" );
-        return 0;
-    }
-
-    return test_radians_double( device,  context,  queue,  n_elems);
-}
-
-
-
-#pragma mark -
-
-const char *radians_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians2_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians2_double(__global double2 *src, __global double2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians4_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians4_double(__global double4 *src, __global double4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians8_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians8_double(__global double8 *src, __global double8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians16_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians16_double(__global double16 *src, __global double16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians3_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians3_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(radians(vload3(tid,src)),tid,dst);\n"
-"}\n";
-
-
-#define MAX_ERR  2.0f
-
-static double
-verify_radians_double(double *inptr, double *outptr, int n)
-{
-    float error, max_error = 0.0f;
-    double   r, max_val = NAN;
-    int     i, j, max_index = 0;
-
-    for (i=0,j=0; i<n; i++,j++)
-    {
-        r = (3.14159265358979323846264338327950288L / 180.0L) * inptr[i];
-        error = Ulp_Error_Double( outptr[i], r );
-        if( fabsf(error) > max_error)
-        {
-            max_error = error;
-            max_index = i;
-            max_val = r;
-            if( fabsf(error) > MAX_ERR)
-            {
-                log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
-                return 1;
-            }
-        }
-    }
-
-    log_info( "radiansd: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
-
-    return 0;
-}
-
-
-int
-test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[2];
-    cl_double     *input_ptr[1], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    size_t       threads[1];
-    int          num_elements;
-    int          err;
-    int          i;
-    MTdata       d;
-
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    //TODO: line below is clearly wrong
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-
-    input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-        p[i] = get_random_double((float)(-100000.0 * M_PI), (float)(100000.0 * M_PI) ,d);
-
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &radians_kernel_code_double, "test_radians_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &radians2_kernel_code_double, "test_radians2_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &radians4_kernel_code_double, "test_radians4_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &radians8_kernel_code_double, "test_radians8_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &radians16_kernel_code_double, "test_radians16_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &radians3_kernel_code_double, "test_radians3_double" );
-    if (err)
-        return -1;
-
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        cl_uint dead = 0xdeaddead;
-        memset_pattern4(output_ptr, &dead, sizeof(cl_double)*num_elements);
-        err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        if (verify_radians_double(input_ptr[0], output_ptr, n_elems*(i+1)))
-        {
-            log_error("RADIANS double%d test failed\n",((g_arrVecSizes[i])));
-            err = -1;
-        }
-        else
-        {
-            log_info("RADIANS double%d test passed\n", ((g_arrVecSizes[i])));
-        }
-
-        if (err)
-            break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    for (i=0; i < kTotalVecCount; i++) {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(output_ptr);
-
-    return err;
-}
-
diff --git a/test_conformance/commonfns/test_sign.cpp b/test_conformance/commonfns/test_sign.cpp
deleted file mode 100644
index 198b8aba..00000000
--- a/test_conformance/commonfns/test_sign.cpp
+++ /dev/null
@@ -1,437 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static int
-test_sign_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
-
-
-const char *sign_kernel_code =
-"__kernel void test_sign(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign2_kernel_code =
-"__kernel void test_sign2(__global float2 *src, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign4_kernel_code =
-"__kernel void test_sign4(__global float4 *src, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign8_kernel_code =
-"__kernel void test_sign8(__global float8 *src, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign16_kernel_code =
-"__kernel void test_sign16(__global float16 *src, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign3_kernel_code =
-"__kernel void test_sign3(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(sign(vload3(tid,src)), tid, dst);\n"
-"}\n";
-
-
-
-static int
-verify_sign(float *inptr, float *outptr, int n)
-{
-  float       r;
-  int         i;
-
-  for (i=0; i<n; i++)
-  {
-    if (inptr[i] > 0.0f)
-      r = 1.0f;
-    else if (inptr[i] < 0.0f)
-      r = -1.0f;
-    else
-      r = 0.0f;
-    if (r != outptr[i])
-      return -1;
-  }
-
-  return 0;
-}
-
-static const char *fn_names[] = { "SIGN float", "SIGN float2", "SIGN float4", "SIGN float8", "SIGN float16", "SIGN float3" };
-
-int
-test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-  cl_mem      streams[2];
-  cl_float    *input_ptr[1], *output_ptr, *p;
-  cl_program  program[kTotalVecCount];
-  cl_kernel   kernel[kTotalVecCount];
-  size_t  threads[1];
-  int num_elements;
-  int err;
-  int i;
-  MTdata    d;
-
-  num_elements = n_elems * 16;
-
-  input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[0])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[1])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  d = init_genrand( gRandomSeed );
-  p = input_ptr[0];
-  for (i=0; i<num_elements; i++)
-  {
-    p[i] = get_random_float(-0x20000000, 0x20000000, d);
-  }
-  free_mtdata(d);   d = NULL;
-
-
-  err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-
-  err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &sign_kernel_code, "test_sign" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &sign2_kernel_code, "test_sign2" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &sign4_kernel_code, "test_sign4" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &sign8_kernel_code, "test_sign8" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &sign16_kernel_code, "test_sign16" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &sign3_kernel_code, "test_sign3" );
-  if (err)
-    return -1;
-
-  for (i=0; i<kTotalVecCount; i++)
-  {
-      err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-      err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-      if (err != CL_SUCCESS)
-    {
-      log_error("clSetKernelArgs failed\n");
-      return -1;
-    }
-  }
-
-  threads[0] = (size_t)n_elems;
-  for (i=0; i<kTotalVecCount; i++) // change this so we test all
-  {
-    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-
-    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-
-    if (verify_sign(input_ptr[0], output_ptr, n_elems*(i+1)))
-    {
-      log_error("%s test failed\n", fn_names[i]);
-      err = -1;
-    }
-    else
-    {
-      log_info("%s test passed\n", fn_names[i]);
-      err = 0;
-    }
-
-    if (err)
-      break;
-  }
-
-  clReleaseMemObject(streams[0]);
-  clReleaseMemObject(streams[1]);
-  for (i=0; i<kTotalVecCount; i++)
-  {
-    clReleaseKernel(kernel[i]);
-    clReleaseProgram(program[i]);
-  }
-  free(input_ptr[0]);
-  free(output_ptr);
-
-  if (err) return err;
-
-  if (!is_extension_available(device, "cl_khr_fp64"))
-  {
-      log_info("skipping double test -- cl_khr_fp64 not supported.\n");
-      return 0;
-  }
-
-    return test_sign_double( device, context, queue, n_elems);
-}
-
-#pragma mark -
-
-const char *sign_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign2_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign2_double(__global double2 *src, __global double2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign4_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign4_double(__global double4 *src, __global double4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign8_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign8_double(__global double8 *src, __global double8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign16_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign16_double(__global double16 *src, __global double16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign3_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign3_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(sign(vload3(tid,src)), tid, dst);\n"
-"}\n";
-
-
-static int
-verify_sign_double(double *inptr, double *outptr, int n)
-{
-  double       r;
-  int         i;
-
-  for (i=0; i<n; i++)
-  {
-    if (inptr[i] > 0.0)
-      r = 1.0;
-    else if (inptr[i] < 0.0)
-      r = -1.0;
-    else
-      r = 0.0f;
-    if (r != outptr[i])
-      return -1;
-  }
-
-  return 0;
-}
-
-static const char *fn_names_double[] = { "SIGN double", "SIGN double2", "SIGN double4", "SIGN double8", "SIGN double16", "SIGN double3" };
-
-int
-test_sign_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-  cl_mem      streams[2];
-  cl_double    *input_ptr[1], *output_ptr, *p;
-  cl_program  program[kTotalVecCount];
-  cl_kernel   kernel[kTotalVecCount];
-  size_t  threads[1];
-  int num_elements;
-  int err;
-  int i;
-  MTdata    d;
-
-  num_elements = n_elems * 16;
-
-  input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-  output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_double) * num_elements, NULL, NULL);
-  if (!streams[0])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_double) * num_elements, NULL, NULL);
-  if (!streams[1])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  d = init_genrand( gRandomSeed );
-  p = input_ptr[0];
-  for (i=0; i<num_elements; i++)
-    p[i] = get_random_double(-0x20000000, 0x20000000, d);
-
-  free_mtdata(d);   d = NULL;
-
-
-  err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-
-  err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &sign_kernel_code_double, "test_sign_double" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &sign2_kernel_code_double, "test_sign2_double" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &sign4_kernel_code_double, "test_sign4_double" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &sign8_kernel_code_double, "test_sign8_double" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &sign16_kernel_code_double, "test_sign16_double" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &sign3_kernel_code_double, "test_sign3_double" );
-  if (err)
-    return -1;
-
-  for (i=0; i<kTotalVecCount; i++)
-  {
-      err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-      err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-      if (err != CL_SUCCESS)
-    {
-      log_error("clSetKernelArgs failed\n");
-      return -1;
-    }
-  }
-
-  threads[0] = (size_t)n_elems;
-  for (i=0; i<kTotalVecCount; i++) // this hsould be changed
-  {
-    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-
-    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-
-    if (verify_sign_double(input_ptr[0], output_ptr, n_elems*(i+1)))
-    {
-      log_error("%s test failed\n", fn_names_double[i]);
-      err = -1;
-    }
-    else
-    {
-      log_info("%s test passed\n", fn_names_double[i]);
-      err = 0;
-    }
-
-    if (err)
-      break;
-  }
-
-  clReleaseMemObject(streams[0]);
-  clReleaseMemObject(streams[1]);
-  for (i=0; i<kTotalVecCount; i++)
-  {
-    clReleaseKernel(kernel[i]);
-    clReleaseProgram(program[i]);
-  }
-  free(input_ptr[0]);
-  free(output_ptr);
-
-  return err;
-}
-
-
diff --git a/test_conformance/commonfns/test_smoothstep.cpp b/test_conformance/commonfns/test_smoothstep.cpp
index c0cc1d40..31948d3f 100644
--- a/test_conformance/commonfns/test_smoothstep.cpp
+++ b/test_conformance/commonfns/test_smoothstep.cpp
@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
+// Copyright (c) 2023 The Khronos Group Inc.
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,270 +13,283 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
-
 #include <stdio.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 
 #include "procs.h"
+#include "test_base.h"
 
-static const char *smoothstep_kernel_code =
-"__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
 
-static const char *smoothstep2_kernel_code =
-"__kernel void test_smoothstep2(__global float2 *edge0, __global float2 *edge1, __global float2 *x, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
+const char *smoothstep_fn_code_pattern =
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s%s *e0, __global %s%s *e1, __global %s%s "
+    "*x, __global %s%s *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    dst[tid] = smoothstep(e0[tid], e1[tid], x[tid]);\n"
+    "}\n";
 
-static const char *smoothstep4_kernel_code =
-"__kernel void test_smoothstep4(__global float4 *edge0, __global float4 *edge1, __global float4 *x, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
+const char *smoothstep_fn_code_pattern_v3 =
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s *e0, __global %s *e1, __global %s *x, "
+    "__global %s *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    vstore3(smoothstep(vload3(tid,e0), vload3(tid,e1), vload3(tid,x)), "
+    "tid, dst);\n"
+    "}\n";
 
-static const char *smoothstep8_kernel_code =
-"__kernel void test_smoothstep8(__global float8 *edge0, __global float8 *edge1, __global float8 *x, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
+const char *smoothstep_fn_code_pattern_v3_scalar =
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s *e0, __global %s *e1, __global %s *x, "
+    "__global %s *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    vstore3(smoothstep(e0[tid], e1[tid], vload3(tid,x)), tid, dst);\n"
+    "}\n";
 
-static const char *smoothstep16_kernel_code =
-"__kernel void test_smoothstep16(__global float16 *edge0, __global float16 *edge1, __global float16 *x, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep3_kernel_code =
-"__kernel void test_smoothstep3(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(smoothstep(vload3(tid,edge0),vload3(tid,edge1),vload3(tid,x)), tid, dst);\n"
-"}\n";
 
 #define MAX_ERR (1e-5f)
 
-static float
-verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr, int n)
+namespace {
+
+
+template <typename T>
+int verify_smoothstep(const T *const edge0, const T *const edge1,
+                      const T *const x, const T *const outptr, const int n,
+                      const int veclen, const bool vecParam)
 {
-  float       r, t, delta, max_err = 0.0f;
-  int         i;
+    T r, t;
+    float delta = 0;
 
-  for (i=0; i<n; i++)
-  {
-    t = (x[i] - edge0[i]) / (edge1[i] - edge0[i]);
-    if (t < 0.0f)
-      t = 0.0f;
-    else if (t > 1.0f)
-      t = 1.0f;
-    r = t * t * (3.0f - 2.0f * t);
-    delta = (float)fabs(r - outptr[i]);
-    if (delta > max_err)
-      max_err = delta;
-  }
-
-  return max_err;
-}
-
-const static char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4", "SMOOTHSTEP float8", "SMOOTHSTEP float16", "SMOOTHSTEP float3" };
-
-int
-test_smoothstep(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-  cl_mem      streams[4];
-  cl_float    *input_ptr[3], *output_ptr, *p, *p_edge0;
-  cl_program  program[kTotalVecCount];
-  cl_kernel   kernel[kTotalVecCount];
-  size_t  threads[1];
-  float max_err;
-  int num_elements;
-  int err;
-  int i;
-  MTdata d;
-
-  num_elements = n_elems * 16;
-
-  input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[0])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[1])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[2])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[3])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  p = input_ptr[0];
-  d = init_genrand( gRandomSeed );
-  for (i=0; i<num_elements; i++)
-  {
-    p[i] = get_random_float(-0x00400000, 0x00400000, d);
-  }
-
-  p = input_ptr[1];
-  p_edge0 = input_ptr[0];
-  for (i=0; i<num_elements; i++)
-  {
-    float edge0 = p_edge0[i];
-    float edge1;
-    do {
-      edge1 = get_random_float(-0x00400000, 0x00400000, d);
-      if (edge0 < edge1)
-        break;
-    } while (1);
-    p[i] = edge1;
-  }
-
-  p = input_ptr[2];
-  for (i=0; i<num_elements; i++)
-  {
-    p[i] = get_random_float(-0x00400000, 0x00400000, d);
-  }
-  free_mtdata(d);
-  d = NULL;
-
-  err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  err = clEnqueueWriteBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[2], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-
-  err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &smoothstep_kernel_code, "test_smoothstep" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &smoothstep2_kernel_code, "test_smoothstep2" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &smoothstep4_kernel_code, "test_smoothstep4" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &smoothstep8_kernel_code, "test_smoothstep8" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &smoothstep16_kernel_code, "test_smoothstep16" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &smoothstep3_kernel_code, "test_smoothstep3" );
-  if (err)
-    return -1;
-
-  for (i=0; i<kTotalVecCount; i++)
-  {
-      err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-      err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-      err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-      err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3] );
-      if (err != CL_SUCCESS)
+    if (vecParam)
     {
-      log_error("clSetKernelArgs failed\n");
-      return -1;
-    }
-  }
-
-
-  threads[0] = (size_t)n_elems;
-  for (i=0; i<kTotalVecCount; i++)
-  {
-    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-
-
-    err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-
-    max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems * g_arrVecSizes[i]);
-
-    if (max_err > MAX_ERR)
-    {
-      log_error("%s test failed %g max err\n", fn_names[i], max_err);
-      err = -1;
+        // Vector-edge case: edge0, edge1 and x are all indexed per element.
+        for (int i = 0; i < n * veclen; i++)
+        {
+            // Host reference: clamp t to [0, 1], then Hermite interpolate.
+            t = (x[i] - edge0[i]) / (edge1[i] - edge0[i]);
+            t = (t < 0.0f) ? 0.0f : ((t > 1.0f) ? 1.0f : t);
+            r = t * t * (3.0f - 2.0f * t);
+            delta = (float)fabs(r - outptr[i]);
+            if (delta > MAX_ERR)
+            {
+                // Print operands in smoothstep(edge0, edge1, x) order.
+                log_error("%d) verification error: smoothstep(%a, %a, %a) = "
+                          "*%a vs. %a\n",
+                          i, edge0[i], edge1[i], x[i], r, outptr[i]);
+                return -1;
+            }
+        }
     }
     else
     {
-      log_info("%s test passed %g max err\n", fn_names[i], max_err);
-      err = 0;
+        // Scalar-edge case: one edge0/edge1 per vector i, veclen x values.
+        for (int i = 0; i < n; ++i)
+        {
+            int vi = i * veclen;
+            for (int j = 0; j < veclen; ++j, ++vi)
+            {
+                // Host reference: clamp t to [0, 1], then Hermite interpolate.
+                t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]);
+                t = (t < 0.0f) ? 0.0f : ((t > 1.0f) ? 1.0f : t);
+                r = t * t * (3.0f - 2.0f * t);
+                delta = (float)fabs(r - outptr[vi]);
+                if (delta > MAX_ERR)
+                {
+                    // Report vector index i (not i / veclen) and print the
+                    // operands in smoothstep(edge0, edge1, x) order.
+                    log_error("{%d, element %d}) verification error: "
+                              "smoothstep(%a, %a, %a) = *%a vs. %a\n",
+                              i, j, edge0[i], edge1[i], x[vi], r, outptr[vi]);
+                    return -1;
+                }
+            }
+        }
     }
+    return 0;
+}
 
-    if (err)
-      break;
-  }
-
-  clReleaseMemObject(streams[0]);
-  clReleaseMemObject(streams[1]);
-  clReleaseMemObject(streams[2]);
-  clReleaseMemObject(streams[3]);
-  for (i=0; i<kTotalVecCount; i++)
-  {
-    clReleaseKernel(kernel[i]);
-    clReleaseProgram(program[i]);
-  }
-  free(input_ptr[0]);
-  free(input_ptr[1]);
-  free(input_ptr[2]);
-  free(output_ptr);
-
-  return err;
 }
 
 
+template <typename T> // instantiated by Run() for float and double
+int test_smoothstep_fn(cl_device_id device, cl_context context,
+                       cl_command_queue queue, int n_elems, bool vecParam)
+{
+    clMemWrapper streams[4]; // [0..2]: host-written inputs, [3]: read back
+    std::vector<T> input_ptr[3], output_ptr;
+
+    std::vector<clProgramWrapper> programs;
+    std::vector<clKernelWrapper> kernels;
+
+    int err, i;
+    MTdataHolder d = MTdataHolder(gRandomSeed);
+
+    assert(BaseFunctionTest::type2name.find(sizeof(T))
+           != BaseFunctionTest::type2name.end());
+    auto tname = BaseFunctionTest::type2name[sizeof(T)]; // keyed by sizeof(T)
+
+    programs.resize(kTotalVecCount);
+    kernels.resize(kTotalVecCount);
+    // Every host vector and device buffer holds num_elements values of T.
+    int num_elements = n_elems * (1 << (kTotalVecCount - 1));
+
+    for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements);
+    output_ptr.resize(num_elements);
+
+    for (i = 0; i < 4; i++)
+    {
+        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                    sizeof(T) * num_elements, NULL, &err);
+        test_error(err, "clCreateBuffer failed");
+    }
+    // Fill the inputs; the double path also sets the cl_khr_fp64 pragma.
+    std::string pragma_str;
+    if (std::is_same<T, float>::value)
+    {
+        for (i = 0; i < num_elements; i++)
+        { // input 1 takes input 0 as its first range argument (presumably min)
+            input_ptr[0][i] = get_random_float(-0x00200000, 0x00010000, d);
+            input_ptr[1][i] = get_random_float(input_ptr[0][i], 0x00200000, d);
+            input_ptr[2][i] = get_random_float(-0x20000000, 0x20000000, d);
+        }
+    }
+    else if (std::is_same<T, double>::value)
+    {
+        pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+        for (i = 0; i < num_elements; i++)
+        { // same generation scheme as the float path above
+            input_ptr[0][i] = get_random_double(-0x00200000, 0x00010000, d);
+            input_ptr[1][i] = get_random_double(input_ptr[0][i], 0x00200000, d);
+            input_ptr[2][i] = get_random_double(-0x20000000, 0x20000000, d);
+        }
+    }
+    // Blocking writes upload the three input arrays to streams[0..2].
+    for (i = 0; i < 3; i++)
+    {
+        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
+                                   sizeof(T) * num_elements,
+                                   &input_ptr[i].front(), 0, NULL, NULL);
+        test_error(err, "Unable to write input buffer");
+    }
+
+    char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; // by index i
+    // One program/kernel per vector size: build, run, read back, verify.
+    for (i = 0; i < kTotalVecCount; i++)
+    {
+        std::string kernelSource;
+        if (i >= kVectorSizeCount) // remaining indices use the _v3 patterns
+        {
+            if (vecParam)
+            {
+                std::string str = smoothstep_fn_code_pattern_v3;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str(), tname.c_str());
+            }
+            else
+            {
+                std::string str = smoothstep_fn_code_pattern_v3_scalar;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str(), tname.c_str());
+            }
+        }
+        else
+        {
+            // regular path: the first two suffixes are empty unless vecParam
+            std::string str = smoothstep_fn_code_pattern;
+            kernelSource =
+                string_format(str, pragma_str.c_str(), tname.c_str(),
+                              vecParam ? vecSizeNames[i] : "", tname.c_str(),
+                              vecParam ? vecSizeNames[i] : "", tname.c_str(),
+                              vecSizeNames[i], tname.c_str(), vecSizeNames[i]);
+        }
+        const char *programPtr = kernelSource.c_str();
+        err =
+            create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
+                                        (const char **)&programPtr, "test_fn");
+        test_error(err, "Unable to create kernel");
+
+        for (int j = 0; j < 4; j++)
+        {
+            err =
+                clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
+            test_error(err, "Unable to set kernel argument");
+        }
+
+        size_t threads = (size_t)n_elems; // global work size
+
+        err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
+                                     0, NULL, NULL);
+        test_error(err, "Unable to execute kernel");
+
+        err = clEnqueueReadBuffer(queue, streams[3], true, 0,
+                                  sizeof(T) * num_elements, &output_ptr[0], 0,
+                                  NULL, NULL);
+        test_error(err, "Unable to read results");
+        // A non-zero result from verify_smoothstep is treated as a failure.
+        if (verify_smoothstep((T *)&input_ptr[0].front(),
+                              (T *)&input_ptr[1].front(),
+                              (T *)&input_ptr[2].front(), &output_ptr[0],
+                              n_elems, g_arrVecSizes[i], vecParam))
+        {
+            log_error("smoothstep %s%d%s test failed\n", tname.c_str(),
+                      ((g_arrVecSizes[i])),
+                      vecParam ? "" : std::string(", " + tname).c_str());
+            err = -1;
+        }
+        else
+        {
+            log_info("smoothstep %s%d%s test passed\n", tname.c_str(),
+                     ((g_arrVecSizes[i])),
+                     vecParam ? "" : std::string(", " + tname).c_str());
+            err = 0;
+        }
+
+        if (err) break; // stop at the first failing vector size
+    }
+
+    return err;
+}
+
+
+cl_int SmoothstepTest::Run() // float always, double only with cl_khr_fp64
+{
+    cl_int error = CL_SUCCESS;
+
+    error =
+        test_smoothstep_fn<float>(device, context, queue, num_elems, vecParam);
+    test_error(error, "SmoothstepTest::Run<float> failed");
+    // The double variant is skipped when the device lacks cl_khr_fp64.
+    if (is_extension_available(device, "cl_khr_fp64"))
+    {
+        error = test_smoothstep_fn<double>(device, context, queue, num_elems,
+                                           vecParam);
+        test_error(error, "SmoothstepTest::Run<double> failed");
+    }
+
+    return error;
+}
+
+
+int test_smoothstep(cl_device_id device, cl_context context,
+                    cl_command_queue queue, int n_elems)
+{ // final `true` is presumably the vecParam flag - TODO confirm
+    return MakeAndRunTest<SmoothstepTest>(device, context, queue, n_elems,
+                                          "smoothstep", true);
+}
+
+
+int test_smoothstepf(cl_device_id device, cl_context context,
+                     cl_command_queue queue, int n_elems)
+{ // final `false` is presumably the vecParam flag - TODO confirm
+    return MakeAndRunTest<SmoothstepTest>(device, context, queue, n_elems,
+                                          "smoothstep", false);
+}
diff --git a/test_conformance/commonfns/test_smoothstepf.cpp b/test_conformance/commonfns/test_smoothstepf.cpp
deleted file mode 100644
index ac09e9ec..00000000
--- a/test_conformance/commonfns/test_smoothstepf.cpp
+++ /dev/null
@@ -1,259 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *smoothstep_kernel_code =
-"__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep2_kernel_code =
-"__kernel void test_smoothstep2f(__global float *edge0, __global float *edge1, __global float2 *x, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep4_kernel_code =
-"__kernel void test_smoothstep4f(__global float *edge0, __global float *edge1, __global float4 *x, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-#define MAX_ERR (1e-5f)
-
-float verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr,
-                        int n, int veclen)
-{
-  float       r, t, delta, max_err = 0.0f;
-  int         i, j;
-
-  for (i = 0; i < n; ++i) {
-    int vi = i * veclen;
-    for (j = 0; j < veclen; ++j, ++vi) {
-      t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]);
-      if (t < 0.0f)
-        t = 0.0f;
-      else if (t > 1.0f)
-        t = 1.0f;
-      r = t * t * (3.0f - 2.0f * t);
-      delta = (float)fabs(r - outptr[vi]);
-      if (delta > max_err)
-        max_err = delta;
-    }
-  }
-  return max_err;
-}
-
-const static char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4"};
-
-int
-test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-  cl_mem      streams[4];
-  cl_float    *input_ptr[3], *output_ptr, *p, *p_edge0;
-  cl_program  program[3];
-  cl_kernel   kernel[3];
-  size_t  threads[1];
-  float max_err = 0.0f;
-  int num_elements;
-  int err;
-  int i;
-  MTdata d;
-
-  num_elements = n_elems * 4;
-
-  input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[0])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[1])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[2])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[3])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  d = init_genrand( gRandomSeed );
-  p = input_ptr[0];
-  for (i=0; i<num_elements; i++)
-  {
-    p[i] = get_random_float(-0x00200000, 0x00200000, d);
-  }
-
-  p = input_ptr[1];
-  p_edge0 = input_ptr[0];
-  for (i=0; i<num_elements; i++)
-  {
-    float edge0 = p_edge0[i];
-    float edge1;
-    do {
-      edge1 = get_random_float( -0x00200000, 0x00200000, d);
-      if (edge0 < edge1)
-        break;
-    } while (1);
-    p[i] = edge1;
-  }
-
-  p = input_ptr[2];
-  for (i=0; i<num_elements; i++)
-  {
-    p[i] = get_random_float(-0x00200000, 0x00200000, d);
-  }
-  free_mtdata(d);
-  d = NULL;
-
-  err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  err = clEnqueueWriteBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[2], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-
-  err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &smoothstep_kernel_code, "test_smoothstep" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &smoothstep2_kernel_code, "test_smoothstep2f" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &smoothstep4_kernel_code, "test_smoothstep4f" );
-  if (err)
-    return -1;
-
-  for (i=0; i<3; i++)
-  {
-      err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-      err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-      err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-      err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3] );
-      if (err != CL_SUCCESS)
-    {
-      log_error("clSetKernelArgs failed\n");
-      return -1;
-    }
-  }
-
-  threads[0] = (size_t)n_elems;
-  for (i=0; i<3; i++)
-  {
-    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-
-    err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-
-    switch (i)
-    {
-      case 0:
-        max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 1);
-        break;
-      case 1:
-        max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 2);
-        break;
-      case 2:
-        max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 4);
-        break;
-    }
-
-    if (max_err > MAX_ERR)
-    {
-      log_error("%s test failed %g max err\n", fn_names[i], max_err);
-      err = -1;
-    }
-    else
-    {
-      log_info("%s test passed %g max err\n", fn_names[i], max_err);
-      err = 0;
-    }
-
-    if (err)
-      break;
-  }
-
-  clReleaseMemObject(streams[0]);
-  clReleaseMemObject(streams[1]);
-  clReleaseMemObject(streams[2]);
-  clReleaseMemObject(streams[3]);
-  for (i=0; i<3; i++)
-  {
-    clReleaseKernel(kernel[i]);
-    clReleaseProgram(program[i]);
-  }
-  free(input_ptr[0]);
-  free(input_ptr[1]);
-  free(input_ptr[2]);
-  free(output_ptr);
-
-  return err;
-}
-
-
diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp
index ed5bc418..dc91766e 100644
--- a/test_conformance/commonfns/test_step.cpp
+++ b/test_conformance/commonfns/test_step.cpp
@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
+// Copyright (c) 2023 The Khronos Group Inc.
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,524 +13,252 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
-
 #include <stdio.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 
 #include "procs.h"
-
-static int
-test_step_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
+#include "test_base.h"
 
 
-const char *step_kernel_code =
-"__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
+const char *step_fn_code_pattern = "%s\n" /* optional pragma */
+                                   "__kernel void test_fn(__global %s%s *edge, "
+                                   "__global %s%s *x, __global %s%s *dst)\n"
+                                   "{\n" /* %s%s = type name + size suffix */
+                                   "    int  tid = get_global_id(0);\n"
+                                   "    dst[tid] = step(edge[tid], x[tid]);\n"
+                                   "}\n";
 
-const char *step2_kernel_code =
-"__kernel void test_step2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
+const char *step_fn_code_pattern_v3 = // 3-wide kernel, 3-wide edge
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s *edge, __global %s *x, __global %s "
+    "*dst)\n" /* each %s above receives the scalar type name */
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    vstore3(step(vload3(tid,edge), vload3(tid,x)), tid, dst);\n"
+    "}\n";
 
-const char *step4_kernel_code =
-"__kernel void test_step4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step8_kernel_code =
-"__kernel void test_step8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step16_kernel_code =
-"__kernel void test_step16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step3_kernel_code =
-"__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
-"}\n";
+const char *step_fn_code_pattern_v3_scalar = // 3-wide kernel, scalar edge
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s *edge, __global %s *x, __global %s "
+    "*dst)\n" /* each %s above receives the scalar type name */
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    vstore3(step(edge[tid], vload3(tid,x)), tid, dst);\n"
+    "}\n";
 
 
-int
-verify_step(float *inptrA, float *inptrB, float *outptr, int n)
+namespace {
+
+template <typename T> // returns 0 when all outputs match, -1 otherwise
+int verify_step(const T *const inptrA, const T *const inptrB,
+                const T *const outptr, const int n, const int veclen,
+                const bool vecParam)
 {
-    float       r;
-    int         i;
+    T r; // host reference: 0 when x (inptrB) < edge (inptrA), else 1
 
-    for (i=0; i<n; i++)
+    if (vecParam) // one edge value per x element
     {
-        r = (inptrB[i] < inptrA[i]) ? 0.0f : 1.0f;
-        if (r != outptr[i])
-            return -1;
+        for (int i = 0; i < n * veclen; i++)
+        {
+            r = (inptrB[i] < inptrA[i]) ? 0.0 : 1.0;
+            if (r != outptr[i]) return -1;
+        }
+    }
+    else // one scalar edge per work-item, shared by its veclen x values
+    {
+        for (int i = 0; i < n * veclen;) // cover all n * veclen outputs
+        {
+            int ii = i / veclen; // work-item index selects the edge
+            for (int j = 0; j < veclen && i < n * veclen; ++j, ++i)
+            {
+                r = (inptrB[i] < inptrA[ii]) ? 0.0f : 1.0f;
+                if (r != outptr[i])
+                {
+                    log_error("Failure @ {%d, element %d}: step(%a,%a) -> *%a "
+                              "vs %a\n",
+                              ii, j, inptrA[ii], inptrB[i], r, outptr[i]);
+                    return -1;
+                }
+            }
+        }
     }
 
     return 0;
 }
 
-int
-test_step(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+}
+
+
+template <typename T>
+int test_step_fn(cl_device_id device, cl_context context,
+                 cl_command_queue queue, int n_elems, bool vecParam)
 {
-    cl_mem      streams[3];
-    cl_float    *input_ptr[2], *output_ptr, *p;
-  cl_program  program[kTotalVecCount];
-  cl_kernel   kernel[kTotalVecCount];
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata d;
-  num_elements = n_elems * 16;
+    clMemWrapper streams[3];
+    std::vector<T> input_ptr[2], output_ptr;
 
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
+    std::vector<clProgramWrapper> programs;
+    std::vector<clKernelWrapper> kernels;
+
+    int err, i;
+    MTdataHolder d = MTdataHolder(gRandomSeed);
+
+    assert(BaseFunctionTest::type2name.find(sizeof(T))
+           != BaseFunctionTest::type2name.end());
+    auto tname = BaseFunctionTest::type2name[sizeof(T)];
+    int num_elements = n_elems * (1 << (kTotalVecCount - 1));
+
+    programs.resize(kTotalVecCount);
+    kernels.resize(kTotalVecCount);
+
+    for (i = 0; i < 2; i++) input_ptr[i].resize(num_elements);
+    output_ptr.resize(num_elements);
+
+    for (i = 0; i < 3; i++)
     {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
+        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                    sizeof(T) * num_elements, NULL, &err);
+        test_error(err, "clCreateBuffer failed");
     }
 
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
+    std::string pragma_str;
+    if (std::is_same<T, float>::value)
     {
-        p[i] = get_random_float(-0x40000000, 0x40000000, d);
-    }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x40000000, 0x40000000, d);
-    }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code, "test_step" );
-    if (err) return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code, "test_step2" );
-    if (err) return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code, "test_step4" );
-    if (err) return -1;
-    err = create_single_kernel_helper(context, &program[3], &kernel[3], 1,
-                                      &step8_kernel_code, "test_step8");
-    if (err) return -1;
-    err = create_single_kernel_helper(context, &program[4], &kernel[4], 1,
-                                      &step16_kernel_code, "test_step16");
-    if (err) return -1;
-    err = create_single_kernel_helper(context, &program[5], &kernel[5], 1,
-                                      &step3_kernel_code, "test_step3");
-    if (err) return -1;
-
-  for (i=0; i <kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-        if (err != CL_SUCCESS)
+        for (i = 0; i < num_elements; i++)
         {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
+            input_ptr[0][i] = get_random_float(-0x40000000, 0x40000000, d);
+            input_ptr[1][i] = get_random_float(-0x40000000, 0x40000000, d);
+        }
+    }
+    else if (std::is_same<T, double>::value)
+    {
+        pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+        for (i = 0; i < num_elements; i++)
+        {
+            input_ptr[0][i] = get_random_double(-0x40000000, 0x40000000, d);
+            input_ptr[1][i] = get_random_double(-0x40000000, 0x40000000, d);
         }
     }
 
-    threads[0] = (size_t)n_elems;
-  for (i=0; i<kTotalVecCount; i++)
+    for (i = 0; i < 2; i++)
     {
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
+        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
+                                   sizeof(T) * num_elements,
+                                   &input_ptr[i].front(), 0, NULL, NULL);
+        test_error(err, "Unable to write input buffer");
+    }
+
+    char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
+
+    for (i = 0; i < kTotalVecCount; i++)
+    {
+        std::string kernelSource;
+        if (i >= kVectorSizeCount)
         {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
+            if (vecParam)
+            {
+                std::string str = step_fn_code_pattern_v3;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str());
+            }
+            else
+            {
+                std::string str = step_fn_code_pattern_v3_scalar;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str());
+            }
+        }
+        else
+        {
+            // regular path
+            std::string str = step_fn_code_pattern;
+            kernelSource =
+                string_format(str, pragma_str.c_str(), tname.c_str(),
+                              vecParam ? vecSizeNames[i] : "", tname.c_str(),
+                              vecSizeNames[i], tname.c_str(), vecSizeNames[i]);
+        }
+        const char *programPtr = kernelSource.c_str();
+        err =
+            create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
+                                        (const char **)&programPtr, "test_fn");
+        test_error(err, "Unable to create kernel");
+
+        for (int j = 0; j < 3; j++)
+        {
+            err =
+                clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
+            test_error(err, "Unable to set kernel argument");
         }
 
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
+        size_t threads = (size_t)n_elems;
+
+        err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
+                                     0, NULL, NULL);
+        test_error(err, "Unable to execute kernel");
+
+        err = clEnqueueReadBuffer(queue, streams[2], true, 0,
+                                  sizeof(T) * num_elements, &output_ptr[0], 0,
+                                  NULL, NULL);
+        test_error(err, "Unable to read results");
+
+        err = verify_step(&input_ptr[0].front(), &input_ptr[1].front(),
+                          &output_ptr.front(), n_elems, g_arrVecSizes[i],
+                          vecParam);
+        if (err)
         {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
+            log_error("step %s%d%s test failed\n", tname.c_str(),
+                      ((g_arrVecSizes[i])),
+                      vecParam ? "" : std::string(", " + tname).c_str());
+            err = -1;
         }
-
-        switch (i)
+        else
         {
-            case 0:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems);
-                if (err)
-                    log_error("STEP float test failed\n");
-                else
-                    log_info("STEP float test passed\n");
-                break;
-
-            case 1:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*2);
-                if (err)
-                    log_error("STEP float2 test failed\n");
-                else
-                    log_info("STEP float2 test passed\n");
-                break;
-
-            case 2:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*4);
-                if (err)
-                    log_error("STEP float4 test failed\n");
-                else
-                    log_info("STEP float4 test passed\n");
-                break;
-
-        case 3:
-        err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*8);
-        if (err)
-          log_error("STEP float8 test failed\n");
-        else
-          log_info("STEP float8 test passed\n");
-        break;
-
-        case 4:
-        err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*16);
-        if (err)
-          log_error("STEP float16 test failed\n");
-        else
-          log_info("STEP float16 test passed\n");
-        break;
-
-        case 5:
-        err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*3);
-        if (err)
-          log_error("STEP float3 test failed\n");
-        else
-          log_info("STEP float3 test passed\n");
-        break;
+            log_info("step %s%d%s test passed\n", tname.c_str(),
+                     ((g_arrVecSizes[i])),
+                     vecParam ? "" : std::string(", " + tname).c_str());
+            err = 0;
         }
 
         if (err)
             break;
     }
 
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-  for (i=0; i<kTotalVecCount; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
-    if( err )
-        return err;
-
-    if( ! is_extension_available( device, "cl_khr_fp64" ))
-        return 0;
-
-    return test_step_double( device, context, queue, n_elems);
-}
-
-
-#pragma mark -
-
-const char *step_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step2_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step2_double(__global double2 *srcA, __global double2 *srcB, __global double2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step4_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step4_double(__global double4 *srcA, __global double4 *srcB, __global double4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step8_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step8_double(__global double8 *srcA, __global double8 *srcB, __global double8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step16_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step16_double(__global double16 *srcA, __global double16 *srcB, __global double16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step3_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step3_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
-"}\n";
-
-
-int
-verify_step_double(double *inptrA, double *inptrB, double *outptr, int n)
-{
-    double       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = (inptrB[i] < inptrA[i]) ? 0.0 : 1.0;
-        if (r != outptr[i])
-            return -1;
-    }
-
-    return 0;
-}
-
-static int
-test_step_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem      streams[3];
-    cl_double    *input_ptr[2], *output_ptr, *p;
-    cl_program  program[kTotalVecCount];
-    cl_kernel   kernel[kTotalVecCount];
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata d;
-    num_elements = n_elems * 16;
-
-    input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_double(-0x40000000, 0x40000000, d);
-    }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_double(-0x40000000, 0x40000000, d);
-    }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code_double, "test_step_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code_double, "test_step2_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code_double, "test_step4_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code_double, "test_step8_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code_double, "test_step16_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code_double, "test_step3_double" );
-    if (err)
-        return -1;
-
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    threads[0] = (size_t)n_elems;
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        switch (i)
-        {
-            case 0:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems);
-                if (err)
-                    log_error("STEP double test failed\n");
-                else
-                    log_info("STEP double test passed\n");
-                break;
-
-            case 1:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*2);
-                if (err)
-                    log_error("STEP double2 test failed\n");
-                else
-                    log_info("STEP double2 test passed\n");
-                break;
-
-            case 2:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*4);
-                if (err)
-                    log_error("STEP double4 test failed\n");
-                else
-                    log_info("STEP double4 test passed\n");
-                break;
-
-        case 3:
-        err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*8);
-        if (err)
-          log_error("STEP double8 test failed\n");
-        else
-          log_info("STEP double8 test passed\n");
-        break;
-
-        case 4:
-        err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*16);
-        if (err)
-          log_error("STEP double16 test failed\n");
-        else
-          log_info("STEP double16 test passed\n");
-        break;
-
-        case 5:
-        err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*3);
-        if (err)
-          log_error("STEP double3 test failed\n");
-        else
-          log_info("STEP double3 test passed\n");
-        break;
-        }
-
-        if (err)
-            break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
     return err;
 }
 
+
+cl_int StepTest::Run()
+{
+    // Single precision is always exercised.
+    cl_int status =
+        test_step_fn<float>(device, context, queue, num_elems, vecParam);
+    test_error(status, "StepTest::Run<float> failed");
+
+    // Double precision only runs when the device reports cl_khr_fp64.
+    const bool hasFp64 = is_extension_available(device, "cl_khr_fp64");
+    if (!hasFp64) return status;
+
+    status = test_step_fn<double>(device, context, queue, num_elems, vecParam);
+    test_error(status, "StepTest::Run<double> failed");
+
+    return status;
+}
+
+// Harness entry point: runs StepTest for "step" with the trailing flag true.
+int test_step(cl_device_id device, cl_context context, cl_command_queue queue,
+              int n_elems)
+{
+    return MakeAndRunTest<StepTest>(device, context, queue, n_elems, "step",
+                                    true); // presumably vector first operand
+}
+
+// Harness entry point: runs StepTest for "step" with the trailing flag false.
+int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue,
+               int n_elems)
+{
+    return MakeAndRunTest<StepTest>(device, context, queue, n_elems, "step",
+                                    false); // presumably scalar first operand
+}
diff --git a/test_conformance/commonfns/test_stepf.cpp b/test_conformance/commonfns/test_stepf.cpp
deleted file mode 100644
index efada227..00000000
--- a/test_conformance/commonfns/test_stepf.cpp
+++ /dev/null
@@ -1,546 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
-
-
-static const char *step_kernel_code =
-"__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step2_kernel_code =
-"__kernel void test_step2(__global float *srcA, __global float2 *srcB, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step4_kernel_code =
-"__kernel void test_step4(__global float *srcA, __global float4 *srcB, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step8_kernel_code =
-"__kernel void test_step8(__global float *srcA, __global float8 *srcB, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step16_kernel_code =
-"__kernel void test_step16(__global float *srcA, __global float16 *srcB, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step3_kernel_code =
-"__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(step(srcA[tid], vload3(tid,srcB)) ,tid,dst);\n"
-"}\n";
-
-
-static int
-verify_step( cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n, int veclen)
-{
-    float       r;
-    int         i, j;
-
-    for (i=0; i<n; ) {
-        int ii = i/veclen;
-        for (j=0; j<veclen && i<n; ++j, ++i) {
-            r = (inptrB[i] < inptrA[ii]) ? 0.0f : 1.0f;
-            if (r != outptr[i])
-            {
-                log_error( "Failure @ {%d, element %d}: step(%a,%a) -> *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] );
-                return -1;
-            }
-        }
-    }
-
-    return 0;
-}
-
-int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem      streams[3];
-    cl_float    *input_ptr[2], *output_ptr, *p;
-    cl_program  program[kTotalVecCount];
-    cl_kernel   kernel[kTotalVecCount];
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata d;
-    num_elements = n_elems * 16;
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x40000000, 0x40000000, d);
-    }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x40000000, 0x40000000, d);
-    }
-    free_mtdata(d);   d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code, "test_step" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code, "test_step2" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code, "test_step4" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code, "test_step8" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code, "test_step16" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code, "test_step3" );
-    if (err)
-        return -1;
-
-    for (i=0; i <kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    threads[0] = (size_t)n_elems;
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        switch (i)
-        {
-            case 0:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems, 1);
-                if (err)
-                    log_error("STEP float test failed\n");
-                else
-                    log_info("STEP float test passed\n");
-                break;
-
-            case 1:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*2, 2);
-                if (err)
-                    log_error("STEP float2 test failed\n");
-                else
-                    log_info("STEP float2 test passed\n");
-                break;
-
-            case 2:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*4, 4);
-                if (err)
-                    log_error("STEP float4 test failed\n");
-                else
-                    log_info("STEP float4 test passed\n");
-                break;
-
-            case 3:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*8, 8);
-                if (err)
-                    log_error("STEP float8 test failed\n");
-                else
-                    log_info("STEP float8 test passed\n");
-                break;
-
-            case 4:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*16, 16);
-                if (err)
-                    log_error("STEP float16 test failed\n");
-                else
-                    log_info("STEP float16 test passed\n");
-                break;
-
-            case 5:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*3, 3);
-                if (err)
-                    log_error("STEP float3 test failed\n");
-                else
-                    log_info("STEP float3 test passed\n");
-                break;
-        }
-
-        if (err)
-            break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
-    if(err)
-        return err;
-
-    if( ! is_extension_available( device, "cl_khr_fp64" ))
-    {
-        log_info( "Device does not support cl_khr_fp64.  Skipping double precision tests.\n" );
-        return 0;
-    }
-
-    return test_stepf_double( device, context, queue, n_elems);
-}
-
-#pragma mark -
-
-static const char *step_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step2_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step2_double(__global double *srcA, __global double2 *srcB, __global double2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step4_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step4_double(__global double *srcA, __global double4 *srcB, __global double4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step8_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step8_double(__global double *srcA, __global double8 *srcB, __global double8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step16_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step16_double(__global double *srcA, __global double16 *srcB, __global double16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step3_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step3_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(step(srcA[tid], vload3(tid,srcB)) ,tid,dst);\n"
-"}\n";
-
-
-static int
-verify_step_double(cl_double *inptrA, cl_double *inptrB, cl_double *outptr, int n, int veclen)
-{
-    double r;
-    int    i, j;
-
-    for (i=0; i<n; ) {
-        int ii = i/veclen;
-        for (j=0; j<veclen && i<n; ++j, ++i) {
-            r = (inptrB[i] < inptrA[ii]) ? 0.0 : 1.0;
-            if (r != outptr[i])
-            {
-                log_error( "Failure @ {%d, element %d}: step(%a,%a) -> *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] );
-                return -1;
-            }
-        }
-    }
-
-    return 0;
-}
-
-int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem      streams[3];
-    cl_double  *input_ptr[2], *output_ptr, *p;
-    cl_program  program[kTotalVecCount];
-    cl_kernel   kernel[kTotalVecCount];
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata    d;
-    num_elements = n_elems * 16;
-
-    input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-        p[i] = get_random_double(-0x40000000, 0x40000000, d);
-
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-        p[i] = get_random_double(-0x40000000, 0x40000000, d);
-
-    free_mtdata(d);   d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code_double, "test_step_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code_double, "test_step2_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code_double, "test_step4_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code_double, "test_step8_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code_double, "test_step16_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code_double, "test_step3_double" );
-    if (err)
-        return -1;
-
-    for (i=0; i <kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    threads[0] = (size_t)n_elems;
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        switch (i)
-        {
-            case 0:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems, 1);
-                if (err)
-                    log_error("STEP double test failed\n");
-                else
-                    log_info("STEP double test passed\n");
-                break;
-
-            case 1:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*2, 2);
-                if (err)
-                    log_error("STEP double2 test failed\n");
-                else
-                    log_info("STEP double2 test passed\n");
-                break;
-
-            case 2:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*4, 4);
-                if (err)
-                    log_error("STEP double4 test failed\n");
-                else
-                    log_info("STEP double4 test passed\n");
-                break;
-
-            case 3:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*8, 8);
-                if (err)
-                    log_error("STEP double8 test failed\n");
-                else
-                    log_info("STEP double8 test passed\n");
-                break;
-
-            case 4:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*16, 16);
-                if (err)
-                    log_error("STEP double16 test failed\n");
-                else
-                    log_info("STEP double16 test passed\n");
-                break;
-
-            case 5:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*3, 3);
-                if (err)
-                    log_error("STEP double3 test failed\n");
-                else
-                    log_info("STEP double3 test passed\n");
-                break;
-        }
-
-        if (err)
-            break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
-    return err;
-}
-
diff --git a/test_conformance/commonfns/test_unary_fn.cpp b/test_conformance/commonfns/test_unary_fn.cpp
new file mode 100644
index 00000000..fed4389d
--- /dev/null
+++ b/test_conformance/commonfns/test_unary_fn.cpp
@@ -0,0 +1,365 @@
+//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <vector>
+
+#include "harness/deviceInfo.h"
+#include "harness/typeWrappers.h"
+
+#include "procs.h"
+#include "test_base.h"
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846264338327950288
+#endif
+
+
+// clang-format off
+const char *unary_fn_code_pattern = /* used for sizes below kVectorSizeCount */
+"%s\n" /* optional pragma */
+"__kernel void test_fn(__global %s%s *src, __global %s%s *dst)\n" /* type name + width suffix, twice */
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = %s(src[tid]);\n" /* name of the function under test */
+"}\n";
+
+const char *unary_fn_code_pattern_v3 = /* used once i >= kVectorSizeCount */
+"%s\n" /* optional pragma */
+"__kernel void test_fn(__global %s *src, __global %s *dst)\n" /* scalar type name, twice */
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    vstore3(%s(vload3(tid,src)), tid, dst);\n" /* name of the function under test */
+"}\n";
+// clang-format on
+
+
+#define MAX_ERR 2.0f
+
+namespace {
+
+
+template <typename T> float UlpFn(const T &val, const double &r)
+{
+    // ULP error of result `val` against reference `r`. Only double uses
+    // Ulp_Error_Double; float and half keep using Ulp_Error. Ending on an
+    // unconditional return means no path falls off the end of the function.
+    if (std::is_same<T, double>::value)
+        return Ulp_Error_Double(val, r);
+    return Ulp_Error(val, r);
+}
+
+// Checks degrees() output against (180 / pi) * input, within MAX_ERR ULPs.
+template <typename T>
+int verify_degrees(const T *const inptr, const T *const outptr, int n)
+{
+    float error, max_error = 0.0f;
+    double r, max_val = NAN;
+    int max_index = 0;
+
+    for (int i = 0; i < n; i++)
+    {
+        r = (180.0 / M_PI) * inptr[i];
+
+        error = UlpFn(outptr[i], r);
+        // Magnitude compare: max_error keeps its sign for the log below.
+        if (fabsf(error) > fabsf(max_error))
+        {
+            max_error = error;
+            max_index = i;
+            max_val = r;
+            if (fabsf(error) > MAX_ERR)
+            {
+                log_error("%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n",
+                          i, inptr[i], r, outptr[i], r, outptr[i], error);
+                return 1; // out of tolerance: fail on the first offender
+            }
+        }
+    }
+
+    log_info("degrees: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n",
+             max_error, max_index, max_val, outptr[max_index], max_val,
+             outptr[max_index]);
+
+    return 0;
+}
+
+// Checks radians() output against (pi / 180) * input, within MAX_ERR ULPs.
+template <typename T>
+int verify_radians(const T *const inptr, const T *const outptr, int n)
+{
+    float error, max_error = 0.0f;
+    double r, max_val = NAN;
+    int max_index = 0;
+
+    for (int i = 0; i < n; i++)
+    {
+        r = (M_PI / 180.0) * inptr[i];
+        error = UlpFn(outptr[i], r); // type-aware: double is not narrowed
+        if (fabsf(error) > fabsf(max_error)) // compare magnitudes
+        {
+            max_error = error;
+            max_index = i;
+            max_val = r;
+            if (fabsf(error) > MAX_ERR)
+            {
+                log_error("%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n",
+                          i, inptr[i], r, outptr[i], r, outptr[i], error);
+                return 1; // out of tolerance: fail on the first offender
+            }
+        }
+    }
+
+    log_info("radians: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n",
+             max_error, max_index, max_val, outptr[max_index], max_val,
+             outptr[max_index]);
+
+    return 0;
+}
+
+
+template <typename T>
+int verify_sign(const T *const inptr, const T *const outptr, int n)
+{
+    // Reference: +1 for positive input, -1 for negative, 0 for anything else.
+    for (int idx = 0; idx < n; ++idx)
+    {
+        const T in = inptr[idx];
+        T expected = 0.0;
+        if (in > 0.0f)
+            expected = 1.0;
+        else if (in < 0.0f)
+            expected = -1.0;
+        if (expected != outptr[idx]) return -1;
+    }
+    return 0;
+}
+
+}
+
+// Builds and runs one kernel per vector size for fnName, then verifies it.
+template <typename T>
+int test_unary_fn(cl_device_id device, cl_context context,
+                  cl_command_queue queue, int n_elems,
+                  const std::string &fnName, VerifyFuncUnary<T> verifyFn)
+{
+    clMemWrapper streams[2];
+    std::vector<T> input_ptr, output_ptr;
+
+    std::vector<clProgramWrapper> programs;
+    std::vector<clKernelWrapper> kernels;
+
+    int err, i;
+    MTdataHolder d = MTdataHolder(gRandomSeed);
+
+    assert(BaseFunctionTest::type2name.find(sizeof(T))
+           != BaseFunctionTest::type2name.end());
+    auto tname = BaseFunctionTest::type2name[sizeof(T)];
+
+    programs.resize(kTotalVecCount);
+    kernels.resize(kTotalVecCount);
+
+    int num_elements = n_elems * (1 << (kTotalVecCount - 1));
+
+    input_ptr.resize(num_elements);
+    output_ptr.resize(num_elements);
+
+    for (i = 0; i < 2; i++)
+    {
+        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                    sizeof(T) * num_elements, NULL, &err);
+        test_error(err, "clCreateBuffer failed");
+    }
+
+    std::string pragma_str;
+    if (std::is_same<T, float>::value)
+    {
+        for (int j = 0; j < num_elements; j++)
+        {
+            input_ptr[j] = get_random_float((float)(-100000.f * M_PI),
+                                            (float)(100000.f * M_PI), d);
+        }
+    }
+    else if (std::is_same<T, double>::value)
+    {
+        pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+        for (int j = 0; j < num_elements; j++)
+        {
+            input_ptr[j] =
+                get_random_double(-100000.0 * M_PI, 100000.0 * M_PI, d);
+        }
+    }
+
+    err = clEnqueueWriteBuffer(queue, streams[0], true, 0,
+                               sizeof(T) * num_elements, &input_ptr.front(), 0,
+                               NULL, NULL);
+    if (err != CL_SUCCESS)
+    {
+        log_error("clEnqueueWriteBuffer failed\n");
+        return -1;
+    }
+
+    for (i = 0; i < kTotalVecCount; i++)
+    {
+        std::string kernelSource;
+        char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
+
+        if (i >= kVectorSizeCount)
+        {
+            std::string str = unary_fn_code_pattern_v3;
+            kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(),
+                                         tname.c_str(), fnName.c_str());
+        }
+        else
+        {
+            std::string str = unary_fn_code_pattern;
+            kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(),
+                                         vecSizeNames[i], tname.c_str(),
+                                         vecSizeNames[i], fnName.c_str());
+        }
+
+        /* Create kernels */
+        const char *programPtr = kernelSource.c_str();
+        err =
+            create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
+                                        (const char **)&programPtr, "test_fn");
+        test_error(err, "create_single_kernel_helper failed");
+        err = clSetKernelArg(kernels[i], 0, sizeof streams[0], &streams[0]);
+        err |= clSetKernelArg(kernels[i], 1, sizeof streams[1], &streams[1]);
+        if (err != CL_SUCCESS)
+        {
+            log_error("clSetKernelArgs failed\n");
+            return -1;
+        }
+
+        // Line below is troublesome...
+        size_t threads = (size_t)num_elements / ((g_arrVecSizes[i]));
+        err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
+                                     0, NULL, NULL);
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueNDRangeKernel failed\n");
+            return -1;
+        }
+
+        cl_uint dead = 42;
+        memset_pattern4(&output_ptr[0], &dead, sizeof(T) * num_elements);
+        err = clEnqueueReadBuffer(queue, streams[1], true, 0,
+                                  sizeof(T) * num_elements, &output_ptr[0], 0,
+                                  NULL, NULL);
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueReadBuffer failed\n");
+            return -1;
+        }
+
+        if (verifyFn((T *)&input_ptr.front(), (T *)&output_ptr.front(),
+                     n_elems * (i + 1)))
+        {
+            log_error("%s %s%d test failed\n", fnName.c_str(), tname.c_str(),
+                      ((g_arrVecSizes[i])));
+            err = -1;
+        }
+        else
+        {
+            log_info("%s %s%d test passed\n", fnName.c_str(), tname.c_str(),
+                     ((g_arrVecSizes[i])));
+        }
+
+        if (err) break;
+    }
+
+    return err;
+}
+
+
+cl_int DegreesTest::Run()
+{
+    // The float variant is always exercised.
+    cl_int err = test_unary_fn<float>(device, context, queue, num_elems,
+                                      fnName.c_str(), verify_degrees<float>);
+    test_error(err, "DegreesTest::Run<float> failed");
+
+    // The double variant is exercised only when cl_khr_fp64 is reported.
+    if (!is_extension_available(device, "cl_khr_fp64")) return err;
+
+    err = test_unary_fn<double>(device, context, queue, num_elems,
+                                fnName.c_str(), verify_degrees<double>);
+    test_error(err, "DegreesTest::Run<double> failed");
+    return err;
+}
+
+
+cl_int RadiansTest::Run()
+{
+    // The float variant is always exercised.
+    cl_int err = test_unary_fn<float>(device, context, queue, num_elems,
+                                      fnName.c_str(), verify_radians<float>);
+    test_error(err, "RadiansTest::Run<float> failed");
+
+    // The double variant is exercised only when cl_khr_fp64 is reported.
+    if (!is_extension_available(device, "cl_khr_fp64")) return err;
+
+    err = test_unary_fn<double>(device, context, queue, num_elems,
+                                fnName.c_str(), verify_radians<double>);
+    test_error(err, "RadiansTest::Run<double> failed");
+    return err;
+}
+
+
+cl_int SignTest::Run()
+{
+    // The float variant is always exercised.
+    cl_int err = test_unary_fn<float>(device, context, queue, num_elems,
+                                      fnName.c_str(), verify_sign<float>);
+    test_error(err, "SignTest::Run<float> failed");
+
+    // The double variant is exercised only when cl_khr_fp64 is reported.
+    if (!is_extension_available(device, "cl_khr_fp64")) return err;
+
+    err = test_unary_fn<double>(device, context, queue, num_elems,
+                                fnName.c_str(), verify_sign<double>);
+    test_error(err, "SignTest::Run<double> failed");
+    return err;
+}
+
+
+int test_degrees(cl_device_id device, cl_context context,
+                 cl_command_queue queue, int n_elems)
+{
+    return MakeAndRunTest<DegreesTest>(device, context, queue, n_elems,
+                                       "degrees");
+}
+
+
+int test_radians(cl_device_id device, cl_context context,
+                 cl_command_queue queue, int n_elems)
+{
+    return MakeAndRunTest<RadiansTest>(device, context, queue, n_elems,
+                                       "radians");
+}
+
+
+int test_sign(cl_device_id device, cl_context context, cl_command_queue queue,
+              int n_elems)
+{
+    return MakeAndRunTest<SignTest>(device, context, queue, n_elems, "sign");
+}

From 85c95297ac3f83ad69e85f581b576187fed3ca17 Mon Sep 17 00:00:00 2001
From: Tuomas Lauttia <tuomas.lauttia@tuni.fi>
Date: Tue, 16 May 2023 18:46:28 +0300
Subject: [PATCH 09/30] Removed hardcoded -cl-std=CL2.0 build option from
 progvar tests (#1710)

* Removed hardcoded -cl-std=CL2.0 build option from progvar tests

Fixes issue #1380
https://github.com/KhronosGroup/OpenCL-CTS/issues/1380

These changes will query the device for the latest supported CL C
version instead of using a hardcoded value. The create_single_kernel_helper
function queries for the latest CL C version internally, so calls to
create_single_kernel_helper_with_build_options were replaced with calls
to create_single_kernel_helper instead.

* Fixed formatting
---
 test_conformance/basic/test_progvar.cpp | 54 ++++++++++++++-----------
 1 file changed, 31 insertions(+), 23 deletions(-)

diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp
index bbaa87c3..a46713e9 100644
--- a/test_conformance/basic/test_progvar.cpp
+++ b/test_conformance/basic/test_progvar.cpp
@@ -25,7 +25,6 @@
 
 #define ALIGNMENT 128
 
-#define OPTIONS "-cl-std=CL2.0"
 
 // NUM_ROUNDS must be at least 1.
 // It determines how many sets of random data we push through the global
@@ -439,6 +438,7 @@ static int l_capacity(cl_device_id device, cl_context context,
 static int l_user_type(cl_device_id device, cl_context context,
                        cl_command_queue queue, bool separate_compile);
 
+static std::string get_build_options(cl_device_id device);
 
 ////////////////////
 // File scope function definitions
@@ -1116,9 +1116,8 @@ static int l_write_read_for_type(cl_device_id device, cl_context context,
     clProgramWrapper program;
     clKernelWrapper writer;
 
-    status = create_single_kernel_helper_with_build_options(
-        context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer",
-        OPTIONS);
+    status = create_single_kernel_helper(context, &program, &writer,
+                                         ksrc.num_str(), ksrc.strs(), "writer");
     test_error_ret(status, "Failed to create program for read-after-write test",
                    status);
 
@@ -1326,9 +1325,8 @@ static int l_init_write_read_for_type(cl_device_id device, cl_context context,
     clProgramWrapper program;
     clKernelWrapper writer;
 
-    status = create_single_kernel_helper_with_build_options(
-        context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer",
-        OPTIONS);
+    status = create_single_kernel_helper(context, &program, &writer,
+                                         ksrc.num_str(), ksrc.strs(), "writer");
     test_error_ret(status,
                    "Failed to create program for init-read-after-write test",
                    status);
@@ -1581,9 +1579,9 @@ static int l_capacity(cl_device_id device, cl_context context,
     clProgramWrapper program;
     clKernelWrapper get_max_size;
 
-    status = create_single_kernel_helper_with_build_options(
-        context, &program, &get_max_size, ksrc.num_str(), ksrc.strs(),
-        "get_max_size", OPTIONS);
+    status = create_single_kernel_helper(context, &program, &get_max_size,
+                                         ksrc.num_str(), ksrc.strs(),
+                                         "get_max_size");
     test_error_ret(status, "Failed to create program for capacity test",
                    status);
 
@@ -1737,6 +1735,8 @@ static int l_user_type(cl_device_id device, cl_context context,
 
     clProgramWrapper program;
 
+    const std::string options = get_build_options(device);
+
     if (separate_compile)
     {
         // Separate compilation flow.
@@ -1757,15 +1757,15 @@ static int l_user_type(cl_device_id device, cl_context context,
                        "Failed to create writer program for user type test",
                        status);
 
-        status = clCompileProgram(writer_program, 1, &device, OPTIONS, 0, 0, 0,
-                                  0, 0);
+        status = clCompileProgram(writer_program, 1, &device, options.c_str(),
+                                  0, 0, 0, 0, 0);
         if (check_error(
                 status,
                 "Failed to compile writer program for user type test (%s)",
                 IGetErrorString(status)))
         {
             print_build_log(writer_program, 1, &device, wksrc.num_str(),
-                            wksrc.strs(), wksrc.lengths(), OPTIONS);
+                            wksrc.strs(), wksrc.lengths(), options.c_str());
             return status;
         }
 
@@ -1775,15 +1775,15 @@ static int l_user_type(cl_device_id device, cl_context context,
                        "Failed to create reader program for user type test",
                        status);
 
-        status = clCompileProgram(reader_program, 1, &device, OPTIONS, 0, 0, 0,
-                                  0, 0);
+        status = clCompileProgram(reader_program, 1, &device, options.c_str(),
+                                  0, 0, 0, 0, 0);
         if (check_error(
                 status,
                 "Failed to compile reader program for user type test (%s)",
                 IGetErrorString(status)))
         {
             print_build_log(reader_program, 1, &device, rksrc.num_str(),
-                            rksrc.strs(), rksrc.lengths(), OPTIONS);
+                            rksrc.strs(), rksrc.lengths(), options.c_str());
             return status;
         }
 
@@ -1813,23 +1813,23 @@ static int l_user_type(cl_device_id device, cl_context context,
         int status = CL_SUCCESS;
 
         status = create_single_kernel_helper_create_program(
-            context, &program, ksrc.num_str(), ksrc.strs(), OPTIONS);
+            context, &program, ksrc.num_str(), ksrc.strs(), options.c_str());
         if (check_error(status,
                         "Failed to build program for user type test (%s)",
                         IGetErrorString(status)))
         {
             print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(),
-                            ksrc.lengths(), OPTIONS);
+                            ksrc.lengths(), options.c_str());
             return status;
         }
 
-        status = clBuildProgram(program, 1, &device, OPTIONS, 0, 0);
+        status = clBuildProgram(program, 1, &device, options.c_str(), 0, 0);
         if (check_error(status,
                         "Failed to compile program for user type test (%s)",
                         IGetErrorString(status)))
         {
             print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(),
-                            ksrc.lengths(), OPTIONS);
+                            ksrc.lengths(), options.c_str());
             return status;
         }
     }
@@ -1935,6 +1935,14 @@ static int l_user_type(cl_device_id device, cl_context context,
     return err;
 }
 
+static std::string get_build_options(cl_device_id device)
+{
+    // Ask for the newest OpenCL C version that the harness helper
+    // get_device_latest_cl_c_version() reports for this device.
+    Version cl_c_version = get_device_latest_cl_c_version(device);
+    return std::string("-cl-std=CL") + cl_c_version.to_string();
+}
+
 // Determines whether its valid to skip this test based on the driver version
 // and the features it optionally supports.
 // Whether the test should be skipped is writen into the out paramter skip.
@@ -2102,9 +2110,9 @@ int test_progvar_func_scope(cl_device_id device, cl_context context,
     clProgramWrapper program;
     clKernelWrapper test_bump;
 
-    status = create_single_kernel_helper_with_build_options(
-        context, &program, &test_bump, ksrc.num_str(), ksrc.strs(), "test_bump",
-        OPTIONS);
+    status =
+        create_single_kernel_helper(context, &program, &test_bump,
+                                    ksrc.num_str(), ksrc.strs(), "test_bump");
     test_error_ret(status,
                    "Failed to create program for function static variable test",
                    status);

From 8f3027387a18bb0dbf0c298b8efdf617b7085833 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Thu, 18 May 2023 09:29:27 +0100
Subject: [PATCH 10/30] Fix some Wformat size_t warnings (#1726)

Printing a `size_t` requires the `%zu` specifier.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 test_conformance/basic/test_get_linear_ids.cpp     | 14 +++++++++-----
 .../cl_khr_command_buffer/basic_command_buffer.h   |  2 +-
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/test_conformance/basic/test_get_linear_ids.cpp b/test_conformance/basic/test_get_linear_ids.cpp
index 3496fd0b..ee7dfb2f 100644
--- a/test_conformance/basic/test_get_linear_ids.cpp
+++ b/test_conformance/basic/test_get_linear_ids.cpp
@@ -104,15 +104,19 @@ test_get_linear_ids(cl_device_id device, cl_context context, cl_command_queue qu
 
         switch (dims) {
         case 1:
-            log_info("  testing offset=%u global=%u local=%u...\n", gwo[0], gws[0], lws[0]);
+            log_info("  testing offset=%zu global=%zu local=%zu...\n", gwo[0],
+                     gws[0], lws[0]);
             break;
         case 2:
-            log_info("  testing offset=(%u,%u) global=(%u,%u) local=(%u,%u)...\n",
-                    gwo[0], gwo[1], gws[0], gws[1], lws[0], lws[1]);
+            log_info("  testing offset=(%zu,%zu) global=(%zu,%zu) "
+                     "local=(%zu,%zu)...\n",
+                     gwo[0], gwo[1], gws[0], gws[1], lws[0], lws[1]);
             break;
         case 3:
-            log_info("  testing offset=(%u,%u,%u) global=(%u,%u,%u) local=(%u,%u,%u)...\n",
-                    gwo[0], gwo[1], gwo[2], gws[0], gws[1], gws[2], lws[0], lws[1], lws[2]);
+            log_info("  testing offset=(%zu,%zu,%zu) global=(%zu,%zu,%zu) "
+                     "local=(%zu,%zu,%zu)...\n",
+                     gwo[0], gwo[1], gwo[2], gws[0], gws[1], gws[2], lws[0],
+                     lws[1], lws[2]);
             break;
         }
 
diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
index a20229e0..b1d36024 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
@@ -28,7 +28,7 @@
     {                                                                          \
         if (reference != result)                                               \
         {                                                                      \
-            log_error("Expected %d was %d at index %u\n", reference, result,   \
+            log_error("Expected %d was %d at index %zu\n", reference, result,  \
                       index);                                                  \
             return TEST_FAIL;                                                  \
         }                                                                      \

From 4f62adf1ca2cadc864dac844e643a20aa8b76b29 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Tue, 23 May 2023 09:43:25 +0100
Subject: [PATCH 11/30] computeinfo: fix use of uninitialized config_size_ret
 (#1727)

The variable `config_size_ret` is only assigned to inside the `if`.
If the condition is false, we would read uninitialized data.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 test_conformance/computeinfo/main.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp
index 9cecabea..b1d73af3 100644
--- a/test_conformance/computeinfo/main.cpp
+++ b/test_conformance/computeinfo/main.cpp
@@ -439,8 +439,8 @@ int getPlatformConfigInfo(cl_platform_id platform, config_info* info)
                 err = clGetPlatformInfo(platform, info->opcode, config_size_set,
                                         &info->config.cl_name_version_single,
                                         &config_size_ret);
+                size_err = config_size_set != config_size_ret;
             }
-            size_err = config_size_set != config_size_ret;
             break;
         default:
             log_error("Unknown config type: %d\n", info->config_type);
@@ -585,8 +585,8 @@ int getConfigInfo(cl_device_id device, config_info* info)
                 err = clGetDeviceInfo(device, info->opcode, config_size_set,
                                       &info->config.cl_name_version_single,
                                       &config_size_ret);
+                size_err = config_size_set != config_size_ret;
             }
-            size_err = config_size_set != config_size_ret;
             break;
         default:
             log_error("Unknown config type: %d\n", info->config_type);

From 957e3b398500c5436283300b56d74466cfc36338 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Wed, 24 May 2023 11:11:23 +0100
Subject: [PATCH 12/30] Convert some if-else chains to switch statements
 (#1730)

All of these if-else chains compare against enums, which is better
done using switch statements.  This helps avoid some
`-Wsometimes-uninitialized` warnings of variables that are assigned
inside the switch.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 .../allocations/allocation_execute.cpp        | 36 ++++---
 test_conformance/api/test_null_buffer_arg.cpp | 25 +++--
 .../images/clCopyImage/test_loops.cpp         | 98 +++++++++----------
 .../images/clFillImage/test_loops.cpp         | 55 +++++------
 4 files changed, 110 insertions(+), 104 deletions(-)

diff --git a/test_conformance/allocations/allocation_execute.cpp b/test_conformance/allocations/allocation_execute.cpp
index 9d0e8777..5a77c3a7 100644
--- a/test_conformance/allocations/allocation_execute.cpp
+++ b/test_conformance/allocations/allocation_execute.cpp
@@ -79,20 +79,30 @@ int check_image(cl_command_queue queue, cl_mem mem) {
         return -1;
     }
 
-    if (type == CL_MEM_OBJECT_BUFFER) {
-        log_error("Expected image object, not buffer.\n");
-        return -1;
-    } else if (type == CL_MEM_OBJECT_IMAGE2D) {
-        error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
-        if (error) {
-            print_error(error, "clGetMemObjectInfo failed for CL_IMAGE_WIDTH.");
+    switch (type)
+    {
+        case CL_MEM_OBJECT_BUFFER:
+            log_error("Expected image object, not buffer.\n");
             return -1;
-        }
-        error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
-        if (error) {
-            print_error(error, "clGetMemObjectInfo failed for CL_IMAGE_HEIGHT.");
-            return -1;
-        }
+        case CL_MEM_OBJECT_IMAGE2D:
+            error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width,
+                                   NULL);
+            if (error)
+            {
+                print_error(error,
+                            "clGetMemObjectInfo failed for CL_IMAGE_WIDTH.");
+                return -1;
+            }
+            error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height),
+                                   &height, NULL);
+            if (error)
+            {
+                print_error(error,
+                            "clGetMemObjectInfo failed for CL_IMAGE_HEIGHT.");
+                return -1;
+            }
+            break;
+        default: log_error("unexpected object type"); return -1;
     }
 
 
diff --git a/test_conformance/api/test_null_buffer_arg.cpp b/test_conformance/api/test_null_buffer_arg.cpp
index 75bdd479..83fcb636 100644
--- a/test_conformance/api/test_null_buffer_arg.cpp
+++ b/test_conformance/api/test_null_buffer_arg.cpp
@@ -64,16 +64,21 @@ static int test_setargs_and_execution(cl_command_queue queue, cl_kernel kernel,
     cl_int status;
     const char *typestr;
 
-    if (type == NON_NULL_PATH) {
-        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
-        typestr = "non-NULL";
-    } else if (type == ADDROF_NULL_PATH) {
-        test_buf = NULL;
-        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
-        typestr = "&NULL";
-    } else if (type == NULL_PATH) {
-        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL);
-        typestr = "NULL";
+    switch (type)
+    {
+        case NON_NULL_PATH:
+            status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
+            typestr = "non-NULL";
+            break;
+        case ADDROF_NULL_PATH:
+            test_buf = NULL;
+            status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
+            typestr = "&NULL";
+            break;
+        case NULL_PATH:
+            status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL);
+            typestr = "NULL";
+            break;
     }
 
     log_info("Testing setKernelArgs with %s buffer.\n", typestr);
diff --git a/test_conformance/images/clCopyImage/test_loops.cpp b/test_conformance/images/clCopyImage/test_loops.cpp
index 6ee1e536..e839cfdf 100644
--- a/test_conformance/images/clCopyImage/test_loops.cpp
+++ b/test_conformance/images/clCopyImage/test_loops.cpp
@@ -41,60 +41,52 @@ int test_image_type( cl_device_id device, cl_context context, cl_command_queue q
         }
     }
 
-    if( testMethod == k1D )
+    switch (testMethod)
     {
-        name = "1D -> 1D";
-        imageType = CL_MEM_OBJECT_IMAGE1D;
-    }
-    else if( testMethod == k2D )
-    {
-        name = "2D -> 2D";
-        imageType = CL_MEM_OBJECT_IMAGE2D;
-    }
-    else if( testMethod == k3D )
-    {
-        name = "3D -> 3D";
-        imageType = CL_MEM_OBJECT_IMAGE3D;
-    }
-    else if( testMethod == k1DArray )
-    {
-        name = "1D array -> 1D array";
-        imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY;
-    }
-    else if( testMethod == k2DArray )
-    {
-        name = "2D array -> 2D array";
-        imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
-    }
-    else if( testMethod == k2DTo3D )
-    {
-        name = "2D -> 3D";
-        imageType = CL_MEM_OBJECT_IMAGE3D;
-    }
-    else if( testMethod == k3DTo2D )
-    {
-        name = "3D -> 2D";
-        imageType = CL_MEM_OBJECT_IMAGE3D;
-    }
-    else if( testMethod == k2DArrayTo2D )
-    {
-        name = "2D array -> 2D";
-        imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
-    }
-    else if( testMethod == k2DTo2DArray )
-    {
-        name = "2D -> 2D array";
-        imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
-    }
-    else if( testMethod == k2DArrayTo3D )
-    {
-        name = "2D array -> 3D";
-        imageType = CL_MEM_OBJECT_IMAGE3D;
-    }
-    else if( testMethod == k3DTo2DArray )
-    {
-        name = "3D -> 2D array";
-        imageType = CL_MEM_OBJECT_IMAGE3D;
+        case k1D:
+            name = "1D -> 1D";
+            imageType = CL_MEM_OBJECT_IMAGE1D;
+            break;
+        case k2D:
+            name = "2D -> 2D";
+            imageType = CL_MEM_OBJECT_IMAGE2D;
+            break;
+        case k3D:
+            name = "3D -> 3D";
+            imageType = CL_MEM_OBJECT_IMAGE3D;
+            break;
+        case k1DArray:
+            name = "1D array -> 1D array";
+            imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY;
+            break;
+        case k2DArray:
+            name = "2D array -> 2D array";
+            imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+            break;
+        case k2DTo3D:
+            name = "2D -> 3D";
+            imageType = CL_MEM_OBJECT_IMAGE3D;
+            break;
+        case k3DTo2D:
+            name = "3D -> 2D";
+            imageType = CL_MEM_OBJECT_IMAGE3D;
+            break;
+        case k2DArrayTo2D:
+            name = "2D array -> 2D";
+            imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+            break;
+        case k2DTo2DArray:
+            name = "2D -> 2D array";
+            imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+            break;
+        case k2DArrayTo3D:
+            name = "2D array -> 3D";
+            imageType = CL_MEM_OBJECT_IMAGE3D;
+            break;
+        case k3DTo2DArray:
+            name = "3D -> 2D array";
+            imageType = CL_MEM_OBJECT_IMAGE3D;
+            break;
     }
 
     if(gTestMipmaps)
diff --git a/test_conformance/images/clFillImage/test_loops.cpp b/test_conformance/images/clFillImage/test_loops.cpp
index 759f48d2..126ea0eb 100644
--- a/test_conformance/images/clFillImage/test_loops.cpp
+++ b/test_conformance/images/clFillImage/test_loops.cpp
@@ -33,35 +33,34 @@ int test_image_type( cl_device_id device, cl_context context, cl_command_queue q
     cl_mem_object_type imageType;
     test_func test_fn;
 
-    if ( testMethod == k1D )
+    switch (testMethod)
     {
-        name = "1D Image Fill";
-        imageType = CL_MEM_OBJECT_IMAGE1D;
-        test_fn = &test_fill_image_set_1D;
-    }
-    else if ( testMethod == k2D )
-    {
-        name = "2D Image Fill";
-        imageType = CL_MEM_OBJECT_IMAGE2D;
-        test_fn = &test_fill_image_set_2D;
-    }
-    else if ( testMethod == k1DArray )
-    {
-        name = "1D Image Array Fill";
-        imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY;
-        test_fn = &test_fill_image_set_1D_array;
-    }
-    else if ( testMethod == k2DArray )
-    {
-        name = "2D Image Array Fill";
-        imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
-        test_fn = &test_fill_image_set_2D_array;
-    }
-    else if ( testMethod == k3D )
-    {
-        name = "3D Image Fill";
-        imageType = CL_MEM_OBJECT_IMAGE3D;
-        test_fn = &test_fill_image_set_3D;
+        case k1D:
+            name = "1D Image Fill";
+            imageType = CL_MEM_OBJECT_IMAGE1D;
+            test_fn = &test_fill_image_set_1D;
+            break;
+        case k2D:
+            name = "2D Image Fill";
+            imageType = CL_MEM_OBJECT_IMAGE2D;
+            test_fn = &test_fill_image_set_2D;
+            break;
+        case k1DArray:
+            name = "1D Image Array Fill";
+            imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY;
+            test_fn = &test_fill_image_set_1D_array;
+            break;
+        case k2DArray:
+            name = "2D Image Array Fill";
+            imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+            test_fn = &test_fill_image_set_2D_array;
+            break;
+        case k3D:
+            name = "3D Image Fill";
+            imageType = CL_MEM_OBJECT_IMAGE3D;
+            test_fn = &test_fill_image_set_3D;
+            break;
+        default: log_error("Unhandled method\n"); return -1;
     }
 
     log_info( "Running %s tests...\n", name );

From 3e8898ffeb1478c96c440a158db2e7d662b26a30 Mon Sep 17 00:00:00 2001
From: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com>
Date: Wed, 24 May 2023 16:55:25 +0100
Subject: [PATCH 13/30] Deduplicate test_basic int2float/float2int (#1537)

Merge the int2float and float2int tests into a single templated
implementation in test_int2float.cpp.

Signed-off-by: John Kesapides <john.kesapides@arm.com>
---
 test_conformance/basic/CMakeLists.txt     |   2 +-
 test_conformance/basic/test_int2float.cpp | 191 +++++++++++-----------
 2 files changed, 95 insertions(+), 98 deletions(-)

diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt
index dde3311d..adf24bd8 100644
--- a/test_conformance/basic/CMakeLists.txt
+++ b/test_conformance/basic/CMakeLists.txt
@@ -11,7 +11,7 @@ set(${MODULE_NAME}_SOURCES
     test_multireadimageonefmt.cpp test_multireadimagemultifmt.cpp
     test_imagedim.cpp
     test_vloadstore.cpp
-    test_int2float.cpp test_float2int.cpp
+    test_int2float.cpp
     test_createkernelsinprogram.cpp
     test_hostptr.cpp
     test_explicit_s2v.cpp
diff --git a/test_conformance/basic/test_int2float.cpp b/test_conformance/basic/test_int2float.cpp
index 3a8458c9..c5afc244 100644
--- a/test_conformance/basic/test_int2float.cpp
+++ b/test_conformance/basic/test_int2float.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -21,123 +21,120 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#include <algorithm>
+#include <vector>
 
 #include "procs.h"
 
-const char *int2float_kernel_code =
-"__kernel void test_int2float(__global int *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = (float)src[tid];\n"
-"\n"
-"}\n";
-
-
-int
-verify_int2float(cl_int *inptr, cl_float *outptr, int n)
+namespace {
+const char *int2float_kernel_code = R"(
+__kernel void test_X2Y(__global TYPE_X *src, __global TYPE_Y *dst)
 {
-    int     i;
+    int  tid = get_global_id(0);
 
-    for (i=0; i<n; i++)
-    {
-        if (outptr[i] != (float)inptr[i])
-        {
-            log_error("INT2FLOAT test failed\n");
-            return -1;
-        }
-    }
+    dst[tid] = (TYPE_Y)src[tid];
 
-    log_info("INT2FLOAT test passed\n");
-    return 0;
+})";
+
+template <typename T> const char *Type2str() = delete; // unsupported type
+template <> const char *Type2str<cl_int>() { return "int"; }
+template <> const char *Type2str<cl_float>() { return "float"; }
+
+template <typename T> void generate_random_inputs(std::vector<T> &v)
+{
+    // Fill v with get_random_float() draws bounded by -2^31 and 2^31.
+    RandomSeed seed(gRandomSeed);
+    const float limit = MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31);
+
+    for (T &element : v)
+    {
+        element = get_random_float(-limit, limit, seed);
+    }
 }
 
-int
-test_int2float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+template <typename Tx, typename Ty> bool equal_value(Tx a, Ty b)
 {
-    cl_mem            streams[2];
-    cl_int            *input_ptr;
-    cl_float        *output_ptr;
-    cl_program        program;
-    cl_kernel        kernel;
-    size_t    threads[1];
-    int                err;
-    int                i;
-    MTdata          d;
+    return (Ty)a == b; // a: host input, b: device output for that input
+}
+
+template <typename Tx, typename Ty>
+int verify_X2Y(const std::vector<Tx> &input, const std::vector<Ty> &output,
+               const char *test_name)
+{
+    // Cast the host input like the kernel does; compare outputs untouched.
+    if (!std::equal(input.begin(), input.end(), output.begin(),
+                    equal_value<Tx, Ty>))
+    {
+        log_error("%s test failed\n", test_name);
+        return -1;
+    }
+
+    log_info("%s test passed\n", test_name);
+    return 0;
+}
+template <typename Tx, typename Ty>
+int test_X2Y(cl_device_id device, cl_context context, cl_command_queue queue,
+             int num_elements, const char *test_name)
+{
+    clMemWrapper streams[2];
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    int err;
+
+
+    std::vector<Tx> input(num_elements);
+    std::vector<Ty> output(num_elements);
 
-    input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
+                                sizeof(Tx) * num_elements, nullptr, &err);
+    test_error(err, "clCreateBuffer failed.");
     streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
+                                sizeof(Ty) * num_elements, nullptr, &err);
+    test_error(err, "clCreateBuffer failed.");
 
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-        input_ptr[i] = (cl_int)get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
-    free_mtdata(d); d = NULL;
+    generate_random_inputs(input);
 
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
+    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0,
+                               sizeof(Tx) * num_elements, input.data(), 0,
+                               nullptr, nullptr);
+    test_error(err, "clEnqueueWriteBuffer failed.");
 
-    err = create_single_kernel_helper(context, &program, &kernel, 1, &int2float_kernel_code, "test_int2float");
-    if (err != CL_SUCCESS)
-    {
-        log_error("create_single_kernel_helper failed\n");
-        return -1;
-    }
+    std::string build_options;
+    build_options.append("-DTYPE_X=").append(Type2str<Tx>());
+    build_options.append(" -DTYPE_Y=").append(Type2str<Ty>());
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      &int2float_kernel_code, "test_X2Y",
+                                      build_options.c_str());
+    test_error(err, "create_single_kernel_helper failed.");
 
     err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
     err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
+    test_error(err, "clSetKernelArg failed.");
 
-    threads[0] = (size_t)num_elements;
-    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueNDRangeKernel failed\n");
-        return -1;
-    }
+    size_t threads[] = { (size_t)num_elements };
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, threads, nullptr, 0,
+                                 nullptr, nullptr);
+    test_error(err, "clEnqueueNDRangeKernel failed.");
 
-    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueReadBuffer failed\n");
-        return -1;
-    }
+    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0,
+                              sizeof(Ty) * num_elements, output.data(), 0,
+                              nullptr, nullptr);
+    test_error(err, "clEnqueueReadBuffer failed.");
 
-    err = verify_int2float(input_ptr, output_ptr, num_elements);
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(output_ptr);
+    err = verify_X2Y(input, output, test_name);
 
     return err;
 }
-
-
-
-
-
+}
+int test_int2float(cl_device_id device, cl_context context,
+                   cl_command_queue queue, int num_elements)
+{
+    return test_X2Y<cl_int, cl_float>(device, context, queue, num_elements,
+                                      "INT2FLOAT");
+}
+int test_float2int(cl_device_id device, cl_context context,
+                   cl_command_queue queue, int num_elements)
+{
+    return test_X2Y<cl_float, cl_int>(device, context, queue, num_elements,
+                                      "FLOAT2INT");
+}

From 4dece20f7db75819eafa35981ca7c01cee70ca4b Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Mon, 29 May 2023 15:04:04 +0200
Subject: [PATCH 14/30] Added cl_khr_fp16 extension support for
 test_vector_swizzle from basic (#1729)

* Added cl_khr_fp16 extension support for vector_swizzle from basic (issue #142, basic)

* Added code review related fix
---
 .../basic/test_vector_swizzle.cpp             | 21 ++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/test_conformance/basic/test_vector_swizzle.cpp b/test_conformance/basic/test_vector_swizzle.cpp
index 884bcf36..fdbc8919 100644
--- a/test_conformance/basic/test_vector_swizzle.cpp
+++ b/test_conformance/basic/test_vector_swizzle.cpp
@@ -22,6 +22,8 @@
 #include "procs.h"
 #include "harness/testHarness.h"
 
+static std::string pragma_extension;
+
 template <int N> struct TestInfo
 {
 };
@@ -629,7 +631,9 @@ static int test_vectype(const char* type_name, cl_device_id device,
         clProgramWrapper program;
         clKernelWrapper kernel;
 
-        const char* xyzw_source = TestInfo<N>::kernel_source_xyzw;
+        std::string program_src =
+            pragma_extension + std::string(TestInfo<N>::kernel_source_xyzw);
+        const char* xyzw_source = program_src.c_str();
         error = create_single_kernel_helper(
             context, &program, &kernel, 1, &xyzw_source,
             "test_vector_swizzle_xyzw", buildOptions.c_str());
@@ -643,7 +647,9 @@ static int test_vectype(const char* type_name, cl_device_id device,
         clProgramWrapper program;
         clKernelWrapper kernel;
 
-        const char* sN_source = TestInfo<N>::kernel_source_sN;
+        std::string program_src =
+            pragma_extension + std::string(TestInfo<N>::kernel_source_sN);
+        const char* sN_source = program_src.c_str();
         error = create_single_kernel_helper(
             context, &program, &kernel, 1, &sN_source, "test_vector_swizzle_sN",
             buildOptions.c_str());
@@ -660,7 +666,9 @@ static int test_vectype(const char* type_name, cl_device_id device,
         const Version device_version = get_device_cl_version(device);
         if (device_version >= Version(3, 0))
         {
-            const char* rgba_source = TestInfo<N>::kernel_source_rgba;
+            std::string program_src =
+                pragma_extension + std::string(TestInfo<N>::kernel_source_rgba);
+            const char* rgba_source = program_src.c_str();
             error = create_single_kernel_helper(
                 context, &program, &kernel, 1, &rgba_source,
                 "test_vector_swizzle_rgba", buildOptions.c_str());
@@ -689,6 +697,7 @@ int test_vector_swizzle(cl_device_id device, cl_context context,
                         cl_command_queue queue, int num_elements)
 {
     int hasDouble = is_extension_available(device, "cl_khr_fp64");
+    int hasHalf = is_extension_available(device, "cl_khr_fp16");
 
     int result = TEST_PASS;
     result |= test_type<cl_char>("char", device, context, queue);
@@ -703,8 +712,14 @@ int test_vector_swizzle(cl_device_id device, cl_context context,
         result |= test_type<cl_ulong>("ulong", device, context, queue);
     }
     result |= test_type<cl_float>("float", device, context, queue);
+    if (hasHalf)
+    {
+        pragma_extension = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
+        result |= test_type<cl_half>("half", device, context, queue);
+    }
     if (hasDouble)
     {
+        pragma_extension = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
         result |= test_type<cl_double>("double", device, context, queue);
     }
     return result;

From 35b8db35c657c8ed14ba564e2fb65490cf0a2c4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Jastrz=C4=99bski?= <p.k.jastrzebski@gmail.com>
Date: Tue, 30 May 2023 17:43:58 +0200
Subject: [PATCH 15/30] Add check for CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR
 after completion. (#1740)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add check for CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR after completion.

Added check for state CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR in state info tests.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Add wait for event.

Wait for event to guarantee that a command-buffer has finished executing by this point.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

* Add new event to wait for.

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>

---------

Signed-off-by: Paweł Jastrzębski <p.k.jastrzebski@gmail.com>
---
 .../command_buffer_get_command_buffer_info.cpp         | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp
index 3ce410c0..d46b2888 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp
@@ -240,9 +240,10 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
         clEventWrapper trigger_event = clCreateUserEvent(context, &error);
         test_error(error, "clCreateUserEvent failed");
 
+        clEventWrapper execute_event;
         // enqueued command buffer blocked on user event
         error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
-                                          &trigger_event, nullptr);
+                                          &trigger_event, &execute_event);
         test_error(error, "clEnqueueCommandBufferKHR failed");
 
         // verify pending state
@@ -255,6 +256,13 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
 
         test_error(signal_error, "clSetUserEventStatus failed");
 
+        error = clWaitForEvents(1, &execute_event);
+        test_error(error, "Unable to wait for execute event");
+
+        // verify executable state
+        error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR);
+        test_error(error, "verify_state failed");
+
         return CL_SUCCESS;
     }
 

From 4cb39b8c140563a5fda7b375ee919f084cd3bc11 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 30 May 2023 17:48:09 +0200
Subject: [PATCH 16/30] Added cl_khr_fp16 extension support for test_hiloeo
 from basic (#1721)

* Added cl_khr_fp16 support for hiloeo test from basic (issue #142, basic)

* Added correction due to compiler warning

* Cosmetic correction
---
 test_conformance/basic/test_hiloeo.cpp | 365 ++++++++-----------------
 1 file changed, 120 insertions(+), 245 deletions(-)

diff --git a/test_conformance/basic/test_hiloeo.cpp b/test_conformance/basic/test_hiloeo.cpp
index 3470ad00..4e921a6e 100644
--- a/test_conformance/basic/test_hiloeo.cpp
+++ b/test_conformance/basic/test_hiloeo.cpp
@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
+// Copyright (c) 2023 The Khronos Group Inc.
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,14 +13,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
-
+#include <iomanip>
+#include <limits.h>
 #include <stdio.h>
 #include <string.h>
-#include <limits.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-
+#include <vector>
 
 #include "procs.h"
 
@@ -31,9 +30,10 @@ int odd_offset( int index, int vectorSize ) { return index * 2 + 1; }
 
 typedef int (*OffsetFunc)( int index, int vectorSize );
 static const OffsetFunc offsetFuncs[4] = { hi_offset, lo_offset, even_offset, odd_offset };
-typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName );
 static const char *operatorToUse_names[] = { "hi", "lo", "even", "odd" };
-static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "float", "double" };
+static const char *test_str_names[] = { "char", "uchar", "short", "ushort",
+                                        "int",  "uint",  "long",  "ulong",
+                                        "half", "float", "double" };
 
 static const unsigned int vector_sizes[] =     { 1, 2, 3, 4, 8, 16};
 static const unsigned int vector_aligns[] =    { 1, 2, 4, 4, 8, 16};
@@ -45,43 +45,41 @@ static const unsigned int out_vector_idx[] =   { 0, 0, 1, 1, 3, 4};
 // strcat(gentype, vector_size_names[out_vector_idx[i]]);
 static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"};
 
-static const size_t  kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
+static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 2, 4, 8 };
 static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse );
 
 int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
 {
-    cl_int *input_ptr, *output_ptr, *p;
     int err;
-    cl_uint i;
     int hasDouble = is_extension_available( device, "cl_khr_fp64" );
+    int hasHalf = is_extension_available(device, "cl_khr_fp16");
     cl_uint vectorSize, operatorToUse;
     cl_uint type;
-    MTdata d;
+    MTdataHolder d(gRandomSeed);
 
     int expressionMode;
     int numExpressionModes = 2;
 
     size_t length = sizeof(cl_int) * 4 * n_elems;
 
-    input_ptr   = (cl_int*)malloc(length);
-    output_ptr  = (cl_int*)malloc(length);
+    std::vector<cl_int> input_ptr(4 * n_elems);
+    std::vector<cl_int> output_ptr(4 * n_elems);
 
-    p = input_ptr;
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<4 * (cl_uint) n_elems; i++)
-        p[i] = genrand_int32(d);
-    free_mtdata(d); d = NULL;
+    for (cl_uint i = 0; i < 4 * (cl_uint)n_elems; i++)
+        input_ptr[i] = genrand_int32(d);
 
     for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
     {
         // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
         size_t elementCount = length / kSizes[type];
-        cl_mem streams[2];
+        clMemWrapper streams[2];
 
         // skip double if unavailable
         if( !hasDouble && ( 0 == strcmp( test_str_names[type], "double" )))
             continue;
 
+        if (!hasHalf && (0 == strcmp(test_str_names[type], "half"))) continue;
+
         if( !gHasLong &&
             (( 0 == strcmp( test_str_names[type], "long" )) ||
             ( 0 == strcmp( test_str_names[type], "ulong" ))))
@@ -104,12 +102,9 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue,
             return -1;
         }
 
-        err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueWriteBuffer failed\n");
-            return -1;
-        }
+        err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length,
+                                   input_ptr.data(), 0, NULL, NULL);
+        test_error(err, "clEnqueueWriteBuffer failed\n");
 
         for( operatorToUse = 0; operatorToUse < sizeof( operatorToUse_names ) / sizeof( operatorToUse_names[0] ); operatorToUse++ )
         {
@@ -118,8 +113,8 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue,
             for( vectorSize = 1; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) {
                 for(expressionMode = 0; expressionMode < numExpressionModes; ++expressionMode) {
 
-                    cl_program program = NULL;
-                    cl_kernel kernel = NULL;
+                    clProgramWrapper program;
+                    clKernelWrapper kernel;
                     cl_uint outVectorSize = out_vector_idx[vectorSize];
                     char expression[1024];
 
@@ -139,92 +134,64 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue,
                         "}\n"
                     };
 
-                    if(expressionMode == 0) {
-                        sprintf(expression, "srcA[tid]");
-                    } else if(expressionMode == 1) {
-                        switch(vector_sizes[vectorSize]) {
-                            case 16:
-                                sprintf(expression,
-                                        "((%s16)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7, srcA[tid].s8, srcA[tid].s9, srcA[tid].sA, srcA[tid].sB, srcA[tid].sC, srcA[tid].sD, srcA[tid].sE, srcA[tid].sf))",
-                                        test_str_names[type]
-                                        );
-                                break;
-                            case 8:
-                                sprintf(expression,
-                                        "((%s8)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7))",
-                                        test_str_names[type]
-                                        );
-                                break;
-                            case 4:
-                                sprintf(expression,
-                                        "((%s4)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3))",
-                                        test_str_names[type]
-                                        );
-                                break;
-                            case 3:
-                                sprintf(expression,
-                                        "((%s3)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2))",
-                                        test_str_names[type]
-                                        );
-                                break;
-                            case 2:
-                                sprintf(expression,
-                                        "((%s2)(srcA[tid].s0, srcA[tid].s1))",
-                                        test_str_names[type]
-                                        );
-                                break;
-                            default :
-                                sprintf(expression, "srcA[tid]");
-                                log_info("Default\n");
-                        }
-                    } else {
-                        sprintf(expression, "srcA[tid]");
+                    if (expressionMode == 1 && vector_sizes[vectorSize] != 1)
+                    {
+                        std::ostringstream sstr;
+                        const char *index_chars[] = { "0", "1", "2", "3",
+                                                      "4", "5", "6", "7",
+                                                      "8", "9", "A", "B",
+                                                      "C", "D", "E", "f" };
+                        sstr << "((" << test_str_names[type]
+                             << std::to_string(vector_sizes[vectorSize])
+                             << ")(";
+                        for (unsigned i = 0; i < vector_sizes[vectorSize]; i++)
+                            sstr << " srcA[tid].s" << index_chars[i] << ",";
+                        sstr.seekp(-1, sstr.cur);
+                        sstr << "))";
+                        std::snprintf(expression, sizeof(expression), "%s",
+                                      sstr.str().c_str());
+                    }
+                    else
+                    {
+                        std::snprintf(expression, sizeof(expression),
+                                      "srcA[tid]");
                     }
 
                     if (0 == strcmp( test_str_names[type], "double" ))
                         source[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
 
+                    if (0 == strcmp(test_str_names[type], "half"))
+                        source[0] =
+                            "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
+
                     char kernelName[128];
                     snprintf( kernelName, sizeof( kernelName ), "test_%s_%s%s", operatorToUse_names[ operatorToUse ], test_str_names[type], vector_size_names[vectorSize] );
                     err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
-                    if (err)
-                        return -1;
+                    test_error(err, "create_single_kernel_helper failed\n");
 
                     err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
                     err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-                    if (err != CL_SUCCESS)
-                    {
-                        log_error("clSetKernelArgs failed\n");
-                        return -1;
-                    }
+                    test_error(err, "clSetKernelArg failed\n");
 
                     //Wipe the output buffer clean
                     uint32_t pattern = 0xdeadbeef;
-                    memset_pattern4( output_ptr, &pattern, length );
-                    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-                    if (err != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueWriteBuffer failed\n");
-                        return -1;
-                    }
+                    memset_pattern4(output_ptr.data(), &pattern, length);
+                    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0,
+                                               length, output_ptr.data(), 0,
+                                               NULL, NULL);
+                    test_error(err, "clEnqueueWriteBuffer failed\n");
 
                     size_t size = elementCount / (vector_aligns[vectorSize]);
                     err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
-                    if (err != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueNDRangeKernel failed\n");
-                        return -1;
-                    }
+                    test_error(err, "clEnqueueNDRangeKernel failed\n");
 
-                    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-                    if (err != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueReadBuffer failed\n");
-                        return -1;
-                    }
+                    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0,
+                                              length, output_ptr.data(), 0,
+                                              NULL, NULL);
+                    test_error(err, "clEnqueueReadBuffer failed\n");
 
-                    char *inP = (char *)input_ptr;
-                    char *outP = (char *)output_ptr;
+                    char *inP = (char *)input_ptr.data();
+                    char *outP = (char *)output_ptr.data();
                     outP += kSizes[type] * ( ( vector_sizes[outVectorSize] ) -
                                             ( vector_sizes[ out_vector_idx[vectorSize] ] ) );
                     // was                outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) );
@@ -240,180 +207,88 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue,
                         inP += kSizes[type] * ( vector_aligns[vectorSize] );
                         outP += kSizes[type] * ( vector_aligns[outVectorSize] );
                     }
-
-                    clReleaseKernel( kernel );
-                    clReleaseProgram( program );
                     log_info( "." );
                     fflush( stdout );
                 }
             }
         }
-
-        clReleaseMemObject( streams[0] );
-        clReleaseMemObject( streams[1] );
         log_info( "done\n" );
     }
 
     log_info("HiLoEO test passed\n");
-
-    free(input_ptr);
-    free(output_ptr);
-
     return err;
 }
 
-static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse )
+template <typename T>
+cl_int verify(void *in, void *out, size_t elementCount, int type,
+              int vectorSize, int operatorToUse, size_t cmpVectorSize)
 {
-    cl_ulong  array[8];
+    size_t halfVectorSize = vector_sizes[out_vector_idx[vectorSize]];
+    size_t elementSize = kSizes[type];
+    OffsetFunc f = offsetFuncs[operatorToUse];
+    cl_ulong array[8];
     void *p = array;
-    size_t halfVectorSize  = vector_sizes[out_vector_idx[vectorSize]];
-    size_t cmpVectorSize =  vector_sizes[out_vector_idx[vectorSize]];
-    // was 1 << (vectorSize-1);
-    OffsetFunc f = offsetFuncs[ operatorToUse ];
-    size_t elementSize =  kSizes[type];
 
-    if(vector_size_names[vectorSize][0] == '3') {
-        if(operatorToUse_names[operatorToUse][0] == 'h' ||
-           operatorToUse_names[operatorToUse][0] == 'o') // hi or odd
+    std::ostringstream ss;
+
+    T *i = (T *)in, *o = (T *)out;
+
+    for (cl_uint k = 0; k < elementCount; k++)
+    {
+        T *o2 = (T *)p;
+        for (size_t j = 0; j < halfVectorSize; j++)
+            o2[j] = i[f((int)j, (int)halfVectorSize * 2)];
+
+        if (memcmp(o, o2, elementSize * cmpVectorSize))
+        {
+            ss << "\n"
+               << k << ") Failure for" << test_str_names[type]
+               << vector_size_names[vectorSize] << '.'
+               << operatorToUse_names[operatorToUse] << " { "
+               << "0x" << std::setfill('0') << std::setw(elementSize * 2)
+               << std::hex << i[0];
+
+            for (size_t j = 1; j < halfVectorSize * 2; j++) ss << ", " << i[j];
+            ss << " } --> { " << o[0];
+            for (size_t j = 1; j < halfVectorSize; j++) ss << ", " << o[j];
+            ss << " }\n";
+            return -1;
+        }
+        i += 2 * halfVectorSize;
+        o += halfVectorSize;
+    }
+    return 0;
+}
+
+static int CheckResults(void *in, void *out, size_t elementCount, int type,
+                        int vectorSize, int operatorToUse)
+{
+    size_t cmpVectorSize = vector_sizes[out_vector_idx[vectorSize]];
+    size_t elementSize = kSizes[type];
+
+    if (vector_size_names[vectorSize][0] == '3')
+    {
+        if (operatorToUse_names[operatorToUse][0] == 'h'
+            || operatorToUse_names[operatorToUse][0] == 'o') // hi or odd
         {
             cmpVectorSize = 1; // special case for vec3 ignored values
         }
     }
 
-    switch( elementSize )
+    switch (elementSize)
     {
         case 1:
-        {
-            char *i = (char*)in;
-            char *o = (char*)out;
-            size_t j;
-            cl_uint k;
-            OffsetFunc f = offsetFuncs[ operatorToUse ];
-
-            for( k = 0; k  < elementCount; k++ )
-            {
-                char *o2 = (char*)p;
-                for( j = 0; j < halfVectorSize; j++ )
-                    o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
-
-                if( memcmp( o, o2, elementSize * cmpVectorSize ) )
-                {
-                    log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
-                    for( j = 1; j < halfVectorSize * 2; j++ )
-                        log_info( ", %d", i[j] );
-                    log_info( " } --> { %d", o[0] );
-                    for( j = 1; j < halfVectorSize; j++ )
-                        log_info( ", %d", o[j] );
-                    log_info( " }\n" );
-                    return -1;
-                }
-                i += 2 * halfVectorSize;
-                o += halfVectorSize;
-            }
-        }
-            break;
-
+            return verify<char>(in, out, elementCount, type, vectorSize,
+                                operatorToUse, cmpVectorSize);
         case 2:
-        {
-            short *i = (short*)in;
-            short *o = (short*)out;
-            size_t j;
-            cl_uint k;
-
-            for( k = 0; k  < elementCount; k++ )
-            {
-                short *o2 = (short*)p;
-                for( j = 0; j < halfVectorSize; j++ )
-                    o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
-
-                if( memcmp( o, o2, elementSize * cmpVectorSize ) )
-                {
-                    log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
-                    for( j = 1; j < halfVectorSize * 2; j++ )
-                        log_info( ", %d", i[j] );
-                    log_info( " } --> { %d", o[0] );
-                    for( j = 1; j < halfVectorSize; j++ )
-                        log_info( ", %d", o[j] );
-                    log_info( " }\n" );
-                    return -1;
-                }
-                i += 2 * halfVectorSize;
-                o += halfVectorSize;
-            }
-        }
-            break;
-
+            return verify<short>(in, out, elementCount, type, vectorSize,
+                                 operatorToUse, cmpVectorSize);
         case 4:
-        {
-            int *i = (int*)in;
-            int *o = (int*)out;
-            size_t j;
-            cl_uint k;
-
-            for( k = 0; k  < elementCount; k++ )
-            {
-                int *o2 = (int *)p;
-                for( j = 0; j < halfVectorSize; j++ )
-                    o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
-
-                for( j = 0; j < cmpVectorSize; j++ )
-        {
-            /* Allow float nans to be binary different */
-            if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j])))
-            {
-                log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
-            for( j = 1; j < halfVectorSize * 2; j++ )
-                log_info( ", 0x%8.8x", i[j] );
-            log_info( " } --> { 0x%8.8x", o[0] );
-            for( j = 1; j < halfVectorSize; j++ )
-                log_info( ", 0x%8.8x", o[j] );
-            log_info( " }\n" );
-            return -1;
-            }
-        }
-        i += 2 * halfVectorSize;
-        o += halfVectorSize;
-            }
-        }
-            break;
-
+            return verify<int>(in, out, elementCount, type, vectorSize,
+                               operatorToUse, cmpVectorSize);
         case 8:
-        {
-            cl_ulong *i = (cl_ulong*)in;
-            cl_ulong *o = (cl_ulong*)out;
-            size_t j;
-            cl_uint k;
-
-            for( k = 0; k  < elementCount; k++ )
-            {
-                cl_ulong *o2 = (cl_ulong*)p;
-                for( j = 0; j < halfVectorSize; j++ )
-                    o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
-
-                if( memcmp( o, o2, elementSize * cmpVectorSize ) )
-                {
-                    log_info( "\n%d) Failure for %s%s.%s { 0x%16.16llx", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
-                    for( j = 1; j < halfVectorSize * 2; j++ )
-                        log_info( ", 0x%16.16llx", i[j] );
-                    log_info( " } --> { 0x%16.16llx", o[0] );
-                    for( j = 1; j < halfVectorSize; j++ )
-                        log_info( ", 0x%16.16llx", o[j] );
-                    log_info( " }\n" );
-                    return -1;
-                }
-                i += 2 * halfVectorSize;
-                o += halfVectorSize;
-            }
-        }
-            break;
-
-        default:
-            log_info( "Internal error. Unknown data type\n" );
-            return -2;
+            return verify<cl_ulong>(in, out, elementCount, type, vectorSize,
+                                    operatorToUse, cmpVectorSize);
+        default: log_info("Internal error. Unknown data type\n"); return -2;
     }
-
-    return 0;
 }
-
-
-

From 969238050574393c0cd2ba28f595e4b078af0b59 Mon Sep 17 00:00:00 2001
From: Sreelakshmi Haridas Maruthur <sharidas@quicinc.com>
Date: Tue, 30 May 2023 09:49:31 -0600
Subject: [PATCH 17/30] Semaphore test: Use blocking semaphores (#1675)

The semaphore spec has been updated to reflect the fact that semaphores
will be in the appropriate state - pending signal or pending wait -
when returning from the clEnqueueSignalSemaphoresKHR or
clEnqueueWaitSemaphoresKHR commands: KhronosGroup/OpenCL-Docs#882

Deleted the following tests to match the updated spec:

semaphores_order_1 - Test calls clEnqueueWaitSemaphoresKHR before calling
clEnqueueSignalSemaphoresKHR and expects this wait to succeed.
This behavior is not compatible with the recent
spec updates to semaphores.

semaphores_order_2 & semaphores_order_3 - Calling
clEnqueueSignalSemaphoresKHR with a dependency on a user event may
cause the implementation to block until the user event is complete.
This is unsafe usage of clEnqueueSignalSemaphoresKHR and may
lead to deadlock.

semaphores_invalid_command - This test checks for specific behavior
when waiting on a semaphore in an invalid state.
According to the spec, this is undefined behavior,
and therefore cannot be tested directly.

Co-authored-by: Joshua Kelly <joshkell@qti.qualcomm.com>
---
 .../extensions/cl_khr_semaphore/main.cpp      |   6 +-
 .../extensions/cl_khr_semaphore/procs.h       |  12 +-
 .../cl_khr_semaphore/test_semaphores.cpp      | 403 +-----------------
 3 files changed, 5 insertions(+), 416 deletions(-)

diff --git a/test_conformance/extensions/cl_khr_semaphore/main.cpp b/test_conformance/extensions/cl_khr_semaphore/main.cpp
index ab9699b0..0ae7206a 100644
--- a/test_conformance/extensions/cl_khr_semaphore/main.cpp
+++ b/test_conformance/extensions/cl_khr_semaphore/main.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
+// Copyright (c) 2023 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -34,11 +34,7 @@ test_definition test_list[] = {
     ADD_TEST_VERSION(semaphores_multi_signal, Version(1, 2)),
     ADD_TEST_VERSION(semaphores_multi_wait, Version(1, 2)),
     ADD_TEST_VERSION(semaphores_queries, Version(1, 2)),
-    ADD_TEST_VERSION(semaphores_order_1, Version(1, 2)),
-    ADD_TEST_VERSION(semaphores_order_2, Version(1, 2)),
-    ADD_TEST_VERSION(semaphores_order_3, Version(1, 2)),
     ADD_TEST_VERSION(semaphores_import_export_fd, Version(1, 2)),
-    ADD_TEST_VERSION(semaphores_invalid_command, Version(1, 2)),
 };
 
 const int test_num = ARRAY_SIZE(test_list);
diff --git a/test_conformance/extensions/cl_khr_semaphore/procs.h b/test_conformance/extensions/cl_khr_semaphore/procs.h
index 06651af4..f7c1aaa3 100644
--- a/test_conformance/extensions/cl_khr_semaphore/procs.h
+++ b/test_conformance/extensions/cl_khr_semaphore/procs.h
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
+// Copyright (c) 2023 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -41,17 +41,7 @@ extern int test_semaphores_multi_wait(cl_device_id deviceID, cl_context context,
                                       cl_command_queue queue, int num_elements);
 extern int test_semaphores_queries(cl_device_id deviceID, cl_context context,
                                    cl_command_queue queue, int num_elements);
-extern int test_semaphores_order_1(cl_device_id deviceID, cl_context context,
-                                   cl_command_queue queue, int num_elements);
-extern int test_semaphores_order_2(cl_device_id deviceID, cl_context context,
-                                   cl_command_queue queue, int num_elements);
-extern int test_semaphores_order_3(cl_device_id deviceID, cl_context context,
-                                   cl_command_queue queue, int num_elements);
 extern int test_semaphores_import_export_fd(cl_device_id deviceID,
                                             cl_context context,
                                             cl_command_queue queue,
                                             int num_elements);
-extern int test_semaphores_invalid_command(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
index 7d03bff3..36bb8ad5 100644
--- a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
+++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2022 The Khronos Group Inc.
+// Copyright (c) 2023 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -646,303 +646,6 @@ int test_semaphores_queries(cl_device_id deviceID, cl_context context,
     return TEST_PASS;
 }
 
-// Confirm that it is possible to enqueue a signal of wait and signal in any
-// order as soon as the submission order (after deferred dependencies) is
-// correct. Case: first one deferred wait, then one non deferred signal.
-int test_semaphores_order_1(cl_device_id deviceID, cl_context context,
-                            cl_command_queue defaultQueue, int num_elements)
-{
-    cl_int err;
-
-    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
-    {
-        log_info("cl_khr_semaphore is not supported on this platoform. "
-                 "Skipping test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
-    GET_PFN(deviceID, clReleaseSemaphoreKHR);
-
-    // Create ooo queue
-    clCommandQueueWrapper queue = clCreateCommandQueue(
-        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
-    test_error(err, "Could not create command queue");
-
-    // Create semaphore
-    cl_semaphore_properties_khr sema_props[] = {
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
-        0
-    };
-    cl_semaphore_khr sema =
-        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
-    test_error(err, "Could not create semaphore");
-
-    // Create user event
-    clEventWrapper user_event = clCreateUserEvent(context, &err);
-    test_error(err, "Could not create user event");
-
-    // Wait semaphore (dependency on user_event)
-    clEventWrapper wait_event;
-    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event,
-                                     &wait_event);
-    test_error(err, "Could not wait semaphore");
-
-    // Signal semaphore
-    clEventWrapper signal_event;
-    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 0, nullptr,
-                                       &signal_event);
-    test_error(err, "Could not signal semaphore");
-
-    // Flush and delay
-    err = clFlush(queue);
-    test_error(err, "Could not flush queue");
-    std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
-
-    // Ensure signal event is completed while wait event is not
-    test_assert_event_complete(signal_event);
-    test_assert_event_inprogress(wait_event);
-
-    // Complete user_event
-    err = clSetUserEventStatus(user_event, CL_COMPLETE);
-    test_error(err, "Could not set user event to CL_COMPLETE");
-
-    // Finish
-    err = clFinish(queue);
-    test_error(err, "Could not finish queue");
-
-    // Ensure all events are completed
-    test_assert_event_complete(signal_event);
-    test_assert_event_complete(wait_event);
-
-    // Release semaphore
-    err = clReleaseSemaphoreKHR(sema);
-    test_error(err, "Could not release semaphore");
-
-    return TEST_PASS;
-}
-
-// Confirm that it is possible to enqueue a signal of wait and signal in any
-// order as soon as the submission order (after deferred dependencies) is
-// correct. Case: first two deferred signals, then one deferred wait. Unblock
-// signal, then unblock wait. When wait completes, unblock the other signal.
-int test_semaphores_order_2(cl_device_id deviceID, cl_context context,
-                            cl_command_queue defaultQueue, int num_elements)
-{
-    cl_int err;
-
-    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
-    {
-        log_info("cl_khr_semaphore is not supported on this platoform. "
-                 "Skipping test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
-    GET_PFN(deviceID, clReleaseSemaphoreKHR);
-
-    // Create ooo queue
-    clCommandQueueWrapper queue = clCreateCommandQueue(
-        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
-    test_error(err, "Could not create command queue");
-
-    // Create semaphore
-    cl_semaphore_properties_khr sema_props[] = {
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
-        0
-    };
-    cl_semaphore_khr sema =
-        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
-    test_error(err, "Could not create semaphore");
-
-    // Create user events
-    clEventWrapper user_event_1 = clCreateUserEvent(context, &err);
-    test_error(err, "Could not create user event");
-
-    clEventWrapper user_event_2 = clCreateUserEvent(context, &err);
-    test_error(err, "Could not create user event");
-
-    clEventWrapper user_event_3 = clCreateUserEvent(context, &err);
-    test_error(err, "Could not create user event");
-
-    // Signal semaphore (dependency on user_event_1)
-    clEventWrapper signal_1_event;
-    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1,
-                                       &user_event_1, &signal_1_event);
-    test_error(err, "Could not signal semaphore");
-
-    // Signal semaphore (dependency on user_event_2)
-    clEventWrapper signal_2_event;
-    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1,
-                                       &user_event_2, &signal_2_event);
-    test_error(err, "Could not signal semaphore");
-
-    // Wait semaphore (dependency on user_event_3)
-    clEventWrapper wait_event;
-    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event_3,
-                                     &wait_event);
-    test_error(err, "Could not wait semaphore");
-
-    // Complete user_event_1
-    err = clSetUserEventStatus(user_event_1, CL_COMPLETE);
-    test_error(err, "Could not set user event to CL_COMPLETE");
-
-    // Complete user_event_3
-    err = clSetUserEventStatus(user_event_3, CL_COMPLETE);
-    test_error(err, "Could not set user event to CL_COMPLETE");
-
-    // Flush and delay
-    err = clFlush(queue);
-    test_error(err, "Could not flush queue");
-    std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
-
-    // Ensure all events are completed except for second signal
-    test_assert_event_complete(signal_1_event);
-    test_assert_event_inprogress(signal_2_event);
-    test_assert_event_complete(wait_event);
-
-    // Complete user_event_2
-    err = clSetUserEventStatus(user_event_2, CL_COMPLETE);
-    test_error(err, "Could not set user event to CL_COMPLETE");
-
-    // Finish
-    err = clFinish(queue);
-    test_error(err, "Could not finish queue");
-
-    // Ensure all events are completed
-    test_assert_event_complete(signal_1_event);
-    test_assert_event_complete(signal_2_event);
-    test_assert_event_complete(wait_event);
-
-    // Release semaphore
-    err = clReleaseSemaphoreKHR(sema);
-    test_error(err, "Could not release semaphore");
-
-    return TEST_PASS;
-}
-
-// Confirm that it is possible to enqueue a signal of wait and signal in any
-// order as soon as the submission order (after deferred dependencies) is
-// correct. Case: first two deferred signals, then two deferred waits. Unblock
-// one signal and one wait (both blocked by the same user event). When wait
-// completes, unblock the other signal. Then unblock the other wait.
-int test_semaphores_order_3(cl_device_id deviceID, cl_context context,
-                            cl_command_queue defaultQueue, int num_elements)
-{
-    cl_int err;
-
-    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
-    {
-        log_info("cl_khr_semaphore is not supported on this platoform. "
-                 "Skipping test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
-    GET_PFN(deviceID, clReleaseSemaphoreKHR);
-
-    // Create ooo queue
-    clCommandQueueWrapper queue = clCreateCommandQueue(
-        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
-    test_error(err, "Could not create command queue");
-
-    // Create semaphore
-    cl_semaphore_properties_khr sema_props[] = {
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
-        0
-    };
-    cl_semaphore_khr sema =
-        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
-    test_error(err, "Could not create semaphore");
-
-    // Create user events
-    clEventWrapper user_event_1 = clCreateUserEvent(context, &err);
-    test_error(err, "Could not create user event");
-
-    clEventWrapper user_event_2 = clCreateUserEvent(context, &err);
-    test_error(err, "Could not create user event");
-
-    clEventWrapper user_event_3 = clCreateUserEvent(context, &err);
-    test_error(err, "Could not create user event");
-
-    // Signal semaphore (dependency on user_event_1)
-    clEventWrapper signal_1_event;
-    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1,
-                                       &user_event_1, &signal_1_event);
-    test_error(err, "Could not signal semaphore");
-
-    // Signal semaphore (dependency on user_event_2)
-    clEventWrapper signal_2_event;
-    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1,
-                                       &user_event_2, &signal_2_event);
-    test_error(err, "Could not signal semaphore");
-
-    // Wait semaphore (dependency on user_event_3)
-    clEventWrapper wait_1_event;
-    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event_3,
-                                     &wait_1_event);
-    test_error(err, "Could not wait semaphore");
-
-    // Wait semaphore (dependency on user_event_2)
-    clEventWrapper wait_2_event;
-    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event_2,
-                                     &wait_2_event);
-    test_error(err, "Could not wait semaphore");
-
-    // Complete user_event_2
-    err = clSetUserEventStatus(user_event_2, CL_COMPLETE);
-    test_error(err, "Could not set user event to CL_COMPLETE");
-
-    // Flush and delay
-    err = clFlush(queue);
-    test_error(err, "Could not flush queue");
-    std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
-
-    // Ensure only second signal and second wait completed
-    cl_event event_list[] = { signal_2_event, wait_2_event };
-    err = clWaitForEvents(2, event_list);
-    test_error(err, "Could not wait for events");
-
-    test_assert_event_inprogress(signal_1_event);
-    test_assert_event_inprogress(wait_1_event);
-
-    // Complete user_event_1
-    err = clSetUserEventStatus(user_event_1, CL_COMPLETE);
-    test_error(err, "Could not set user event to CL_COMPLETE");
-
-    // Complete user_event_3
-    err = clSetUserEventStatus(user_event_3, CL_COMPLETE);
-    test_error(err, "Could not set user event to CL_COMPLETE");
-
-    // Finish
-    err = clFinish(queue);
-    test_error(err, "Could not finish queue");
-
-    // Ensure all events are completed
-    test_assert_event_complete(signal_1_event);
-    test_assert_event_complete(signal_2_event);
-    test_assert_event_complete(wait_1_event);
-    test_assert_event_complete(wait_2_event);
-
-    // Release semaphore
-    err = clReleaseSemaphoreKHR(sema);
-    test_error(err, "Could not release semaphore");
-
-    return TEST_PASS;
-}
-
 // Test it is possible to export a semaphore to a sync fd and import the same
 // sync fd to a new semaphore
 int test_semaphores_import_export_fd(cl_device_id deviceID, cl_context context,
@@ -985,6 +688,8 @@ int test_semaphores_import_export_fd(cl_device_id deviceID, cl_context context,
             CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
         static_cast<cl_semaphore_properties_khr>(
             CL_SEMAPHORE_HANDLE_SYNC_FD_KHR),
+        static_cast<cl_semaphore_properties_khr>(
+            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
         0
     };
     cl_semaphore_khr sema_1 =
@@ -1039,107 +744,5 @@ int test_semaphores_import_export_fd(cl_device_id deviceID, cl_context context,
 
     err = clReleaseSemaphoreKHR(sema_2);
     test_error(err, "Could not release semaphore");
-    return TEST_PASS;
-}
-
-// Test that an invalid semaphore command results in the invalidation of the
-// command's event and the dependencies' events
-int test_semaphores_invalid_command(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue defaultQueue,
-                                    int num_elements)
-{
-    cl_int err;
-
-    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
-    {
-        log_info("cl_khr_semaphore is not supported on this platoform. "
-                 "Skipping test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
-    GET_PFN(deviceID, clReleaseSemaphoreKHR);
-
-    // Create ooo queue
-    clCommandQueueWrapper queue = clCreateCommandQueue(
-        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
-    test_error(err, "Could not create command queue");
-
-    // Create semaphores
-    cl_semaphore_properties_khr sema_props[] = {
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
-        0
-    };
-    cl_semaphore_khr sema_1 =
-        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
-    test_error(err, "Could not create semaphore");
-
-    cl_semaphore_khr sema_2 =
-        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
-    test_error(err, "Could not create semaphore");
-
-    // Create user events
-    clEventWrapper user_event_1 = clCreateUserEvent(context, &err);
-    test_error(err, "Could not create user event");
-
-    clEventWrapper user_event_2 = clCreateUserEvent(context, &err);
-    test_error(err, "Could not create user event");
-
-    // Signal semaphore_1 (dependency on user_event_1)
-    clEventWrapper signal_1_event;
-    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_1, nullptr, 1,
-                                       &user_event_1, &signal_1_event);
-    test_error(err, "Could not signal semaphore");
-
-    // Wait semaphore_1 and semaphore_2 (dependency on user_event_1)
-    clEventWrapper wait_event;
-    cl_semaphore_khr sema_list[] = { sema_1, sema_2 };
-    err = clEnqueueWaitSemaphoresKHR(queue, 2, sema_list, nullptr, 1,
-                                     &user_event_1, &wait_event);
-    test_error(err, "Could not wait semaphore");
-
-    // Signal semaphore_1 (dependency on wait_event and user_event_2)
-    clEventWrapper signal_2_event;
-    cl_event wait_list[] = { user_event_2, wait_event };
-    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_1, nullptr, 2, wait_list,
-                                       &signal_2_event);
-    test_error(err, "Could not signal semaphore");
-
-    // Flush and delay
-    err = clFlush(queue);
-    test_error(err, "Could not flush queue");
-    std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
-
-    // Ensure all events are not completed
-    test_assert_event_inprogress(signal_1_event);
-    test_assert_event_inprogress(signal_2_event);
-    test_assert_event_inprogress(wait_event);
-
-    // Complete user_event_1 (expect failure as waiting on semaphore_2 is not
-    // allowed (unsignaled)
-    err = clSetUserEventStatus(user_event_1, CL_COMPLETE);
-    test_assert_error(err != CL_SUCCESS,
-                      "signal_2_event completed unexpectedly");
-
-    // Ensure signal_1 is completed while others failed (the second signal
-    // should fail as it depends on wait)
-    err = clFinish(queue);
-    test_error(err, "Could not finish queue");
-
-    test_assert_event_complete(signal_1_event);
-    test_assert_event_terminated(wait_event);
-    test_assert_event_terminated(signal_2_event);
-
-    // Release semaphore
-    err = clReleaseSemaphoreKHR(sema_1);
-    test_error(err, "Could not release semaphore");
-
-    err = clReleaseSemaphoreKHR(sema_2);
-    test_error(err, "Could not release semaphore");
-
     return TEST_PASS;
 }
\ No newline at end of file

From c58ead9aeaa7baaf8bcbec8642e79d3ce1cc1e09 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 30 May 2023 17:52:06 +0200
Subject: [PATCH 18/30] Added cl_khr_fp16 extension support for test_astype
 from basic (#1706)

* Added support for cl_khr_fp16 extension in test_astype from basic (issue #142, basic)

* Added correction to iterate over vector of types

* Fixed case with both fp16 and fp64 supported

* Cosmetic corrections due to code review

* Cosmetic corrections due to code review
---
 test_conformance/basic/test_astype.cpp | 203 +++++++++++--------------
 test_conformance/basic/utils.h         |  41 +++++
 2 files changed, 126 insertions(+), 118 deletions(-)
 create mode 100644 test_conformance/basic/utils.h

diff --git a/test_conformance/basic/test_astype.cpp b/test_conformance/basic/test_astype.cpp
index 7281f904..08a4cb85 100644
--- a/test_conformance/basic/test_astype.cpp
+++ b/test_conformance/basic/test_astype.cpp
@@ -15,61 +15,39 @@
 //
 #include "harness/compat.h"
 
+#include <limits.h>
 #include <stdio.h>
 #include <string.h>
-#include <limits.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <vector>
 
-
-#include "procs.h"
 #include "harness/conversions.h"
 #include "harness/typeWrappers.h"
 
+#include "procs.h"
+#include "utils.h"
 
-static const char *astype_kernel_pattern =
-"%s\n"
+// clang-format off
+
+static char extension[128] = { 0 };
+static char strLoad[128] = { 0 };
+static char strStore[128] = { 0 };
+static const char *regLoad = "as_%s%s(src[tid]);\n";
+static const char *v3Load = "as_%s%s(vload3(tid,(__global %s*)src));\n";
+static const char *regStore = "dst[tid] = tmp;\n";
+static const char *v3Store = "vstore3(tmp, tid, (__global %s*)dst);\n";
+
+static const char* astype_kernel_pattern[] = {
+extension,
 "__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n"
 "{\n"
-"    int tid = get_global_id( 0 );\n"
-"    %s%s tmp = as_%s%s( src[ tid ] );\n"
-"   dst[ tid ] = tmp;\n"
-"}\n";
-
-static const char *astype_kernel_pattern_V3srcV3dst =
-"%s\n"
-"__kernel void test_fn( __global %s *src, __global %s *dst )\n"
-"{\n"
-"    int tid = get_global_id( 0 );\n"
-"    %s%s tmp = as_%s%s( vload3(tid,src) );\n"
-"   vstore3(tmp,tid,dst);\n"
-"}\n";
-// in the printf, remove the third and fifth argument, each of which
-// should be a "3", when copying from the printf for astype_kernel_pattern
-
-static const char *astype_kernel_pattern_V3dst =
-"%s\n"
-"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n"
-"{\n"
-"    int tid = get_global_id( 0 );\n"
-"    %s3 tmp = as_%s3( src[ tid ] );\n"
-"   vstore3(tmp,tid,dst);\n"
-"}\n";
-// in the printf, remove the fifth argument, which
-// should be a "3", when copying from the printf for astype_kernel_pattern
-
-
-static const char *astype_kernel_pattern_V3src =
-"%s\n"
-"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n"
-"{\n"
-"    int tid = get_global_id( 0 );\n"
-"    %s%s tmp = as_%s%s( vload3(tid,src) );\n"
-"   dst[ tid ] = tmp;\n"
-"}\n";
-// in the printf, remove the third argument, which
-// should be a "3", when copying from the printf for astype_kernel_pattern
+"    int tid = get_global_id( 0 );\n",
+"    %s%s tmp = ", strLoad,
+"    ", strStore,
+"}\n"};
 
+// clang-format on
 
 int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType,
                     unsigned int vecSize, unsigned int outVecSize,
@@ -81,68 +59,60 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q
     clKernelWrapper kernel;
     clMemWrapper streams[ 2 ];
 
-    char programSrc[ 10240 ];
     size_t threads[ 1 ], localThreads[ 1 ];
     size_t typeSize = get_explicit_type_size( inVecType );
     size_t outTypeSize = get_explicit_type_size(outVecType);
     char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
-    MTdata d;
+    MTdataHolder d(gRandomSeed);
 
+    std::ostringstream sstr;
+    if (outVecType == kDouble || inVecType == kDouble)
+        sstr << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
 
+    if (outVecType == kHalf || inVecType == kHalf)
+        sstr << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
 
-    // Create program
-    if(outVecSize == 3 && vecSize == 3) {
-        // astype_kernel_pattern_V3srcV3dst
-        sprintf( programSrc, astype_kernel_pattern_V3srcV3dst,
-                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( inVecType ), // sizeNames[ vecSize ],
-                get_explicit_type_name( outVecType ), // sizeNames[ outVecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] );
-    } else if(outVecSize == 3) {
-        // astype_kernel_pattern_V3dst
-        sprintf( programSrc, astype_kernel_pattern_V3dst,
-                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
-                get_explicit_type_name( outVecType ),
-                get_explicit_type_name( outVecType ),
-                get_explicit_type_name( outVecType ));
+    strcpy(extension, sstr.str().c_str());
 
-    } else if(vecSize == 3) {
-        // astype_kernel_pattern_V3src
-        sprintf( programSrc, astype_kernel_pattern_V3src,
-                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( inVecType ),// sizeNames[ vecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
-    } else {
-        sprintf( programSrc, astype_kernel_pattern,
-                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
-    }
+    if (vecSize == 3)
+        std::snprintf(strLoad, sizeof(strLoad), v3Load,
+                      get_explicit_type_name(outVecType), sizeNames[outVecSize],
+                      get_explicit_type_name(inVecType));
+    else
+        std::snprintf(strLoad, sizeof(strLoad), regLoad,
+                      get_explicit_type_name(outVecType),
+                      sizeNames[outVecSize]);
 
-    const char *ptr = programSrc;
+    if (outVecSize == 3)
+        std::snprintf(strStore, sizeof(strStore), v3Store,
+                      get_explicit_type_name(outVecType));
+    else
+        std::snprintf(strStore, sizeof(strStore), "%s", regStore);
+
+    auto str =
+        concat_kernel(astype_kernel_pattern,
+                      sizeof(astype_kernel_pattern) / sizeof(const char *));
+    std::string kernelSource =
+        str_sprintf(str, get_explicit_type_name(inVecType), sizeNames[vecSize],
+                    get_explicit_type_name(outVecType), sizeNames[outVecSize],
+                    get_explicit_type_name(outVecType), sizeNames[outVecSize]);
+
+    const char *ptr = kernelSource.c_str();
     error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
     test_error( error, "Unable to create testing kernel" );
 
-
     // Create some input values
     size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize;
-    char *inBuffer = (char*)malloc( inBufferSize );
+    std::vector<char> inBuffer(inBufferSize);
     size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize;
-    char *outBuffer = (char*)malloc( outBufferSize );
+    std::vector<char> outBuffer(outBufferSize);
 
-    d = init_genrand( gRandomSeed );
-    generate_random_data( inVecType, numElements * vecSize,
-                         d, inBuffer );
-    free_mtdata(d); d = NULL;
+    generate_random_data(inVecType, numElements * vecSize, d,
+                         &inBuffer.front());
 
     // Create I/O streams and set arguments
-    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error );
+    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize,
+                                &inBuffer.front(), &error);
     test_error( error, "Unable to create I/O stream" );
     streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error );
     test_error( error, "Unable to create I/O stream" );
@@ -161,15 +131,15 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q
     error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
     test_error( error, "Unable to run kernel" );
 
-
     // Get the results and compare
     // The beauty is that astype is supposed to return the bit pattern as a different type, which means
     // the output should have the exact same bit pattern as the input. No interpretation necessary!
-    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL );
+    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, outBufferSize,
+                                &outBuffer.front(), 0, NULL, NULL);
     test_error( error, "Unable to read results" );
 
-    char *expected = inBuffer;
-    char *actual = outBuffer;
+    char *expected = &inBuffer.front();
+    char *actual = &outBuffer.front();
     size_t compSize = typeSize*vecSize;
     if(outTypeSize*outVecSize < compSize) {
         compSize = outTypeSize*outVecSize;
@@ -178,8 +148,6 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q
     if(outVecSize == 4 && vecSize == 3)
     {
         // as_type4(vec3) should compile but produce undefined results??
-        free(inBuffer);
-        free(outBuffer);
         return 0;
     }
 
@@ -188,8 +156,6 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q
         // as_typen(vecm) should compile and run but produce
         // implementation-defined results for m != n
         // and n*sizeof(type) = sizeof(vecm)
-        free(inBuffer);
-        free(outBuffer);
         return 0;
     }
 
@@ -203,17 +169,14 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q
                       GetDataVectorString( expected, typeSize, vecSize, expectedString ),
                       GetDataVectorString( actual, typeSize, vecSize, actualString ) );
             log_error("Src is :\n%s\n----\n%d threads %d localthreads\n",
-                      programSrc, (int)threads[0],(int) localThreads[0]);
-            free(inBuffer);
-            free(outBuffer);
+                      kernelSource.c_str(), (int)threads[0],
+                      (int)localThreads[0]);
             return 1;
         }
         expected += typeSize * vecSize;
         actual += outTypeSize * outVecSize;
     }
 
-    free(inBuffer);
-    free(outBuffer);
     return 0;
 }
 
@@ -223,31 +186,39 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue,
     // legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way
     // for us to verify what is "valid". So the only thing we can test are types that match in size independent
     // of the element count (char -> uchar, etc)
-    ExplicitType vecTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
-    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
+    const std::vector<ExplicitType> vecTypes = { kChar,   kUChar, kShort,
+                                                 kUShort, kInt,   kUInt,
+                                                 kLong,   kULong, kFloat,
+                                                 kHalf,   kDouble };
+    const unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
     unsigned int inTypeIdx, outTypeIdx, sizeIdx, outSizeIdx;
     size_t inTypeSize, outTypeSize;
     int error = 0;
 
-    for( inTypeIdx = 0; vecTypes[ inTypeIdx ] != kNumExplicitTypes; inTypeIdx++ )
+    bool fp16Support = is_extension_available(device, "cl_khr_fp16");
+    bool fp64Support = is_extension_available(device, "cl_khr_fp64");
+
+    auto skip_type = [&](ExplicitType et) {
+        if ((et == kLong || et == kULong) && !gHasLong)
+            return true;
+        else if (et == kDouble && !fp64Support)
+            return true;
+        else if (et == kHalf && !fp16Support)
+            return true;
+        return false;
+    };
+
+    for (inTypeIdx = 0; inTypeIdx < vecTypes.size(); inTypeIdx++)
     {
         inTypeSize = get_explicit_type_size(vecTypes[inTypeIdx]);
 
-        if( vecTypes[ inTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
-            continue;
+        if (skip_type(vecTypes[inTypeIdx])) continue;
 
-        if (( vecTypes[ inTypeIdx ] == kLong || vecTypes[ inTypeIdx ] == kULong ) && !gHasLong )
-            continue;
-
-        for( outTypeIdx = 0; vecTypes[ outTypeIdx ] != kNumExplicitTypes; outTypeIdx++ )
+        for (outTypeIdx = 0; outTypeIdx < vecTypes.size(); outTypeIdx++)
         {
             outTypeSize = get_explicit_type_size(vecTypes[outTypeIdx]);
-            if( vecTypes[ outTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) {
-                continue;
-            }
 
-            if (( vecTypes[ outTypeIdx ] == kLong || vecTypes[ outTypeIdx ] == kULong ) && !gHasLong )
-                continue;
+            if (skip_type(vecTypes[outTypeIdx])) continue;
 
             // change this check
             if( inTypeIdx == outTypeIdx ) {
@@ -259,7 +230,6 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue,
 
             for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ )
             {
-
                 for(outSizeIdx = 0; vecSizes[outSizeIdx] != 0; outSizeIdx++)
                 {
                     if(vecSizes[sizeIdx]*inTypeSize !=
@@ -268,10 +238,7 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue,
                         continue;
                     }
                     error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], vecSizes[ sizeIdx ], vecSizes[outSizeIdx], n_elems );
-
-
                 }
-
             }
             if(get_explicit_type_size(vecTypes[inTypeIdx]) ==
                get_explicit_type_size(vecTypes[outTypeIdx])) {
diff --git a/test_conformance/basic/utils.h b/test_conformance/basic/utils.h
new file mode 100644
index 00000000..3f6bf64d
--- /dev/null
+++ b/test_conformance/basic/utils.h
@@ -0,0 +1,41 @@
+//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef BASIC_UTILS_H
+#define BASIC_UTILS_H
+
+#include <memory>
+#include <string>
+
+inline std::string concat_kernel(const char *sstr[], int num)
+{
+    std::string res;
+    for (int i = 0; i < num; i++) res += std::string(sstr[i]);
+    return res;
+}
+
+template <typename... Args>
+inline std::string str_sprintf(const std::string &str, Args... args)
+{
+    int str_size = std::snprintf(nullptr, 0, str.c_str(), args...) + 1;
+    if (str_size <= 0) throw std::runtime_error("Formatting error.");
+    size_t s = static_cast<size_t>(str_size);
+    std::unique_ptr<char[]> buffer(new char[s]);
+    std::snprintf(buffer.get(), s, str.c_str(), args...);
+    return std::string(buffer.get(), buffer.get() + s - 1);
+}
+
+#endif // BASIC_UTIL_H

From b3c1401d482252f7f65110a4bff8721c02a34b72 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 30 May 2023 17:52:27 +0200
Subject: [PATCH 19/30] Added cl_khr_fp16 extension support for
 test_async_strided_copy from basic (issue #142, basic) (#1711)

---
 .../basic/test_async_strided_copy.cpp         | 87 ++++++++++---------
 1 file changed, 48 insertions(+), 39 deletions(-)

diff --git a/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/basic/test_async_strided_copy.cpp
index c456f38d..4a848c0f 100644
--- a/test_conformance/basic/test_async_strided_copy.cpp
+++ b/test_conformance/basic/test_async_strided_copy.cpp
@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
+// Copyright (c) 2023 The Khronos Group Inc.
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -20,15 +20,16 @@
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-
-
+#include <vector>
 
 #include "procs.h"
 #include "harness/conversions.h"
 
+// clang-format off
+
 static const char *async_strided_global_to_local_kernel =
 "%s\n" // optional pragma string
-"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
+"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
 "{\n"
 " int i;\n"
 // Zero the local storage first
@@ -46,7 +47,7 @@ static const char *async_strided_global_to_local_kernel =
 
 static const char *async_strided_local_to_global_kernel =
 "%s\n" // optional pragma string
-"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
+"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
 "{\n"
 " int i;\n"
 // Zero the local storage first
@@ -63,6 +64,7 @@ static const char *async_strided_local_to_global_kernel =
 " wait_group_events( 1, &event );\n"
 "}\n" ;
 
+// clang-format on
 
 int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, ExplicitType vecType, int vecSize, int stride)
 {
@@ -71,8 +73,7 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
     clKernelWrapper kernel;
     clMemWrapper streams[ 2 ];
     size_t threads[ 1 ], localThreads[ 1 ];
-    void *inBuffer, *outBuffer;
-    MTdata d;
+    MTdataHolder d(gRandomSeed);
     char vecNameString[64]; vecNameString[0] = 0;
 
     if (vecSize == 1)
@@ -94,10 +95,15 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
     char programSource[4096]; programSource[0]=0;
     char *programPtr;
 
-    sprintf(programSource, kernelCode,
-        vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-        "",
-        vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
+    std::string extStr = "";
+    if (vecType == kDouble)
+        extStr = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
+    else if (vecType == kHalf)
+        extStr = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable";
+
+    sprintf(programSource, kernelCode, extStr.c_str(), vecNameString,
+            vecNameString, vecNameString, vecNameString,
+            get_explicit_type_name(vecType), vecNameString, vecNameString);
     //log_info("program: %s\n", programSource);
     programPtr = programSource;
 
@@ -151,9 +157,9 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
     size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize*stride;
     size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;
 
-    inBuffer = (void*)malloc(globalBufferSize);
-    outBuffer = (void*)malloc(globalBufferSize);
-    memset(outBuffer, 0, globalBufferSize);
+    std::vector<unsigned char> inBuffer(globalBufferSize);
+    std::vector<unsigned char> outBuffer(globalBufferSize);
+    memset(outBuffer.data(), 0, globalBufferSize);
 
     cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
     copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
@@ -165,13 +171,15 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
     threads[0] = globalWorkgroupSize;
     localThreads[0] = localWorkgroupSize;
 
-    d = init_genrand( gRandomSeed );
-    generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
-    free_mtdata(d); d = NULL;
+    generate_random_data(vecType,
+                         globalBufferSize / get_explicit_type_size(vecType), d,
+                         inBuffer.data());
 
-    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
+    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize,
+                                inBuffer.data(), &error);
     test_error( error, "Unable to create input buffer" );
-    streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
+    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize,
+                                outBuffer.data(), &error);
     test_error( error, "Unable to create output buffer" );
 
     error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
@@ -192,17 +200,20 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
     test_error( error, "Unable to queue kernel" );
 
     // Read
-    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
+    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, globalBufferSize,
+                                outBuffer.data(), 0, NULL, NULL);
     test_error( error, "Unable to read results" );
 
     // Verify
     size_t typeSize = get_explicit_type_size(vecType)* vecSize;
     for (int i=0; i<(int)globalBufferSize; i+=(int)elementSize*(int)stride)
     {
-        if (memcmp( ((char *)inBuffer)+i, ((char *)outBuffer)+i, typeSize) != 0 )
+        if (memcmp(&inBuffer.at(i), &outBuffer.at(i), typeSize) != 0)
         {
-            unsigned char * inchar = (unsigned char*)inBuffer + i;
-            unsigned char * outchar = (unsigned char*)outBuffer + i;
+            unsigned char *inchar =
+                static_cast<unsigned char *>(inBuffer.data());
+            unsigned char *outchar =
+                static_cast<unsigned char *>(outBuffer.data());
             char values[4096];
             values[0] = 0;
 
@@ -215,34 +226,35 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
                 sprintf(values + strlen( values), "%2x ", outchar[j]);
             sprintf(values + strlen(values), "]");
             log_error("%s\n", values);
-            free(inBuffer);
-            free(outBuffer);
             return -1;
         }
     }
 
-    free(inBuffer);
-    free(outBuffer);
-
     return 0;
 }
 
 int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode)
 {
-    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
-    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
-    unsigned int strideSizes[] = { 1, 3, 4, 5, 0 };
+    const std::vector<ExplicitType> vecType = { kChar,  kUChar, kShort, kUShort,
+                                                kInt,   kUInt,  kLong,  kULong,
+                                                kFloat, kHalf,  kDouble };
+    const unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
+    const unsigned int strideSizes[] = { 1, 3, 4, 5, 0 };
     unsigned int size, typeIndex, stride;
 
     int errors = 0;
 
-    for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
-    {
-        if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
-            continue;
+    bool fp16Support = is_extension_available(deviceID, "cl_khr_fp16");
+    bool fp64Support = is_extension_available(deviceID, "cl_khr_fp64");
 
+    for (typeIndex = 0; typeIndex < vecType.size(); typeIndex++)
+    {
         if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong )
             continue;
+        else if (vecType[typeIndex] == kDouble && !fp64Support)
+            continue;
+        else if (vecType[typeIndex] == kHalf && !fp16Support)
+            continue;
 
         for( size = 0; vecSizes[ size ] != 0; size++ )
         {
@@ -260,9 +272,6 @@ int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_co
     return 0;
 }
 
-
-
-
 int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     return test_strided_copy_all_types( deviceID, context, queue, async_strided_global_to_local_kernel );

From 38abfc7d24fdfaf7311810fcc4c5560b019757cf Mon Sep 17 00:00:00 2001
From: Zhaoyu Zhang <dreamibor@users.noreply.github.com>
Date: Tue, 30 May 2023 23:54:32 +0800
Subject: [PATCH 20/30] select: using clEnqueueReadBuffer rather than
 clEnqueueMapBuffer (#1712)

* select: using clEnqueueReadBuffer rather than clEnqueueMapBuffer

* Update code to be compatible with clang-format

* update code again to be compatible with clang-format

* update code again to comply with clang-format

* update code again to be compatible with clang-format

The clang-format tool is so weird, it's not even consistent between runs...
---
 test_conformance/select/test_select.cpp | 92 +++++++++++++++++++++----
 1 file changed, 77 insertions(+), 15 deletions(-)

diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp
index 7fa3bc08..b0cda09f 100644
--- a/test_conformance/select/test_select.cpp
+++ b/test_conformance/select/test_select.cpp
@@ -303,6 +303,10 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c
     cl_mem dest = NULL;
     void *ref = NULL;
     void *sref = NULL;
+    void *src1_host = NULL;
+    void *src2_host = NULL;
+    void *cmp_host = NULL;
+    void *dest_host = NULL;
 
     cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE;
     size_t block_elements = BUFFER_SIZE / type_size[stype];
@@ -359,6 +363,30 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c
     dest = clCreateBuffer( context, CL_MEM_WRITE_ONLY, BUFFER_SIZE, NULL, &err );
     if( err ) { log_error( "Error: could not allocate dest buffer\n" );  ++s_test_fail; goto exit; }
 
+    src1_host = malloc(BUFFER_SIZE);
+    if (NULL == src1_host)
+    {
+        log_error("Error: could not allocate src1_host buffer\n");
+        goto exit;
+    }
+    src2_host = malloc(BUFFER_SIZE);
+    if (NULL == src2_host)
+    {
+        log_error("Error: could not allocate src2_host buffer\n");
+        goto exit;
+    }
+    cmp_host = malloc(BUFFER_SIZE);
+    if (NULL == cmp_host)
+    {
+        log_error("Error: could not allocate cmp_host buffer\n");
+        goto exit;
+    }
+    dest_host = malloc(BUFFER_SIZE);
+    if (NULL == dest_host)
+    {
+        log_error("Error: could not allocate dest_host buffer\n");
+        goto exit;
+    }
 
     // We block the test as we are running over the range of compare values
     // "block the test" means "break the test into blocks"
@@ -387,13 +415,6 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c
         // Setup the input data to change for each block
         initCmpBuffer(s3, cmptype, i * cmp_stride, block_elements);
 
-        // Create the reference result
-        Select sfunc = (cmptype == ctype[stype][0]) ? vrefSelects[stype][0] : vrefSelects[stype][1];
-        (*sfunc)(ref, s1, s2, s3, block_elements);
-
-        sfunc = (cmptype == ctype[stype][0]) ? refSelects[stype][0] : refSelects[stype][1];
-        (*sfunc)(sref, s1, s2, s3, block_elements);
-
         if( (err = clEnqueueUnmapMemObject( queue, src1, s1, 0, NULL, NULL )))
         { log_error( "Error: coult not unmap src1\n" );  ++s_test_fail; goto exit; }
         if( (err = clEnqueueUnmapMemObject( queue, src2, s2, 0, NULL, NULL )))
@@ -401,6 +422,40 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c
         if( (err = clEnqueueUnmapMemObject( queue, cmp, s3, 0, NULL, NULL )))
         { log_error( "Error: coult not unmap cmp\n" );  ++s_test_fail; goto exit; }
 
+        // Create the reference result
+        err = clEnqueueReadBuffer(queue, src1, CL_TRUE, 0, BUFFER_SIZE,
+                                  src1_host, 0, NULL, NULL);
+        if (err)
+        {
+            log_error("Error: Reading buffer from src1 to src1_host failed\n");
+            ++s_test_fail;
+            goto exit;
+        }
+        err = clEnqueueReadBuffer(queue, src2, CL_TRUE, 0, BUFFER_SIZE,
+                                  src2_host, 0, NULL, NULL);
+        if (err)
+        {
+            log_error("Error: Reading buffer from src2 to src2_host failed\n");
+            ++s_test_fail;
+            goto exit;
+        }
+        err = clEnqueueReadBuffer(queue, cmp, CL_TRUE, 0, BUFFER_SIZE, cmp_host,
+                                  0, NULL, NULL);
+        if (err)
+        {
+            log_error("Error: Reading buffer from cmp to cmp_host failed\n");
+            ++s_test_fail;
+            goto exit;
+        }
+
+        Select sfunc = (cmptype == ctype[stype][0]) ? vrefSelects[stype][0]
+                                                    : vrefSelects[stype][1];
+        (*sfunc)(ref, src1_host, src2_host, cmp_host, block_elements);
+
+        sfunc = (cmptype == ctype[stype][0]) ? refSelects[stype][0]
+                                             : refSelects[stype][1];
+        (*sfunc)(sref, src1_host, src2_host, cmp_host, block_elements);
+
         for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize)
         {
             size_t vector_size = element_count[vecsize] * type_size[stype];
@@ -415,7 +470,6 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c
             if((err = clSetKernelArg(kernels[vecsize], 3,  sizeof cmp, &cmp) ))
             { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; }
 
-
             // Wipe destination
             void *d = clEnqueueMapBuffer( queue, dest, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err );
             if( err ){ log_error( "Error: Could not map dest" );  ++s_test_fail; goto exit; }
@@ -429,18 +483,22 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c
                 goto exit;
             }
 
-            d = clEnqueueMapBuffer( queue, dest, CL_TRUE, CL_MAP_READ, 0, BUFFER_SIZE, 0, NULL, NULL, &err );
-            if( err ){ log_error( "Error: Could not map dest # 2" );  ++s_test_fail; goto exit; }
-
-            if ((*checkResults[stype])(d, vecsize == 0 ? sref : ref, block_elements, element_count[vecsize])!=0){
-                log_error("vec_size:%d indx: 0x%16.16llx\n", (int)element_count[vecsize], i);
+            err = clEnqueueReadBuffer(queue, dest, CL_TRUE, 0, BUFFER_SIZE,
+                                      dest_host, 0, NULL, NULL);
+            if (err)
+            {
+                log_error(
+                    "Error: Reading buffer from dest to dest_host failed\n");
                 ++s_test_fail;
                 goto exit;
             }
 
-            if( (err = clEnqueueUnmapMemObject( queue, dest, d, 0, NULL, NULL ) ) )
+            if ((*checkResults[stype])(dest_host, vecsize == 0 ? sref : ref,
+                                       block_elements, element_count[vecsize])
+                != 0)
             {
-                log_error( "Error: Could not unmap dest" );
+                log_error("vec_size:%d indx: 0x%16.16llx\n",
+                          (int)element_count[vecsize], i);
                 ++s_test_fail;
                 goto exit;
             }
@@ -459,6 +517,10 @@ exit:
     if( dest)   clReleaseMemObject( dest );
     if( ref )   free(ref );
     if( sref )  free(sref );
+    if (src1_host) free(src1_host);
+    if (src2_host) free(src2_host);
+    if (cmp_host) free(cmp_host);
+    if (dest_host) free(dest_host);
 
     for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; vecsize++) {
         clReleaseKernel(kernels[vecsize]);

From 19f4fc3f3d69bea530182164dd061c7dfda166b0 Mon Sep 17 00:00:00 2001
From: Chris Gearing <109952055+MeyeChris@users.noreply.github.com>
Date: Tue, 30 May 2023 18:05:18 +0200
Subject: [PATCH 21/30] Make extended_async_copy tests type agnostic (#1619)

The latest version of the cl_khr_extended_async_copies extension uses
element size rather than the element type as its base. This means it can
be called with arbitrary and, in particular, non-power-of-2 sizes, such
as 3 or 13.

Update the test_async_copy2D and test_async_copy3D tests to make them
element size based rather than type based.

In addition, run all tests that can fit into the memory of the target
rather than presuming that large elements cannot fit.

Make some additional good-practice changes in terms of const usage,
declaring variables where they are used, and usage of iterators.

The test coverage increases from 1224 cases to 1332 cases for the
test_async_copy2D and test_async_copy3D cases.

Ticket: #1579

Signed-off-by: Chris Gearing <chris.gearing@mobileye.com>
Co-authored-by: Chris Gearing <chris.gearing@mobileye.com>
---
 test_conformance/basic/test_async_copy2D.cpp | 182 ++++++++---------
 test_conformance/basic/test_async_copy3D.cpp | 204 ++++++++-----------
 2 files changed, 171 insertions(+), 215 deletions(-)

diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp
index bf3f1552..11ef84bd 100644
--- a/test_conformance/basic/test_async_copy2D.cpp
+++ b/test_conformance/basic/test_async_copy2D.cpp
@@ -27,17 +27,25 @@
 
 static const char *async_global_to_local_kernel2D = R"OpenCLC(
 #pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
-%s // optional pragma string
 
-__kernel void test_fn(const __global %s *src, __global %s *dst,
-                      __local %s *localBuffer, int numElementsPerLine,
+#define STRUCT_SIZE %d
+typedef struct __attribute__((packed))
+{
+    uchar byte[STRUCT_SIZE];
+} VarSizeStruct __attribute__((aligned(1)));
+
+
+__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst,
+                      __local VarSizeStruct *localBuffer, int numElementsPerLine,
                       int lineCopiesPerWorkgroup, int lineCopiesPerWorkItem,
                       int srcStride, int dstStride) {
   // Zero the local storage first
   for (int i = 0; i < lineCopiesPerWorkItem; i++) {
     for (int j = 0; j < numElementsPerLine; j++) {
       const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j;
-      localBuffer[index] = (%s)(%s)0;
+      for (int k = 0; k < STRUCT_SIZE; k++) {
+        localBuffer[index].byte[k] = 0;
+      }
     }
   }
 
@@ -45,7 +53,7 @@ __kernel void test_fn(const __global %s *src, __global %s *dst,
   // try the copy
   barrier( CLK_LOCAL_MEM_FENCE );
   event_t event = async_work_group_copy_2D2D(localBuffer, 0, src,
-    lineCopiesPerWorkgroup * get_group_id(0) * srcStride, sizeof(%s),
+    lineCopiesPerWorkgroup * get_group_id(0) * srcStride, sizeof(VarSizeStruct),
     (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride, dstStride, 0);
 
   // Wait for the copy to complete, then verify by manually copying to the dest
@@ -63,16 +71,24 @@ __kernel void test_fn(const __global %s *src, __global %s *dst,
 
 static const char *async_local_to_global_kernel2D = R"OpenCLC(
 #pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
-%s // optional pragma string
 
-__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer,
+#define STRUCT_SIZE %d
+typedef struct __attribute__((packed))
+{
+    uchar byte[STRUCT_SIZE];
+} VarSizeStruct __attribute__((aligned(1)));
+
+
+__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, __local VarSizeStruct *localBuffer,
                       int numElementsPerLine, int lineCopiesPerWorkgroup,
                       int lineCopiesPerWorkItem, int srcStride, int dstStride) {
   // Zero the local storage first
   for (int i = 0; i < lineCopiesPerWorkItem; i++) {
     for (int j = 0; j < numElementsPerLine; j++) {
       const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * srcStride + j;
-      localBuffer[index] = (%s)(%s)0;
+      for (int k = 0; k < STRUCT_SIZE; k++) {
+        localBuffer[index].byte[k] = 0;
+      }
     }
   }
 
@@ -90,36 +106,22 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca
   // Do this to verify all kernels are done copying to the local buffer before we try the copy
   barrier(CLK_LOCAL_MEM_FENCE);
   event_t event = async_work_group_copy_2D2D(dst, lineCopiesPerWorkgroup * get_group_id(0) * dstStride,
-    localBuffer, 0, sizeof(%s), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride,
+    localBuffer, 0, sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride,
    dstStride, 0 );
 
   wait_group_events(1, &event);
 };
 )OpenCLC";
 
-int test_copy2D(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, const char *kernelCode,
-                ExplicitType vecType, int vecSize, int srcMargin, int dstMargin,
-                bool localIsDst)
+int test_copy2D(const cl_device_id deviceID, const cl_context context,
+                const cl_command_queue queue, const char *const kernelCode,
+                const size_t elementSize, const int srcMargin,
+                const int dstMargin, const bool localIsDst)
 {
     int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[2];
-    size_t threads[1], localThreads[1];
-    void *inBuffer, *outBuffer, *outBufferCopy;
-    MTdata d;
-    char vecNameString[64];
-    vecNameString[0] = 0;
-    if (vecSize == 1)
-        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
-    else
-        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType),
-                vecSize);
 
-    size_t elementSize = get_explicit_type_size(vecType) * vecSize;
-    log_info("Testing %s with srcMargin = %d, dstMargin = %d\n", vecNameString,
-             srcMargin, dstMargin);
+    log_info("Testing %d byte element with srcMargin = %d, dstMargin = %d\n",
+             elementSize, srcMargin, dstMargin);
 
     cl_long max_local_mem_size;
     error =
@@ -139,6 +141,13 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
     test_error(error,
                "clGetDeviceInfo for CL_DEVICE_MAX_MEM_ALLOC_SIZE failed.");
 
+    cl_long max_work_group_size;
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+                            sizeof(max_work_group_size), &max_work_group_size,
+                            NULL);
+    test_error(error,
+               "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed.");
+
     if (max_alloc_size > max_global_mem_size / 2)
         max_alloc_size = max_global_mem_size / 2;
 
@@ -149,20 +158,17 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
     test_error(error,
                "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
 
-    char programSource[4096];
-    programSource[0] = 0;
-    char *programPtr;
+    char programSource[4096] = { 0 };
+    const char *programPtr = programSource;
 
-    sprintf(programSource, kernelCode,
-            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
-                               : "",
-            vecNameString, vecNameString, vecNameString, vecNameString,
-            get_explicit_type_name(vecType), vecNameString);
+    sprintf(programSource, kernelCode, elementSize);
     // log_info("program: %s\n", programSource);
-    programPtr = programSource;
+
+    clProgramWrapper program;
+    clKernelWrapper kernel;
 
     error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        (const char **)&programPtr, "test_fn");
+                                        &programPtr, "test_fn");
     test_error(error, "Unable to create testing kernel");
 
     size_t max_workgroup_size;
@@ -188,9 +194,6 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
     const cl_int dstStride = numElementsPerLine + dstMargin;
     const cl_int srcStride = numElementsPerLine + srcMargin;
 
-    elementSize =
-        get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
-
     const size_t lineCopiesPerWorkItem = 13;
     const size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem
         * elementSize * (localIsDst ? dstStride : srcStride);
@@ -208,7 +211,6 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
     if (maxLocalWorkgroupSize > max_workgroup_size)
         localWorkgroupSize = max_workgroup_size;
 
-
     const size_t maxTotalLinesIn =
         (max_alloc_size / elementSize + srcMargin) / srcStride;
     const size_t maxTotalLinesOut =
@@ -231,9 +233,17 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
     const size_t globalWorkgroupSize =
         numberOfLocalWorkgroups * localWorkgroupSize;
 
-    inBuffer = (void *)malloc(inBufferSize);
-    outBuffer = (void *)malloc(outBufferSize);
-    outBufferCopy = (void *)malloc(outBufferSize);
+    if ((localBufferSize / 4) > max_work_group_size)
+    {
+        log_info("Skipping due to resource requirements local:%db  "
+                 "max_work_group_size:%d\n",
+                 localBufferSize, max_work_group_size);
+        return 0;
+    }
+
+    void *const inBuffer = (void *)malloc(inBufferSize);
+    void *const outBuffer = (void *)malloc(outBufferSize);
+    void *const outBufferCopy = (void *)malloc(outBufferSize);
 
     const cl_int lineCopiesPerWorkItemInt =
         static_cast<cl_int>(lineCopiesPerWorkItem);
@@ -250,18 +260,20 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
         (int)inBufferSize, (int)outBufferSize, lineCopiesPerWorkgroup,
         lineCopiesPerWorkItemInt);
 
+    size_t threads[1], localThreads[1];
+
     threads[0] = globalWorkgroupSize;
     localThreads[0] = localWorkgroupSize;
 
-    d = init_genrand(gRandomSeed);
-    generate_random_data(
-        vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer);
-    generate_random_data(
-        vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer);
+    MTdata d = init_genrand(gRandomSeed);
+    generate_random_data(kChar, inBufferSize, d, inBuffer);
+    generate_random_data(kChar, outBufferSize, d, outBuffer);
     free_mtdata(d);
     d = NULL;
     memcpy(outBufferCopy, outBuffer, outBufferSize);
 
+    clMemWrapper streams[2];
+
     streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize,
                                 inBuffer, &error);
     test_error(error, "Unable to create input buffer");
@@ -301,8 +313,7 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
 
     // Verify
     int failuresPrinted = 0;
-    // Verify
-    size_t typeSize = get_explicit_type_size(vecType) * vecSize;
+
     for (int i = 0;
          i < (int)globalWorkgroupSize * lineCopiesPerWorkItem * elementSize;
          i += elementSize)
@@ -313,13 +324,12 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
             int inIdx = i * srcStride + j;
             int outIdx = i * dstStride + j;
             if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx,
-                       typeSize)
+                       elementSize)
                 != 0)
             {
                 unsigned char *inchar = (unsigned char *)inBuffer + inIdx;
                 unsigned char *outchar = (unsigned char *)outBuffer + outIdx;
-                char values[4096];
-                values[0] = 0;
+                char values[4096] = { 0 };
 
                 if (failuresPrinted == 0)
                 {
@@ -382,16 +392,14 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context,
                           cl_command_queue queue, const char *kernelCode,
                           bool localIsDst)
 {
-    ExplicitType vecType[] = {
-        kChar,  kUChar, kShort,  kUShort,          kInt, kUInt, kLong,
-        kULong, kFloat, kDouble, kNumExplicitTypes
-    };
+    const unsigned int elemSizes[] = { 1, 2,  3,  4,  5,  6, 7,
+                                       8, 13, 16, 32, 47, 64 };
     // The margins below represent the number of elements between the end of
     // one line and the start of the next. The strides are equivalent to the
     // length of the line plus the chosen margin.
-    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
-    unsigned int smallTypesMarginSizes[] = { 0, 10, 100 };
-    unsigned int size, typeIndex, srcMargin, dstMargin;
+    // These have to be multipliers, because the margin must be a multiple of
+    // element size.
+    const unsigned int marginMultipliers[] = { 0, 10, 100 };
 
     int errors = 0;
 
@@ -399,55 +407,27 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context,
     {
         log_info(
             "Device does not support extended async copies. Skipping test.\n");
-        return 0;
     }
-
-    for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
+    else
     {
-        if (vecType[typeIndex] == kDouble
-            && !is_extension_available(deviceID, "cl_khr_fp64"))
-            continue;
-
-        if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong)
-            && !gHasLong)
-            continue;
-
-        for (size = 0; vecSizes[size] != 0; size++)
+        for (const unsigned int elemSize : elemSizes)
         {
-            if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
-                <= 2) // small type
+            for (const unsigned int srcMarginMultiplier : marginMultipliers)
             {
-                for (srcMargin = 0; srcMargin < sizeof(smallTypesMarginSizes)
-                         / sizeof(smallTypesMarginSizes[0]);
-                     srcMargin++)
+                for (const unsigned int dstMarginMultiplier : marginMultipliers)
                 {
-                    for (dstMargin = 0;
-                         dstMargin < sizeof(smallTypesMarginSizes)
-                             / sizeof(smallTypesMarginSizes[0]);
-                         dstMargin++)
+                    if (test_copy2D(deviceID, context, queue, kernelCode,
+                                    elemSize, srcMarginMultiplier * elemSize,
+                                    dstMarginMultiplier * elemSize, localIsDst))
                     {
-                        if (test_copy2D(deviceID, context, queue, kernelCode,
-                                        vecType[typeIndex], vecSizes[size],
-                                        smallTypesMarginSizes[srcMargin],
-                                        smallTypesMarginSizes[dstMargin],
-                                        localIsDst))
-                        {
-                            errors++;
-                        }
+                        errors++;
                     }
                 }
             }
-            // not a small type, check only zero stride
-            else if (test_copy2D(deviceID, context, queue, kernelCode,
-                                 vecType[typeIndex], vecSizes[size], 0, 0,
-                                 localIsDst))
-            {
-                errors++;
-            }
         }
     }
-    if (errors) return -1;
-    return 0;
+
+    return errors ? -1 : 0;
 }
 
 int test_async_copy_global_to_local2D(cl_device_id deviceID, cl_context context,
diff --git a/test_conformance/basic/test_async_copy3D.cpp b/test_conformance/basic/test_async_copy3D.cpp
index 5eb41ebc..aa22f3a2 100644
--- a/test_conformance/basic/test_async_copy3D.cpp
+++ b/test_conformance/basic/test_async_copy3D.cpp
@@ -27,9 +27,14 @@
 
 static const char *async_global_to_local_kernel3D = R"OpenCLC(
 #pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
-%s // optional pragma string
 
-__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer,
+#define STRUCT_SIZE %d
+typedef struct __attribute__((packed))
+{
+    uchar byte[STRUCT_SIZE];
+} VarSizeStruct __attribute__((aligned(1)));
+
+__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, __local VarSizeStruct *localBuffer,
                       int numElementsPerLine, int numLines, int planesCopiesPerWorkgroup,
                       int planesCopiesPerWorkItem, int srcLineStride,
                       int dstLineStride, int srcPlaneStride, int dstPlaneStride ) {
@@ -38,7 +43,9 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca
     for (int j = 0; j < numLines; j++) {
       for (int k = 0; k < numElementsPerLine; k++) {
         const int index = (get_local_id(0) * planesCopiesPerWorkItem + i) * dstPlaneStride + j * dstLineStride + k;
-        localBuffer[index] = (%s)(%s)0;
+        for (int k = 0; k < STRUCT_SIZE; k++) {
+          localBuffer[index].byte[k] = 0;
+        }
       }
     }
   }
@@ -48,7 +55,7 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca
 
   event_t event = async_work_group_copy_3D3D(localBuffer, 0, src,
     planesCopiesPerWorkgroup * get_group_id(0) * srcPlaneStride,
-    sizeof(%s), (size_t)numElementsPerLine, (size_t)numLines,
+    sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)numLines,
     planesCopiesPerWorkgroup, srcLineStride, srcPlaneStride, dstLineStride,
     dstPlaneStride, 0);
 
@@ -69,9 +76,14 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca
 
 static const char *async_local_to_global_kernel3D = R"OpenCLC(
 #pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
-%s // optional pragma string
 
-__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer,
+#define STRUCT_SIZE %d
+typedef struct __attribute__((packed))
+{
+    uchar byte[STRUCT_SIZE];
+} VarSizeStruct __attribute__((aligned(1)));
+
+__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, __local VarSizeStruct *localBuffer,
                       int numElementsPerLine, int numLines, int planesCopiesPerWorkgroup,
                       int planesCopiesPerWorkItem, int srcLineStride,
                       int dstLineStride, int srcPlaneStride, int dstPlaneStride) {
@@ -80,7 +92,9 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca
     for (int j = 0; j < numLines; j++) {
       for (int k = 0; k < numElementsPerLine; k++) {
         const int index = (get_local_id(0) * planesCopiesPerWorkItem + i) * srcPlaneStride + j * srcLineStride + k;
-        localBuffer[index] = (%s)(%s)0;
+        for (int k = 0; k < STRUCT_SIZE; k++) {
+          localBuffer[index].byte[k] = 0;
+        }
       }
     }
   }
@@ -103,39 +117,26 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca
 
   event_t event = async_work_group_copy_3D3D(dst,
     planesCopiesPerWorkgroup * get_group_id(0) * dstPlaneStride, localBuffer, 0,
-    sizeof(%s), (size_t)numElementsPerLine, (size_t)numLines, planesCopiesPerWorkgroup,
+    sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)numLines, planesCopiesPerWorkgroup,
     srcLineStride, srcPlaneStride, dstLineStride, dstPlaneStride, 0);
 
   wait_group_events(1, &event);
 }
 )OpenCLC";
 
-int test_copy3D(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, const char *kernelCode,
-                ExplicitType vecType, int vecSize, int srcLineMargin,
-                int dstLineMargin, int srcPlaneMargin, int dstPlaneMargin,
-                bool localIsDst)
+int test_copy3D(const cl_device_id deviceID, const cl_context context,
+                const cl_command_queue queue, const char *const kernelCode,
+                const size_t elementSize, const int srcLineMargin,
+                const int dstLineMargin, const int srcPlaneMargin,
+                const int dstPlaneMargin, const bool localIsDst)
 {
     int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[2];
-    size_t threads[1], localThreads[1];
-    void *inBuffer, *outBuffer, *outBufferCopy;
-    MTdata d;
-    char vecNameString[64];
-    vecNameString[0] = 0;
-    if (vecSize == 1)
-        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
-    else
-        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType),
-                vecSize);
 
-    size_t elementSize = get_explicit_type_size(vecType) * vecSize;
-    log_info("Testing %s with srcLineMargin = %d, dstLineMargin = %d, "
-             "srcPlaneMargin = %d, dstPlaneMargin = %d\n",
-             vecNameString, srcLineMargin, dstLineMargin, srcPlaneMargin,
-             dstPlaneMargin);
+    log_info(
+        "Testing %d byte element with srcLineMargin = %d, dstLineMargin = %d, "
+        "srcPlaneMargin = %d, dstPlaneMargin = %d\n",
+        (int)elementSize, srcLineMargin, dstLineMargin, srcPlaneMargin,
+        dstPlaneMargin);
 
     cl_long max_local_mem_size;
     error =
@@ -165,20 +166,16 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
     test_error(error,
                "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
 
-    char programSource[4096];
-    programSource[0] = 0;
-    char *programPtr;
+    char programSource[4096] = { 0 };
+    const char *programPtr = programSource;
 
-    sprintf(programSource, kernelCode,
-            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
-                               : "",
-            vecNameString, vecNameString, vecNameString, vecNameString,
-            get_explicit_type_name(vecType), vecNameString, vecNameString);
+    sprintf(programSource, kernelCode, (int)elementSize); // %d expects int
     // log_info("program: %s\n", programSource);
-    programPtr = programSource;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
 
     error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        (const char **)&programPtr, "test_fn");
+                                        &programPtr, "test_fn");
     test_error(error, "Unable to create testing kernel");
 
     size_t max_workgroup_size;
@@ -196,6 +193,13 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
     test_error(error,
                "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
 
+    size_t max_work_group_size; // CL_DEVICE_MAX_WORK_GROUP_SIZE is a size_t
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+                            sizeof(max_work_group_size), &max_work_group_size,
+                            NULL);
+    test_error(error,
+               "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed.");
+
     // Pick the minimum of the device and the kernel
     if (max_workgroup_size > max_local_workgroup_size[0])
         max_workgroup_size = max_local_workgroup_size[0];
@@ -208,8 +212,6 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
     const cl_int dstPlaneStride = (numLines * dstLineStride) + dstPlaneMargin;
     const cl_int srcPlaneStride = (numLines * srcLineStride) + srcPlaneMargin;
 
-    elementSize =
-        get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
     const size_t planesCopiesPerWorkItem = 2;
     const size_t localStorageSpacePerWorkitem = elementSize
         * planesCopiesPerWorkItem
@@ -251,9 +253,17 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
     const size_t globalWorkgroupSize =
         numberOfLocalWorkgroups * localWorkgroupSize;
 
-    inBuffer = (void *)malloc(inBufferSize);
-    outBuffer = (void *)malloc(outBufferSize);
-    outBufferCopy = (void *)malloc(outBufferSize);
+    if ((localBufferSize / 4) > max_work_group_size)
+    {
+        log_info("Skipping due to resource requirements local:%db  "
+                 "max_work_group_size:%d\n",
+                 (int)localBufferSize, (int)max_work_group_size);
+        return 0;
+    }
+
+    void *const inBuffer = (void *)malloc(inBufferSize);
+    void *const outBuffer = (void *)malloc(outBufferSize);
+    void *const outBufferCopy = (void *)malloc(outBufferSize);
 
     const cl_int planesCopiesPerWorkItemInt =
         static_cast<cl_int>(planesCopiesPerWorkItem);
@@ -270,18 +280,20 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
              (int)localBufferSize, (int)inBufferSize, (int)outBufferSize,
              planesCopiesPerWorkgroup, planesCopiesPerWorkItemInt);
 
+    size_t threads[1], localThreads[1];
+
     threads[0] = globalWorkgroupSize;
     localThreads[0] = localWorkgroupSize;
 
-    d = init_genrand(gRandomSeed);
-    generate_random_data(
-        vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer);
-    generate_random_data(
-        vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer);
+    MTdata d = init_genrand(gRandomSeed);
+    generate_random_data(kChar, inBufferSize, d, inBuffer);
+    generate_random_data(kChar, outBufferSize, d, outBuffer);
     free_mtdata(d);
     d = NULL;
     memcpy(outBufferCopy, outBuffer, outBufferSize);
 
+    clMemWrapper streams[2];
+
     streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize,
                                 inBuffer, &error);
     test_error(error, "Unable to create input buffer");
@@ -327,8 +339,7 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
 
     // Verify
     int failuresPrinted = 0;
-    // Verify
-    size_t typeSize = get_explicit_type_size(vecType) * vecSize;
+
     for (int i = 0;
          i < (int)globalWorkgroupSize * planesCopiesPerWorkItem * elementSize;
          i += elementSize)
@@ -341,14 +352,13 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
                 int inIdx = i * srcPlaneStride + j * srcLineStride + k;
                 int outIdx = i * dstPlaneStride + j * dstLineStride + k;
                 if (memcmp(((char *)inBuffer) + inIdx,
-                           ((char *)outBuffer) + outIdx, typeSize)
+                           ((char *)outBuffer) + outIdx, elementSize)
                     != 0)
                 {
                     unsigned char *inchar = (unsigned char *)inBuffer + inIdx;
                     unsigned char *outchar =
                         (unsigned char *)outBuffer + outIdx;
-                    char values[4096];
-                    values[0] = 0;
+                    char values[4096] = { 0 };
 
                     if (failuresPrinted == 0)
                     {
@@ -439,17 +449,14 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context,
                           cl_command_queue queue, const char *kernelCode,
                           bool localIsDst)
 {
-    ExplicitType vecType[] = {
-        kChar,  kUChar, kShort,  kUShort,          kInt, kUInt, kLong,
-        kULong, kFloat, kDouble, kNumExplicitTypes
-    };
+    const unsigned int elemSizes[] = { 1, 2,  3,  4,  5,  6, 7,
+                                       8, 13, 16, 32, 47, 64 };
     // The margins below represent the number of elements between the end of
-    // one line or plane and the start of the next. The strides are equivalent
-    // to the size of the line or plane plus the chosen margin.
-    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
-    unsigned int smallTypesMarginSizes[] = { 0, 10, 100 };
-    unsigned int size, typeIndex, srcLineMargin, dstLineMargin, srcPlaneMargin,
-        dstPlaneMargin;
+    // one line or plane and the start of the next. The strides are equivalent
+    // to the size of the line or plane plus the chosen margin.
+    // These have to be multipliers, because the margin must be a multiple of
+    // element size.
+    const unsigned int marginMultipliers[] = { 0, 10, 100 };
 
     int errors = 0;
 
@@ -457,67 +464,36 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context,
     {
         log_info(
             "Device does not support extended async copies. Skipping test.\n");
-        return 0;
     }
-
-    for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
+    else
     {
-        if (vecType[typeIndex] == kDouble
-            && !is_extension_available(deviceID, "cl_khr_fp64"))
-            continue;
-
-        if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong)
-            && !gHasLong)
-            continue;
-
-        for (size = 0; vecSizes[size] != 0; size++)
+        for (const unsigned int elemSize : elemSizes)
         {
-            if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
-                <= 2) // small type
+            for (const unsigned int srcLineMarginMultiplier : marginMultipliers)
             {
-                for (srcLineMargin = 0;
-                     srcLineMargin < sizeof(smallTypesMarginSizes)
-                         / sizeof(smallTypesMarginSizes[0]);
-                     srcLineMargin++)
+                for (const unsigned int dstLineMarginMultiplier :
+                     marginMultipliers)
                 {
-                    for (dstLineMargin = 0;
-                         dstLineMargin < sizeof(smallTypesMarginSizes)
-                             / sizeof(smallTypesMarginSizes[0]);
-                         dstLineMargin++)
+                    for (const unsigned int srcPlaneMarginMultiplier :
+                         marginMultipliers)
                     {
-                        for (srcPlaneMargin = 0;
-                             srcPlaneMargin < sizeof(smallTypesMarginSizes)
-                                 / sizeof(smallTypesMarginSizes[0]);
-                             srcPlaneMargin++)
+                        for (const unsigned int dstPlaneMarginMultiplier :
+                             marginMultipliers)
                         {
-                            for (dstPlaneMargin = 0;
-                                 dstPlaneMargin < sizeof(smallTypesMarginSizes)
-                                     / sizeof(smallTypesMarginSizes[0]);
-                                 dstPlaneMargin++)
+                            if (test_copy3D(deviceID, context, queue,
+                                            kernelCode, elemSize,
+                                            srcLineMarginMultiplier * elemSize,
+                                            dstLineMarginMultiplier * elemSize,
+                                            srcPlaneMarginMultiplier * elemSize,
+                                            dstPlaneMarginMultiplier * elemSize,
+                                            localIsDst))
                             {
-                                if (test_copy3D(
-                                        deviceID, context, queue, kernelCode,
-                                        vecType[typeIndex], vecSizes[size],
-                                        smallTypesMarginSizes[srcLineMargin],
-                                        smallTypesMarginSizes[dstLineMargin],
-                                        smallTypesMarginSizes[srcPlaneMargin],
-                                        smallTypesMarginSizes[dstPlaneMargin],
-                                        localIsDst))
-                                {
-                                    errors++;
-                                }
+                                errors++;
                             }
                         }
                     }
                 }
             }
-            // not a small type, check only zero stride
-            else if (test_copy3D(deviceID, context, queue, kernelCode,
-                                 vecType[typeIndex], vecSizes[size], 0, 0, 0, 0,
-                                 localIsDst))
-            {
-                errors++;
-            }
         }
     }
     if (errors) return -1;

From abd556f7a2ad98d1b08b145b6fd5c5e813fda125 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Wed, 31 May 2023 10:02:54 +0100
Subject: [PATCH 22/30] relationals: add missing virtual destructor (#1739)

`RelationalsFPTest` contains a vector of `RelTestBase` pointers to
`RelTestParams` instances, so the base class destructor should be
virtual to avoid undefined behaviour.

Fixes https://github.com/KhronosGroup/OpenCL-CTS/issues/1731

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 test_conformance/relationals/test_comparisons_fp.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test_conformance/relationals/test_comparisons_fp.h b/test_conformance/relationals/test_comparisons_fp.h
index 7faca1c5..66c62c2d 100644
--- a/test_conformance/relationals/test_comparisons_fp.h
+++ b/test_conformance/relationals/test_comparisons_fp.h
@@ -32,6 +32,7 @@ template <typename T> using VerifyFunc = bool (*)(const T &, const T &);
 struct RelTestBase
 {
     explicit RelTestBase(const ExplicitTypes &dt): dataType(dt) {}
+    virtual ~RelTestBase() = default;
     ExplicitTypes dataType;
 };
 

From ce1754981212e540ad5a5c4645139fca77f89b2b Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Fri, 2 Jun 2023 10:57:10 +0100
Subject: [PATCH 23/30] cmake: do not suppress -Wsometimes-uninitialized
 globally (#1741)

Fix an instance of this warning in mem_host_flags.

Only disable `-Wsometimes-uninitialized` for the SVM test, which does
not compile cleanly with this warning enabled.  Re-enable the warning
for the other tests, so that it can catch any new occurrences.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 CMakeLists.txt                            | 1 -
 test_conformance/SVM/CMakeLists.txt       | 2 ++
 test_conformance/mem_host_flags/checker.h | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4fce58d8..8d56b64d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -105,7 +105,6 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang"
         add_cxx_flag_if_supported(-Wall)
         # Suppress warnings that currently trigger on the code base.
         # This list should shrink over time when warnings are fixed.
-        add_cxx_flag_if_supported(-Wno-sometimes-uninitialized)
         add_cxx_flag_if_supported(-Wno-sign-compare)
     endif()
     add_cxx_flag_if_supported(-Wno-narrowing)
diff --git a/test_conformance/SVM/CMakeLists.txt b/test_conformance/SVM/CMakeLists.txt
index 2d01a825..efa597d1 100644
--- a/test_conformance/SVM/CMakeLists.txt
+++ b/test_conformance/SVM/CMakeLists.txt
@@ -17,4 +17,6 @@ set(${MODULE_NAME}_SOURCES
     test_migrate.cpp
 )
 
+set_gnulike_module_compile_flags("-Wno-sometimes-uninitialized")
+
 include(../CMakeCommon.txt)
diff --git a/test_conformance/mem_host_flags/checker.h b/test_conformance/mem_host_flags/checker.h
index 835f120b..0bb826f4 100644
--- a/test_conformance/mem_host_flags/checker.h
+++ b/test_conformance/mem_host_flags/checker.h
@@ -219,7 +219,7 @@ cl_int cBuffer_checker<T>::SetupASSubBuffer(cl_mem_flags parent_buffer_flag)
         err = CL_SUCCESS;
     }
 
-    cl_mem_flags f;
+    cl_mem_flags f = 0;
     if (parent_buffer_flag & CL_MEM_HOST_READ_ONLY)
         f = CL_MEM_HOST_READ_ONLY;
     else if (parent_buffer_flag & CL_MEM_HOST_WRITE_ONLY)

From 63a8cb6b9d3345feec7621c7e0b0a4ca21cf545a Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Mon, 5 Jun 2023 11:45:25 +0100
Subject: [PATCH 24/30] non_uniform_work_group: fix unused-but-set variables
 (#1733)

Remove the unused `nonRemainderGlobalSize` array.  Inspect the result
of the `clGetDeviceInfo` call.

As this fixes all occurrences of this warning, remove the suppression
flag from this test.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 test_conformance/non_uniform_work_group/CMakeLists.txt |  2 --
 .../non_uniform_work_group/TestNonUniformWorkGroup.cpp | 10 +++++-----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/test_conformance/non_uniform_work_group/CMakeLists.txt b/test_conformance/non_uniform_work_group/CMakeLists.txt
index f78dd195..30c3a846 100644
--- a/test_conformance/non_uniform_work_group/CMakeLists.txt
+++ b/test_conformance/non_uniform_work_group/CMakeLists.txt
@@ -10,8 +10,6 @@ set(${MODULE_NAME}_SOURCES
     tools.cpp
 )
 
-set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
-
 include(../CMakeCommon.txt)
 
 # end of file #
diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
index a4a6a744..44781ca8 100644
--- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
+++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
@@ -448,13 +448,8 @@ void TestNonUniformWorkGroup::verifyData (DataContainerAttrib * reference, DataC
 }
 
 void TestNonUniformWorkGroup::calculateExpectedValues () {
-  size_t nonRemainderGlobalSize[MAX_DIMS];
   size_t numberOfPossibleRegions[MAX_DIMS];
 
-  nonRemainderGlobalSize[0] = _globalSize[0] - (_globalSize[0] % _enqueuedLocalSize[0]);
-  nonRemainderGlobalSize[1] = _globalSize[1] - (_globalSize[1] % _enqueuedLocalSize[1]);
-  nonRemainderGlobalSize[2] = _globalSize[2] - (_globalSize[2] % _enqueuedLocalSize[2]);
-
   numberOfPossibleRegions[0] = (_globalSize[0]>1)?2:1;
   numberOfPossibleRegions[1] = (_globalSize[1]>1)?2:1;
   numberOfPossibleRegions[2] = (_globalSize[2]>1)?2:1;
@@ -502,6 +497,11 @@ size_t TestNonUniformWorkGroup::getMaxLocalWorkgroupSize (const cl_device_id &de
   if (TestNonUniformWorkGroup::_maxLocalWorkgroupSize == 0) {
     err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
       sizeof(TestNonUniformWorkGroup::_maxLocalWorkgroupSize), &TestNonUniformWorkGroup::_maxLocalWorkgroupSize, NULL);
+    if (err)
+    {
+        log_error("clGetDeviceInfo failed\n");
+        return 0;
+    }
   }
 
   return TestNonUniformWorkGroup::_maxLocalWorkgroupSize;

From c467391680ff31eee1853b7c02b42669a98ee4b6 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 6 Jun 2023 17:46:56 +0200
Subject: [PATCH 25/30] Added support for cl_khr_fp16 extension in
 test_async_copy from basic (issue #142, basic) (#1707)

---
 test_conformance/basic/test_async_copy.cpp | 69 ++++++++++++----------
 1 file changed, 39 insertions(+), 30 deletions(-)

diff --git a/test_conformance/basic/test_async_copy.cpp b/test_conformance/basic/test_async_copy.cpp
index a537c8fe..bb529bce 100644
--- a/test_conformance/basic/test_async_copy.cpp
+++ b/test_conformance/basic/test_async_copy.cpp
@@ -20,8 +20,7 @@
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-
-
+#include <vector>
 
 #include "procs.h"
 #include "harness/conversions.h"
@@ -86,8 +85,7 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
     clKernelWrapper kernel;
     clMemWrapper streams[ 2 ];
     size_t threads[ 1 ], localThreads[ 1 ];
-    void *inBuffer, *outBuffer;
-    MTdata d;
+    MTdataHolder d(gRandomSeed);
     char vecNameString[64]; vecNameString[0] = 0;
     if (vecSize == 1)
         sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
@@ -109,9 +107,15 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
     char programSource[4096]; programSource[0]=0;
     char *programPtr;
 
-    sprintf(programSource, kernelCode,
-            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-            vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
+    std::string extStr = "";
+    if (vecType == kDouble)
+        extStr = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
+    else if (vecType == kHalf)
+        extStr = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable";
+
+    sprintf(programSource, kernelCode, extStr.c_str(), vecNameString,
+            vecNameString, vecNameString, vecNameString,
+            get_explicit_type_name(vecType), vecNameString, vecNameString);
     //log_info("program: %s\n", programSource);
     programPtr = programSource;
 
@@ -150,9 +154,10 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
     size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize;
     size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;
 
-    inBuffer = (void*)malloc(globalBufferSize);
-    outBuffer = (void*)malloc(globalBufferSize);
-    memset(outBuffer, 0, globalBufferSize);
+    std::vector<unsigned char> inBuffer(globalBufferSize);
+    std::vector<unsigned char> outBuffer(globalBufferSize);
+
+    outBuffer.assign(globalBufferSize, 0);
 
     cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
     copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
@@ -164,13 +169,15 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
     threads[0] = globalWorkgroupSize;
     localThreads[0] = localWorkgroupSize;
 
-    d = init_genrand( gRandomSeed );
-    generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
-    free_mtdata(d); d = NULL;
+    generate_random_data(vecType,
+                         globalBufferSize / get_explicit_type_size(vecType), d,
+                         &inBuffer.front());
 
-    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
+    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize,
+                                &inBuffer.front(), &error);
     test_error( error, "Unable to create input buffer" );
-    streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
+    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize,
+                                &outBuffer.front(), &error);
     test_error( error, "Unable to create output buffer" );
 
     error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
@@ -189,16 +196,18 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
     test_error( error, "Unable to queue kernel" );
 
     // Read
-    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
+    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, globalBufferSize,
+                                &outBuffer.front(), 0, NULL, NULL);
     test_error( error, "Unable to read results" );
 
     // Verify
     int failuresPrinted = 0;
-    if( memcmp( inBuffer, outBuffer, globalBufferSize ) != 0 )
+    if (memcmp(&inBuffer.front(), &outBuffer.front(), globalBufferSize) != 0)
     {
         size_t typeSize = get_explicit_type_size(vecType)* vecSize;
-        unsigned char * inchar = (unsigned char*)inBuffer;
-        unsigned char * outchar = (unsigned char*)outBuffer;
+        unsigned char *inchar = static_cast<unsigned char *>(&inBuffer.front());
+        unsigned char *outchar =
+            static_cast<unsigned char *>(&outBuffer.front());
         for (int i=0; i< (int)globalBufferSize; i+=(int)elementSize) {
             if (memcmp( ((char *)inchar)+i, ((char *)outchar)+i, typeSize) != 0 )
             {
@@ -226,26 +235,29 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
         }
     }
 
-    free(inBuffer);
-    free(outBuffer);
-
     return failuresPrinted ? -1 : 0;
 }
 
 int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) {
-    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
+    const std::vector<ExplicitType> vecType = { kChar,  kUChar, kShort, kUShort,
+                                                kInt,   kUInt,  kLong,  kULong,
+                                                kFloat, kHalf,  kDouble };
     unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
     unsigned int size, typeIndex;
 
     int errors = 0;
 
-    for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
-    {
-        if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
-            continue;
+    bool fp16Support = is_extension_available(deviceID, "cl_khr_fp16");
+    bool fp64Support = is_extension_available(deviceID, "cl_khr_fp64");
 
+    for (typeIndex = 0; typeIndex < vecType.size(); typeIndex++)
+    {
         if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong )
             continue;
+        else if (vecType[typeIndex] == kDouble && !fp64Support)
+            continue;
+        else if (vecType[typeIndex] == kHalf && !fp16Support)
+            continue;
 
         for( size = 0; vecSizes[ size ] != 0; size++ )
         {
@@ -259,9 +271,6 @@ int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_qu
     return 0;
 }
 
-
-
-
 int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     return test_copy_all_types( deviceID, context, queue, async_global_to_local_kernel );

From b843b3bd8c064f0f3d966eaf17809dd9ea03b3a6 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Wed, 7 Jun 2023 10:51:38 +0100
Subject: [PATCH 26/30] relationals: fix missing includes (#1753)

With GCC 13 some headers are no longer included transitively through
C++ Standard Library headers.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 test_conformance/relationals/test_comparisons_fp.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test_conformance/relationals/test_comparisons_fp.cpp b/test_conformance/relationals/test_comparisons_fp.cpp
index 580b7422..c3d8f67a 100644
--- a/test_conformance/relationals/test_comparisons_fp.cpp
+++ b/test_conformance/relationals/test_comparisons_fp.cpp
@@ -14,6 +14,8 @@
 // limitations under the License.
 //
 
+#include <cstdint>
+#include <functional>
 #include <iostream>
 #include <map>
 #include <memory>

From 658a8b62fbfd01e15fcd45a85c0e48565addf3f6 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Wed, 7 Jun 2023 15:07:25 +0100
Subject: [PATCH 27/30] [CI] Build Vulkan loader with USE_GAS=ON (#1756)

https://github.com/KhronosGroup/Vulkan-Loader/pull/1212 broke builds
that set `USE_GAS=OFF`.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 presubmit.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/presubmit.sh b/presubmit.sh
index 605c10b0..10354abf 100755
--- a/presubmit.sh
+++ b/presubmit.sh
@@ -77,7 +77,6 @@ cmake .. -G Ninja \
       -DBUILD_WSI_XLIB_SUPPORT=OFF \
       -DBUILD_WSI_XCB_SUPPORT=OFF \
       -DBUILD_WSI_WAYLAND_SUPPORT=OFF \
-      -DUSE_GAS=OFF \
       -C helper.cmake ..
 cmake --build . -j2
 

From 1011f8ea815fbfc0bcc3333490b583c977f36787 Mon Sep 17 00:00:00 2001
From: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com>
Date: Wed, 7 Jun 2023 16:53:12 +0200
Subject: [PATCH 28/30] fix async strided test outputting error during verify
 (#1754)

This bug was introduced by
https://github.com/KhronosGroup/OpenCL-CTS/pull/1711

Ref google/clspv#1127
---
 test_conformance/basic/test_async_strided_copy.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/basic/test_async_strided_copy.cpp
index 4a848c0f..932e9b8c 100644
--- a/test_conformance/basic/test_async_strided_copy.cpp
+++ b/test_conformance/basic/test_async_strided_copy.cpp
@@ -211,9 +211,9 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
         if (memcmp(&inBuffer.at(i), &outBuffer.at(i), typeSize) != 0)
         {
             unsigned char *inchar =
-                static_cast<unsigned char *>(inBuffer.data());
+                static_cast<unsigned char *>(&inBuffer.at(i));
             unsigned char *outchar =
-                static_cast<unsigned char *>(outBuffer.data());
+                static_cast<unsigned char *>(&outBuffer.at(i));
             char values[4096];
             values[0] = 0;
 

From c8061ab21ad2f3d6a0e058a56c98b5bb968acf41 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Thu, 8 Jun 2023 13:27:20 +0100
Subject: [PATCH 29/30] mem_host_flags: use size_t for element count (#1755)

More recent GCC versions (e.g. 12.2, 13.1) report that the argument to
`new[]` in the `Init` methods exceeds the maximum object size,
seemingly related to the negative range of the widened `int`.

Use an unsigned type to avoid the warning and propagate the signedness
change to other uses of the `num_elements` member.

Fixes https://github.com/KhronosGroup/OpenCL-CTS/issues/1582

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 .../mem_host_flags/C_host_memory_block.h      | 34 +++++++++----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/test_conformance/mem_host_flags/C_host_memory_block.h b/test_conformance/mem_host_flags/C_host_memory_block.h
index 78692d17..0784c2c2 100644
--- a/test_conformance/mem_host_flags/C_host_memory_block.h
+++ b/test_conformance/mem_host_flags/C_host_memory_block.h
@@ -24,14 +24,14 @@
 
 template <class T> class C_host_memory_block {
 public:
-    int num_elements;
+    size_t num_elements;
     int element_size;
     T *pData;
 
     C_host_memory_block();
     ~C_host_memory_block();
-    void Init(int num_elem, T &value);
-    void Init(int num_elem);
+    void Init(size_t num_elem, T &value);
+    void Init(size_t num_elem);
     void Set_to(T &val);
     void Set_to_zero();
     bool Equal_to(T &val);
@@ -40,7 +40,7 @@ public:
     bool Equal_rect(C_host_memory_block<T> &another, size_t *host_origin,
                     size_t *region, size_t host_row_pitch,
                     size_t host_slice_pitch);
-    bool Equal(T *pData, int num_elements);
+    bool Equal(T *pData, size_t num_elements);
 
     bool Equal_rect_from_orig(C_host_memory_block<T> &another, size_t *soffset,
                               size_t *region, size_t host_row_pitch,
@@ -63,20 +63,20 @@ template <class T> C_host_memory_block<T>::~C_host_memory_block()
     num_elements = 0;
 }
 
-template <class T> void C_host_memory_block<T>::Init(int num_elem, T &value)
+template <class T> void C_host_memory_block<T>::Init(size_t num_elem, T &value)
 {
     if (pData != NULL) delete[] pData;
     pData = new T[num_elem];
-    for (int i = 0; i < num_elem; i++) pData[i] = value;
+    for (size_t i = 0; i < num_elem; i++) pData[i] = value;
 
     num_elements = num_elem;
 }
 
-template <class T> void C_host_memory_block<T>::Init(int num_elem)
+template <class T> void C_host_memory_block<T>::Init(size_t num_elem)
 {
     if (pData != NULL) delete[] pData;
     pData = new T[num_elem];
-    for (int i = 0; i < num_elem; i++) pData[i] = (T)i;
+    for (size_t i = 0; i < num_elem; i++) pData[i] = (T)i;
 
     num_elements = num_elem;
 }
@@ -88,14 +88,14 @@ template <class T> void C_host_memory_block<T>::Set_to_zero()
 
 template <class T> void C_host_memory_block<T>::Set_to(T &val)
 {
-    for (int i = 0; i < num_elements; i++) pData[i] = val;
+    for (size_t i = 0; i < num_elements; i++) pData[i] = val;
 }
 
 template <class T> bool C_host_memory_block<T>::Equal_to(T &val)
 {
-    int count = 0;
+    size_t count = 0;
 
-    for (int i = 0; i < num_elements; i++)
+    for (size_t i = 0; i < num_elements; i++)
     {
         if (pData[i] == val) count++;
     }
@@ -106,9 +106,9 @@ template <class T> bool C_host_memory_block<T>::Equal_to(T &val)
 template <class T>
 bool C_host_memory_block<T>::Equal(C_host_memory_block<T> &another)
 {
-    int count = 0;
+    size_t count = 0;
 
-    for (int i = 0; i < num_elements; i++)
+    for (size_t i = 0; i < num_elements; i++)
     {
         if (pData[i] == another.pData[i]) count++;
     }
@@ -117,13 +117,13 @@ bool C_host_memory_block<T>::Equal(C_host_memory_block<T> &another)
 }
 
 template <class T>
-bool C_host_memory_block<T>::Equal(T *pIn_Data, int Innum_elements)
+bool C_host_memory_block<T>::Equal(T *pIn_Data, size_t Innum_elements)
 {
     if (this->num_elements != Innum_elements) return false;
 
-    int count = 0;
+    size_t count = 0;
 
-    for (int i = 0; i < num_elements; i++)
+    for (size_t i = 0; i < num_elements; i++)
     {
         if (pData[i] == pIn_Data[i]) count++;
     }
@@ -134,7 +134,7 @@ bool C_host_memory_block<T>::Equal(T *pIn_Data, int Innum_elements)
 template <class T> size_t C_host_memory_block<T>::Count(T &val)
 {
     size_t count = 0;
-    for (int i = 0; i < num_elements; i++)
+    for (size_t i = 0; i < num_elements; i++)
     {
         if (pData[i] == val) count++;
     }

From 475a37abbfa22a55fe47bf76d5c7904b3a37730a Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Fri, 9 Jun 2023 11:25:20 +0100
Subject: [PATCH 30/30] [NFC] Do not use reserved names for include guards
 (#1737)

Names that begin with an underscore followed by an uppercase letter
are reserved for the C++ implementation.

Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
---
 test_common/harness/compat.h                               | 6 +++---
 test_common/harness/crc32.h                                | 4 ++--
 test_conformance/c11_atomics/common.h                      | 6 +++---
 test_conformance/c11_atomics/host_atomics.h                | 6 +++---
 test_conformance/d3d10/harness.h                           | 4 ++--
 .../cl_khr_command_buffer/basic_command_buffer.h           | 6 +++---
 .../mutable_command_basic.h                                | 6 +++---
 .../cl_khr_command_buffer_mutable_dispatch/procs.h         | 6 +++---
 .../cl_khr_command_buffer/command_buffer_test_base.h       | 6 +++---
 test_conformance/extensions/cl_khr_command_buffer/procs.h  | 6 +++---
 .../extensions/cl_khr_external_semaphore/procs.h           | 6 +++---
 .../images/kernel_read_write/test_cl_ext_image_buffer.hpp  | 6 +++---
 .../non_uniform_work_group/TestNonUniformWorkGroup.h       | 7 +++----
 test_conformance/non_uniform_work_group/tools.h            | 6 +++---
 test_conformance/pipes/kernels.h                           | 6 +++---
 test_conformance/relationals/test_comparisons_fp.h         | 6 +++---
 16 files changed, 46 insertions(+), 47 deletions(-)

diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h
index 4053b7ee..a42f2917 100644
--- a/test_common/harness/compat.h
+++ b/test_common/harness/compat.h
@@ -13,8 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#ifndef _COMPAT_H_
-#define _COMPAT_H_
+#ifndef COMPAT_H_
+#define COMPAT_H_
 
 #if defined(_WIN32) && defined(_MSC_VER)
 #include <Windows.h>
@@ -398,4 +398,4 @@ EXTERN_C int __builtin_clz(unsigned int pattern);
 #define sleep(sec) Sleep((sec)*1000)
 #endif
 
-#endif // _COMPAT_H_
+#endif // COMPAT_H_
diff --git a/test_common/harness/crc32.h b/test_common/harness/crc32.h
index 65ca15ee..69587011 100644
--- a/test_common/harness/crc32.h
+++ b/test_common/harness/crc32.h
@@ -15,8 +15,8 @@ Agreement or Khronos Conformance Test Source License Agreement as
 executed between Khronos and the recipient.
 ******************************************************************/
 
-#ifndef _CRC32_H_
-#define _CRC32_H_
+#ifndef CRC32_H_
+#define CRC32_H_
 
 #include <stdint.h>
 #include <stddef.h>
diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h
index 6c7d0b12..37c37e87 100644
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h
@@ -13,8 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#ifndef _COMMON_H_
-#define _COMMON_H_
+#ifndef COMMON_H_
+#define COMMON_H_
 
 #include "harness/testHarness.h"
 #include "harness/typeWrappers.h"
@@ -1567,4 +1567,4 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
     return 0;
 }
 
-#endif //_COMMON_H_
+#endif // COMMON_H_
diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h
index 6c4e783a..b865970f 100644
--- a/test_conformance/c11_atomics/host_atomics.h
+++ b/test_conformance/c11_atomics/host_atomics.h
@@ -13,8 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#ifndef _HOST_ATOMICS_H_
-#define _HOST_ATOMICS_H_
+#ifndef HOST_ATOMICS_H_
+#define HOST_ATOMICS_H_
 
 #include "harness/testHarness.h"
 
@@ -247,4 +247,4 @@ CorrespondingType host_atomic_fetch_max(volatile AtomicType *a, CorrespondingTyp
 bool host_atomic_flag_test_and_set(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order);
 void host_atomic_flag_clear(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order);
 
-#endif //_HOST_ATOMICS_H_
+#endif // HOST_ATOMICS_H_
diff --git a/test_conformance/d3d10/harness.h b/test_conformance/d3d10/harness.h
index 184e52cb..afeb4966 100644
--- a/test_conformance/d3d10/harness.h
+++ b/test_conformance/d3d10/harness.h
@@ -13,8 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#ifndef _HARNESS_H_
-#define _HARNESS_H_
+#ifndef HARNESS_H_
+#define HARNESS_H_
 
 #define _CRT_SECURE_NO_WARNINGS
 
diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
index b1d36024..44f4cc63 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
@@ -13,8 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#ifndef _CL_KHR_BASIC_COMMAND_BUFFER_H
-#define _CL_KHR_BASIC_COMMAND_BUFFER_H
+#ifndef CL_KHR_BASIC_COMMAND_BUFFER_H
+#define CL_KHR_BASIC_COMMAND_BUFFER_H
 
 #include "command_buffer_test_base.h"
 #include "harness/typeWrappers.h"
@@ -99,4 +99,4 @@ int MakeAndRunTest(cl_device_id device, cl_context context,
     return TEST_PASS;
 }
 
-#endif // _CL_KHR_BASIC_COMMAND_BUFFER_H
+#endif // CL_KHR_BASIC_COMMAND_BUFFER_H
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
index 9056a00d..96669583 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
@@ -13,8 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#ifndef _CL_KHR_MUTABLE_COMMAND_BASIC_H
-#define _CL_KHR_MUTABLE_COMMAND_BASIC_H
+#ifndef CL_KHR_MUTABLE_COMMAND_BASIC_H
+#define CL_KHR_MUTABLE_COMMAND_BASIC_H
 
 #include "../basic_command_buffer.h"
 #include "../command_buffer_test_base.h"
@@ -104,4 +104,4 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
     const size_t global_work_size = 4 * sizeof(cl_int);
 };
 
-#endif //_CL_KHR_MUTABLE_COMMAND_BASIC_H
\ No newline at end of file
+#endif // CL_KHR_MUTABLE_COMMAND_BASIC_H
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h
index 08512cae..4b6dacb6 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h
@@ -13,8 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#ifndef _CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H
-#define _CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H
+#ifndef CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H
+#define CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H
 
 #include <CL/cl.h>
 
@@ -59,4 +59,4 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device,
                                                       cl_context context,
                                                       cl_command_queue queue,
                                                       int num_elements);
-#endif /*_CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H*/
+#endif // CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h
index 0fd2e4ec..48abe25d 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h
@@ -13,8 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#ifndef _CL_KHR_COMMAND_BUFFER_TEST_BASE_H
-#define _CL_KHR_COMMAND_BUFFER_TEST_BASE_H
+#ifndef CL_KHR_COMMAND_BUFFER_TEST_BASE_H
+#define CL_KHR_COMMAND_BUFFER_TEST_BASE_H
 
 #include <CL/cl_ext.h>
 #include "harness/deviceInfo.h"
@@ -174,4 +174,4 @@ public:
     }
 
 
-#endif // _CL_KHR_COMMAND_BUFFER_TEST_BASE_H
+#endif // CL_KHR_COMMAND_BUFFER_TEST_BASE_H
diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h
index 63e004a7..53a7d934 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/procs.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h
@@ -13,8 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#ifndef _CL_KHR_COMMAND_BUFFER_PROCS_H
-#define _CL_KHR_COMMAND_BUFFER_PROCS_H
+#ifndef CL_KHR_COMMAND_BUFFER_PROCS_H
+#define CL_KHR_COMMAND_BUFFER_PROCS_H
 
 #include <CL/cl.h>
 
@@ -131,4 +131,4 @@ extern int test_event_info_reference_count(cl_device_id device,
                                            cl_command_queue queue,
                                            int num_elements);
 
-#endif /*_CL_KHR_COMMAND_BUFFER_PROCS_H*/
+#endif // CL_KHR_COMMAND_BUFFER_PROCS_H
diff --git a/test_conformance/extensions/cl_khr_external_semaphore/procs.h b/test_conformance/extensions/cl_khr_external_semaphore/procs.h
index 753c8fe2..7e1c4caf 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore/procs.h
+++ b/test_conformance/extensions/cl_khr_external_semaphore/procs.h
@@ -13,8 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#ifndef _CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H
-#define _CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H
+#ifndef CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H
+#define CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H
 
 #include <CL/cl.h>
 
@@ -79,4 +79,4 @@ extern int test_external_semaphores_invalid_command(cl_device_id deviceID,
                                                     cl_context context,
                                                     cl_command_queue queue,
                                                     int num_elements);
-#endif /* CL_KHR_EXTERNAL_SEMAPHORE */
+#endif // CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp
index c6646330..56d15808 100644
--- a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp
@@ -14,8 +14,8 @@
 // limitations under the License.
 //
 
-#ifndef _TEST_CL_EXT_IMAGE_BUFFER
-#define _TEST_CL_EXT_IMAGE_BUFFER
+#ifndef TEST_CL_EXT_IMAGE_BUFFER
+#define TEST_CL_EXT_IMAGE_BUFFER
 
 #define TEST_IMAGE_SIZE 20
 
@@ -121,4 +121,4 @@ static inline void image_desc_init(cl_image_desc* desc,
     }
 }
 
-#endif /* _TEST_CL_EXT_IMAGE_BUFFER */
\ No newline at end of file
+#endif // TEST_CL_EXT_IMAGE_BUFFER
diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h
index 414d1004..f5846061 100644
--- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h
+++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h
@@ -13,8 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#ifndef _TESTNONUNIFORMWORKGROUP_H
-#define _TESTNONUNIFORMWORKGROUP_H
+#ifndef TESTNONUNIFORMWORKGROUP_H
+#define TESTNONUNIFORMWORKGROUP_H
 
 #include "procs.h"
 #include <vector>
@@ -147,5 +147,4 @@ private:
   unsigned int _overallCounter;
 };
 
-#endif // _TESTNONUNIFORMWORKGROUP_H
-
+#endif // TESTNONUNIFORMWORKGROUP_H
diff --git a/test_conformance/non_uniform_work_group/tools.h b/test_conformance/non_uniform_work_group/tools.h
index 2e63c3dd..ba01fc99 100644
--- a/test_conformance/non_uniform_work_group/tools.h
+++ b/test_conformance/non_uniform_work_group/tools.h
@@ -13,8 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#ifndef _TOOLS_H
-#define _TOOLS_H
+#ifndef TOOLS_H
+#define TOOLS_H
 
 #include "procs.h"
 #include <vector>
@@ -106,4 +106,4 @@ namespace Error {
   };
 
 }
-#endif // _TOOLS_H
+#endif // TOOLS_H
diff --git a/test_conformance/pipes/kernels.h b/test_conformance/pipes/kernels.h
index a2fb70c0..a897e5e8 100644
--- a/test_conformance/pipes/kernels.h
+++ b/test_conformance/pipes/kernels.h
@@ -13,8 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#ifndef _KERNELS_H_
-#define _KERNELS_H_
+#ifndef KERNELS_H_
+#define KERNELS_H_
 
 static const char* pipe_readwrite_struct_kernel_code = {
     "typedef struct{\n"
@@ -127,4 +127,4 @@ static const char* pipe_convenience_readwrite_struct_kernel_code = {
     "    read_pipe(in_pipe, &dst[gid]);\n"
     "}\n" };
 
-#endif //_KERNELS_H_
+#endif // KERNELS_H_
diff --git a/test_conformance/relationals/test_comparisons_fp.h b/test_conformance/relationals/test_comparisons_fp.h
index 66c62c2d..3401163e 100644
--- a/test_conformance/relationals/test_comparisons_fp.h
+++ b/test_conformance/relationals/test_comparisons_fp.h
@@ -14,8 +14,8 @@
 // limitations under the License.
 //
 
-#ifndef _TEST_COMPARISONS_FP_H
-#define _TEST_COMPARISONS_FP_H
+#ifndef TEST_COMPARISONS_FP_H
+#define TEST_COMPARISONS_FP_H
 
 #include <map>
 #include <memory>
@@ -225,4 +225,4 @@ int MakeAndRunTest(cl_device_id device, cl_context context,
     return TEST_PASS;
 }
 
-#endif // _TEST_COMPARISONS_FP_H
+#endif // TEST_COMPARISONS_FP_H