From 20ab003053e89012e8f4458f507d92864c88e6e0 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Tue, 28 Mar 2023 17:57:03 +0200
Subject: [PATCH] Added cl_half support for test_printf (#1622)

* Added support to test half floats with printf calls (issue #142, printf)

* Added corrections related to rounding and casting halfs (issue #142, printf)

* Reusing similar function (issue #142, printf)

* Corrected path without cl_khr_fp16 support (issue #142, printf)

* Cosmetic fix for order of vector tests (issue #142, printf)

* Added correction related to vendor test review (issue #142, printf)
---
 test_common/harness/kernelHelpers.cpp   |  23 ++-
 test_common/harness/kernelHelpers.h     |   4 +-
 test_conformance/printf/test_printf.cpp | 148 +++++++++++++-
 test_conformance/printf/test_printf.h   |  32 +--
 test_conformance/printf/util_printf.cpp | 249 ++++++++++++++++++++----
 5 files changed, 396 insertions(+), 60 deletions(-)

diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp
index 13ebcbc9..633b05e5 100644
--- a/test_common/harness/kernelHelpers.cpp
+++ b/test_common/harness/kernelHelpers.cpp
@@ -1511,22 +1511,33 @@ size_t get_min_alignment(cl_context context)
     return align_size;
 }
 
-cl_device_fp_config get_default_rounding_mode(cl_device_id device)
+cl_device_fp_config get_default_rounding_mode(cl_device_id device,
+                                              const cl_uint &param)
 {
+    if (param == CL_DEVICE_DOUBLE_FP_CONFIG)
+        test_error_ret(
+            -1,
+            "FAILURE: CL_DEVICE_DOUBLE_FP_CONFIG not supported by this routine",
+            0);
+
     char profileStr[128] = "";
     cl_device_fp_config single = 0;
-    int error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
-                                sizeof(single), &single, NULL);
+    int error = clGetDeviceInfo(device, param, sizeof(single), &single, NULL);
     if (error)
-        test_error_ret(error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG",
-                       0);
+    {
+        std::string message = std::string("Unable to get device ")
+            + std::string(param == CL_DEVICE_HALF_FP_CONFIG
+                              ? "CL_DEVICE_HALF_FP_CONFIG"
+                              : "CL_DEVICE_SINGLE_FP_CONFIG");
+        test_error_ret(error, message.c_str(), 0);
+    }
 
     if (single & CL_FP_ROUND_TO_NEAREST) return CL_FP_ROUND_TO_NEAREST;
 
     if (0 == (single & CL_FP_ROUND_TO_ZERO))
         test_error_ret(-1,
                        "FAILURE: device must support either "
-                       "CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST",
+                       "CL_FP_ROUND_TO_ZERO or CL_FP_ROUND_TO_NEAREST",
                        0);
 
     // Make sure we are an embedded device before allowing a pass
diff --git a/test_common/harness/kernelHelpers.h b/test_common/harness/kernelHelpers.h
index 4d8f2a8f..62a07e49 100644
--- a/test_common/harness/kernelHelpers.h
+++ b/test_common/harness/kernelHelpers.h
@@ -159,7 +159,9 @@ size_t get_min_alignment(cl_context context);
 
 /* Helper to obtain the default rounding mode for single precision computation.
  * (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
-cl_device_fp_config get_default_rounding_mode(cl_device_id device);
+cl_device_fp_config
+get_default_rounding_mode(cl_device_id device,
+                          const cl_uint &param = CL_DEVICE_SINGLE_FP_CONFIG);
 
 #define PASSIVE_REQUIRE_IMAGE_SUPPORT(device)                                  \
     if (checkForImageSupport(device))                                          \
diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp
index d638cd46..c4b6a0ba 100644
--- a/test_conformance/printf/test_printf.cpp
+++ b/test_conformance/printf/test_printf.cpp
@@ -13,7 +13,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
 
 #include <string.h>
 #include <errno.h>
@@ -40,7 +39,6 @@
 #include "harness/testHarness.h"
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
-#include "harness/mt19937.h"
 #include "harness/parseParameters.h"
 
 #include <CL/cl_ext.h>
@@ -237,10 +235,13 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
     char testname[256] = {0};
     char addrSpaceArgument[256] = {0};
     char addrSpacePAddArgument[256] = {0};
+    char extension[128] = { 0 };
 
     //Program Source code for int,float,octal,hexadecimal,char,string
-    const char *sourceGen[] = {
-        "__kernel void ", testname,
+    const char* sourceGen[] = {
+        extension,
+        "__kernel void ",
+        testname,
         "(void)\n",
         "{\n"
         "   printf(\"",
@@ -251,8 +252,10 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
         "}\n"
     };
     //Program Source code for vector
-    const char *sourceVec[] = {
-        "__kernel void ", testname,
+    const char* sourceVec[] = {
+        extension,
+        "__kernel void ",
+        testname,
         "(void)\n",
         "{\n",
         allTestCase[testId]->_genParameters[testNum].dataType,
@@ -289,6 +292,11 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
     //Update testname
     sprintf(testname,"%s%d","test",testId);
 
+    if (allTestCase[testId]->_type == TYPE_HALF
+        || allTestCase[testId]->_type == TYPE_HALF_LIMITS)
+        strcpy(extension, "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n");
+
+
     //Update addrSpaceArgument and addrSpacePAddArgument types, based on FULL_PROFILE/EMBEDDED_PROFILE
     if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE)
     {
@@ -304,6 +312,12 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
 
     if(allTestCase[testId]->_type == TYPE_VECTOR)
     {
+        if (strcmp(allTestCase[testId]->_genParameters[testNum].dataType,
+                   "half")
+            == 0)
+            strcpy(extension,
+                   "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n");
+
         err = create_single_kernel_helper(
             context, &program, kernel_ptr,
             sizeof(sourceVec) / sizeof(sourceVec[0]), sourceVec, testname);
@@ -404,8 +418,27 @@ static bool is64bAddressSpace(cl_device_id  device_id)
 //-----------------------------------------
 static int doTest(cl_command_queue queue, cl_context context, const unsigned int testId, const unsigned int testNum, cl_device_id device)
 {
+    if ((allTestCase[testId]->_type == TYPE_HALF
+         || allTestCase[testId]->_type == TYPE_HALF_LIMITS)
+        && !is_extension_available(device, "cl_khr_fp16"))
+    {
+        log_info(
+            "Skipping half because cl_khr_fp16 extension is not supported.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
     if(allTestCase[testId]->_type == TYPE_VECTOR)
     {
+        if ((strcmp(allTestCase[testId]->_genParameters[testNum].dataType,
+                    "half")
+             == 0)
+            && !is_extension_available(device, "cl_khr_fp16"))
+        {
+            log_info("Skipping half because cl_khr_fp16 extension is not "
+                     "supported.\n");
+            return TEST_SKIPPED_ITSELF;
+        }
+
         log_info("%d)testing printf(\"%sv%s%s\",%s)\n",testNum,allTestCase[testId]->_genParameters[testNum].vectorFormatFlag,allTestCase[testId]->_genParameters[testNum].vectorSize,
                  allTestCase[testId]->_genParameters[testNum].vectorFormatSpecifier,allTestCase[testId]->_genParameters[testNum].dataRepresentation);
     }
@@ -614,6 +647,75 @@ int test_int_8(cl_device_id deviceID, cl_context context, cl_command_queue queue
 }
 
 
+int test_half_0(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 0, deviceID);
+}
+int test_half_1(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 1, deviceID);
+}
+int test_half_2(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 2, deviceID);
+}
+int test_half_3(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 3, deviceID);
+}
+int test_half_4(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 4, deviceID);
+}
+int test_half_5(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 5, deviceID);
+}
+int test_half_6(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 6, deviceID);
+}
+int test_half_7(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 7, deviceID);
+}
+int test_half_8(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 8, deviceID);
+}
+int test_half_9(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 9, deviceID);
+}
+
+
+int test_half_limits_0(cl_device_id deviceID, cl_context context,
+                       cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF_LIMITS, 0, deviceID);
+}
+int test_half_limits_1(cl_device_id deviceID, cl_context context,
+                       cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF_LIMITS, 1, deviceID);
+}
+int test_half_limits_2(cl_device_id deviceID, cl_context context,
+                       cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF_LIMITS, 2, deviceID);
+}
+
+
 int test_float_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     return doTest(gQueue, gContext, TYPE_FLOAT, 0, deviceID);
@@ -800,6 +902,11 @@ int test_vector_4(cl_device_id deviceID, cl_context context, cl_command_queue qu
 {
     return doTest(gQueue, gContext, TYPE_VECTOR, 4, deviceID);
 }
+int test_vector_5(cl_device_id deviceID, cl_context context,
+                  cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_VECTOR, 5, deviceID);
+}
 
 
 int test_address_space_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
@@ -855,6 +962,15 @@ test_definition test_list[] = {
     ADD_TEST(int_6),           ADD_TEST(int_7),
     ADD_TEST(int_8),
 
+    ADD_TEST(half_0),          ADD_TEST(half_1),
+    ADD_TEST(half_2),          ADD_TEST(half_3),
+    ADD_TEST(half_4),          ADD_TEST(half_5),
+    ADD_TEST(half_6),          ADD_TEST(half_7),
+    ADD_TEST(half_8),          ADD_TEST(half_9),
+
+    ADD_TEST(half_limits_0),   ADD_TEST(half_limits_1),
+    ADD_TEST(half_limits_2),
+
     ADD_TEST(float_0),         ADD_TEST(float_1),
     ADD_TEST(float_2),         ADD_TEST(float_3),
     ADD_TEST(float_4),         ADD_TEST(float_5),
@@ -885,7 +1001,7 @@ test_definition test_list[] = {
 
     ADD_TEST(vector_0),        ADD_TEST(vector_1),
     ADD_TEST(vector_2),        ADD_TEST(vector_3),
-    ADD_TEST(vector_4),
+    ADD_TEST(vector_4),        ADD_TEST(vector_5),
 
     ADD_TEST(address_space_0), ADD_TEST(address_space_1),
     ADD_TEST(address_space_2), ADD_TEST(address_space_3),
@@ -1056,6 +1172,24 @@ test_status InitCL( cl_device_id device )
 
     releaseOutputStream(gFd);
 
+    if (is_extension_available(device, "cl_khr_fp16"))
+    {
+        const cl_device_fp_config fpConfigHalf =
+            get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG);
+        if (fpConfigHalf == CL_FP_ROUND_TO_NEAREST)
+        {
+            half_rounding_mode = CL_HALF_RTE;
+        }
+        else if (fpConfigHalf == CL_FP_ROUND_TO_ZERO)
+        {
+            half_rounding_mode = CL_HALF_RTZ;
+        }
+        else
+        {
+            log_error("Error while acquiring half rounding mode");
+        }
+    }
+
     // Generate reference results
     generateRef(device);
 
diff --git a/test_conformance/printf/test_printf.h b/test_conformance/printf/test_printf.h
index 038a7b9c..8eb2a032 100644
--- a/test_conformance/printf/test_printf.h
+++ b/test_conformance/printf/test_printf.h
@@ -32,6 +32,8 @@
 #include <CL/cl_platform.h>
 #endif
 
+#include <CL/cl_half.h>
+
 #define ANALYSIS_BUFFER_SIZE 256
 
 //-----------------------------------------
@@ -42,18 +44,20 @@
 // Types
 //-----------------------------------------
 enum PrintfTestType
- {
-     TYPE_INT,
-     TYPE_FLOAT,
-     TYPE_FLOAT_LIMITS,
-     TYPE_OCTAL,
-     TYPE_UNSIGNED,
-     TYPE_HEXADEC,
-     TYPE_CHAR,
-     TYPE_STRING,
-     TYPE_VECTOR,
-     TYPE_ADDRESS_SPACE,
-     TYPE_COUNT
+{
+    TYPE_INT,
+    TYPE_HALF,
+    TYPE_HALF_LIMITS,
+    TYPE_FLOAT,
+    TYPE_FLOAT_LIMITS,
+    TYPE_OCTAL,
+    TYPE_UNSIGNED,
+    TYPE_HEXADEC,
+    TYPE_CHAR,
+    TYPE_STRING,
+    TYPE_VECTOR,
+    TYPE_ADDRESS_SPACE,
+    TYPE_COUNT
 };
 
 struct printDataGenParameters
@@ -72,6 +76,7 @@ struct printDataGenParameters
 
 // Reference results - filled out at run-time
 static std::vector<std::string> correctBufferInt;
+static std::vector<std::string> correctBufferHalf;
 static std::vector<std::string> correctBufferFloat;
 static std::vector<std::string> correctBufferOctal;
 static std::vector<std::string> correctBufferUnsigned;
@@ -103,6 +108,9 @@ struct testCase
 
 extern const char* strType[];
 extern std::vector<testCase*> allTestCase;
+extern cl_half_rounding_mode half_rounding_mode;
+
+//-----------------------------------------
 
 size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId,cl_ulong pAddr = 0);
 
diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp
index d45e1d43..6b310a99 100644
--- a/test_conformance/printf/util_printf.cpp
+++ b/test_conformance/printf/util_printf.cpp
@@ -13,15 +13,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
 #include "harness/rounding_mode.h"
 #include "harness/kernelHelpers.h"
 
 #include "test_printf.h"
 #include <assert.h>
+#include <CL/cl_half.h>
+
 
 // Helpers for generating runtime reference results
 static void intRefBuilder(printDataGenParameters&, char*, const size_t);
+static void halfRefBuilder(printDataGenParameters&, char* rResult,
+                           const size_t);
 static void floatRefBuilder(printDataGenParameters&, char* rResult, const size_t);
 static void octalRefBuilder(printDataGenParameters&, char*, const size_t);
 static void unsignedRefBuilder(printDataGenParameters&, char*, const size_t);
@@ -100,7 +103,150 @@ testCase testCaseInt = {
 };
 
 
+//==============================================
 
+// half
+
+//==============================================
+
+//--------------------------------------------------------
+
+// [string] format |  [string] float-data representation |
+
+//--------------------------------------------------------
+
+std::vector<printDataGenParameters> printHalfGenParameters = {
+
+    // Default(right)-justified
+
+    { "%f", "1.234h" },
+
+    // One position after the decimal,default(right)-justified
+
+    { "%4.2f", "1.2345h" },
+
+    // Zero positions after the
+    // decimal([floor]rounding),default(right)-justified
+
+    { "%.0f", "0.1h" },
+
+    // Zero positions after the decimal([ceil]rounding),default(right)-justified
+
+    { "%.0f", "0.6h" },
+
+    // Zero-filled,default positions number after the
+    // decimal,default(right)-justified
+
+    { "%0f", "0.6h" },
+
+    // Double argument representing floating-point,used by f
+    // style,default(right)-justified
+
+    { "%4g", "5.678h" },
+
+    // Double argument representing floating-point,used by e
+    // style,default(right)-justified
+
+    { "%4.2g", "5.678h" },
+
+    // Double argument representing floating-point,used by e
+    // style,default(right)-justified
+
+    { "%4G", "0.000062h" },
+
+    // Double argument representing floating-point,with
+    // exponent,left-justified,default(right)-justified
+
+    { "%-#20.15e", "65504.0h" },
+
+    // Double argument representing floating-point,with
+    // exponent,left-justified,with sign,capital E,default(right)-justified
+
+    { "%+#21.15E", "-65504.0h" },
+};
+
+//---------------------------------------------------------
+
+// Test case for float                                     |
+
+//---------------------------------------------------------
+
+testCase testCaseHalf = {
+
+    TYPE_HALF,
+
+    correctBufferHalf,
+
+    printHalfGenParameters,
+
+    halfRefBuilder,
+
+    kfloat
+
+};
+
+
+//==============================================
+
+// half limits
+
+//==============================================
+
+
+//--------------------------------------------------------
+
+// [string] format |  [string] float-data representation |
+
+//--------------------------------------------------------
+
+
+std::vector<printDataGenParameters> printHalfLimitsGenParameters = {
+
+    // Infinity (1.0/0.0)
+
+    { "%f", "1.0h/0.0h" },
+
+    // NaN
+
+    { "%f", "sqrt(-1.0h)" },
+
+    // NaN
+    { "%f", "acospi(2.0h)" }
+
+};
+//--------------------------------------------------------
+
+//  Lookup table - [string]float-correct buffer             |
+
+//--------------------------------------------------------
+
+std::vector<std::string> correctBufferHalfLimits = {
+
+    "inf",
+
+    "-nan",
+
+    "nan"
+
+};
+
+//---------------------------------------------------------
+
+// Test case for float                                     |
+
+//---------------------------------------------------------
+
+testCase testCaseHalfLimits = {
+
+    TYPE_HALF_LIMITS,
+
+    correctBufferHalfLimits,
+
+    printHalfLimitsGenParameters,
+
+    NULL
+
+};
 
 
 //==============================================
@@ -229,17 +375,18 @@ testCase testCaseFloat = {
 
 std::vector<printDataGenParameters> printFloatLimitsGenParameters = {
 
-    //Infinity (1.0/0.0)
+    // Infinity (1.0/0.0)
 
-    {"%f","1.0f/0.0f"},
+    { "%f", "1.0f/0.0f" },
 
-    //NaN
+    // NaN
 
-    {"%f","sqrt(-1.0f)"},
+    { "%f", "sqrt(-1.0f)" },
 
-    //NaN
-    {"%f","acospi(2.0f)"}
-    };
+    // NaN
+    { "%f", "acospi(2.0f)" }
+
+};
 //--------------------------------------------------------
 
 //  Lookup table - [string]float-correct buffer             |
@@ -253,6 +400,7 @@ std::vector<std::string> correctBufferFloatLimits = {
     "-nan",
 
     "nan"
+
 };
 
 //---------------------------------------------------------
@@ -593,24 +741,27 @@ std::vector<printDataGenParameters> printVectorGenParameters = {
 
     //(Minimum)Two-wide,two positions after decimal
 
-    {NULL,"(1.0f,2.0f,3.0f,4.0f)","%2.2","hlf","float","4"},
+    { NULL, "(1.0f,2.0f,3.0f,4.0f)", "%2.2", "hlf", "float", "4" },
 
-    //Alternative form,uchar argument
+    // Alternative form,uchar argument
 
-    {NULL,"(0xFA,0xFB)","%#","hhx","uchar","2"},
+    { NULL, "(0xFA,0xFB)", "%#", "hhx", "uchar", "2" },
 
-    //Alternative form,ushort argument
+    // Alternative form,ushort argument
 
-    {NULL,"(0x1234,0x8765)","%#","hx","ushort","2"},
+    { NULL, "(0x1234,0x8765)", "%#", "hx", "ushort", "2" },
 
-  //Alternative form,uint argument
+    // Alternative form,uint argument
 
-    {NULL,"(0x12345678,0x87654321)","%#","hlx","uint","2"},
+    { NULL, "(0x12345678,0x87654321)", "%#", "hlx", "uint", "2" },
 
-    //Alternative form,long argument
+    // Alternative form,long argument
 
-    {NULL,"(12345678,98765432)","%","ld","long","2"}
+    { NULL, "(12345678,98765432)", "%", "ld", "long", "2" },
 
+    //(Minimum)Two-wide,two positions after decimal
+
+    { NULL, "(1.0h,2.0h,3.0h,4.0h)", "%2.2", "hf", "half", "4" }
 };
 
 //------------------------------------------------------------
@@ -627,9 +778,11 @@ std::vector<std::string> correctBufferVector = {
 
     "0x1234,0x8765",
 
-  "0x12345678,0x87654321",
+    "0x12345678,0x87654321",
 
-    "12345678,98765432"
+    "12345678,98765432",
+
+    "1.00,2.00,3.00,4.00"
 
 };
 
@@ -731,8 +884,16 @@ testCase testCaseAddrSpace = {
 
 //-------------------------------------------------------------------------------
 
-std::vector<testCase*> allTestCase = {&testCaseInt,&testCaseFloat,&testCaseFloatLimits,&testCaseOctal,&testCaseUnsigned,&testCaseHexadecimal,&testCaseChar,&testCaseString,&testCaseVector,&testCaseAddrSpace};
+std::vector<testCase*> allTestCase = {
+    &testCaseInt,      &testCaseHalf,        &testCaseHalfLimits,
+    &testCaseFloat,    &testCaseFloatLimits, &testCaseOctal,
+    &testCaseUnsigned, &testCaseHexadecimal, &testCaseChar,
+    &testCaseString,   &testCaseVector,      &testCaseAddrSpace
+};
 
+//-----------------------------------------
+
+cl_half_rounding_mode half_rounding_mode = CL_HALF_RTE;
 
 //-----------------------------------------
 
@@ -807,6 +968,14 @@ static void intRefBuilder(printDataGenParameters& params, char* refResult, const
     snprintf(refResult, refSize, params.genericFormat, atoi(params.dataRepresentation));
 }
 
+static void halfRefBuilder(printDataGenParameters& params, char* refResult,
+                           const size_t refSize)
+{
+    cl_half val = cl_half_from_float(strtof(params.dataRepresentation, NULL),
+                                     half_rounding_mode);
+    snprintf(refResult, refSize, params.genericFormat, cl_half_to_float(val));
+}
+
 static void floatRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize)
 {
     snprintf(refResult, refSize, params.genericFormat, strtof(params.dataRepresentation, NULL));
@@ -842,24 +1011,30 @@ static void hexRefBuilder(printDataGenParameters& params, char* refResult, const
 */
 void generateRef(const cl_device_id device)
 {
-    const cl_device_fp_config fpConfig = get_default_rounding_mode(device);
+    const cl_device_fp_config fpConfigSingle =
+        get_default_rounding_mode(device);
+    const cl_device_fp_config fpConfigHalf = (half_rounding_mode == CL_HALF_RTE)
+        ? CL_FP_ROUND_TO_NEAREST
+        : CL_FP_ROUND_TO_ZERO;
     const RoundingMode hostRound = get_round();
-    RoundingMode deviceRound;
 
     // Map device rounding to CTS rounding type
     // get_default_rounding_mode supports RNE and RTZ
-    if (fpConfig == CL_FP_ROUND_TO_NEAREST)
-    {
-        deviceRound = kRoundToNearestEven;
-    }
-    else if (fpConfig == CL_FP_ROUND_TO_ZERO)
-    {
-        deviceRound = kRoundTowardZero;
-    }
-    else
-    {
-        assert(false && "Unreachable");
-    }
+    auto get_rounding = [](const cl_device_fp_config& fpConfig) {
+        if (fpConfig == CL_FP_ROUND_TO_NEAREST)
+        {
+            return kRoundToNearestEven;
+        }
+        else if (fpConfig == CL_FP_ROUND_TO_ZERO)
+        {
+            return kRoundTowardZero;
+        }
+        else
+        {
+            assert(false && "Unreachable");
+        }
+        return kDefaultRoundingMode;
+    };
 
     // Loop through all test cases
     for (auto &caseToTest: allTestCase)
@@ -875,6 +1050,12 @@ void generateRef(const cl_device_id device)
         // Make sure the reference result is empty
         assert(caseToTest->_correctBuffer.size() == 0);
 
+        const cl_device_fp_config* fpConfig = &fpConfigSingle;
+        if (caseToTest->_type == TYPE_HALF
+            || caseToTest->_type == TYPE_HALF_LIMITS)
+            fpConfig = &fpConfigHalf;
+        RoundingMode deviceRound = get_rounding(*fpConfig);
+
         // Loop through each input
         for (auto &params: caseToTest->_genParameters)
         {