From 5749818906f7a3c3e4237ab0ac80f4d6eb57db3f Mon Sep 17 00:00:00 2001
From: Chuang-Yu Cheng <cy.cheng@imgtec.com>
Date: Wed, 29 Jan 2025 05:33:00 +0900
Subject: [PATCH] math_brute_force: fix `fdim` to use device's rounding when
 converting result back to half. (#2223)

In the half-precision `fdim` test, the original code always used
`CL_HALF_RTE` to convert the float reference result back to half, causing
mismatches against devices that round toward zero (RTZ). Some examples:
```
  fdim(0x365f, 0xdc63) = fdim( 0.398193f,  -280.75f)     =   281.148193f (RTE=0x5c65, RTZ=0x5c64)
  fdim(0xa4a3, 0xf0e9) = fdim(-0.018112f, 10056.0f)      = 10055.981445f (RTE=0x70e9, RTZ=0x70e8)
  fdim(0x1904, 0x9ab7) = fdim( 0.002449f,    -0.003279f) =     0.005728f (RTE=0x1dde, RTZ=0x1ddd)
```

Fix this by converting the result back to half with `CL_HALF_RTZ` when the
device is in RTZ mode (`gIsInRTZMode`), and with `CL_HALF_RTE` otherwise.
---
 test_conformance/math_brute_force/binary_half.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/test_conformance/math_brute_force/binary_half.cpp b/test_conformance/math_brute_force/binary_half.cpp
index 180034ba..70057db5 100644
--- a/test_conformance/math_brute_force/binary_half.cpp
+++ b/test_conformance/math_brute_force/binary_half.cpp
@@ -266,6 +266,7 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
         return CL_SUCCESS;
     }
 
+    cl_half_rounding_mode halfRoundingMode = CL_HALF_RTE;
     FPU_mode_type oldMode;
     oldRoundMode = kRoundToNearestEven;
     if (isFDim)
@@ -275,7 +276,11 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
         if (ftz) ForceFTZ(&oldMode);
 
         // Set the rounding mode to match the device
-        if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
+        if (gIsInRTZMode)
+        {
+            oldRoundMode = set_round(kRoundTowardZero, kfloat);
+            halfRoundingMode = CL_HALF_RTZ;
+        }
     }
 
     if (!strcmp(name, "copysign")) copysign_test = 1;
@@ -293,9 +298,9 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
         s2[j] = cl_half_to_float(p2[j]);
         if (isNextafter)
             r[j] = cl_half_from_float(reference_nextafterh(s[j], s2[j]),
-                                      CL_HALF_RTE);
+                                      halfRoundingMode);
         else
-            r[j] = cl_half_from_float(ref_func(s[j], s2[j]), CL_HALF_RTE);
+            r[j] = cl_half_from_float(ref_func(s[j], s2[j]), halfRoundingMode);
     }
 
     if (isFDim && ftz) RestoreFPState(&oldMode);