From 8d9d1f3e9da069cf5d224025160628ab3911ba00 Mon Sep 17 00:00:00 2001
From: Nikhil Joshi <nikhilj@nvidia.com>
Date: Tue, 5 Jul 2022 22:28:18 +0530
Subject: [PATCH] Fix math tests to allow ftz in relaxed mode. (#1371)

* Fix math tests to allow ftz in relaxed mode.

In a recent spec clarification, it was agreed that ftz is
a valid optimization under -cl-fast-relaxed-math and that
-cl-denorms-are-zero does not need to be passed explicitly
to enable ftz behavior for implementations that already
support it.

GitHub Spec Issue OpenCL-Docs#579
GitHub Spec Issue OpenCL-Docs#597
GitHub CTS Issue OpenCL-CTS#1267
---
 test_conformance/math_brute_force/binary_double.cpp       | 4 +++-
 test_conformance/math_brute_force/binary_float.cpp        | 4 ++--
 test_conformance/math_brute_force/binary_i_double.cpp     | 6 +++++-
 test_conformance/math_brute_force/binary_i_float.cpp      | 7 +++++--
 .../math_brute_force/binary_operator_double.cpp           | 3 ++-
 .../math_brute_force/binary_operator_float.cpp            | 6 +++---
 .../math_brute_force/binary_two_results_i_double.cpp      | 2 +-
 .../math_brute_force/binary_two_results_i_float.cpp       | 2 +-
 test_conformance/math_brute_force/i_unary_double.cpp      | 2 +-
 test_conformance/math_brute_force/i_unary_float.cpp       | 2 +-
 test_conformance/math_brute_force/macro_binary_double.cpp | 8 ++++++--
 test_conformance/math_brute_force/macro_binary_float.cpp  | 8 ++++++--
 test_conformance/math_brute_force/macro_unary_double.cpp  | 8 ++++++--
 test_conformance/math_brute_force/macro_unary_float.cpp   | 8 ++++++--
 test_conformance/math_brute_force/ternary_double.cpp      | 2 +-
 test_conformance/math_brute_force/ternary_float.cpp       | 2 +-
 test_conformance/math_brute_force/unary_double.cpp        | 3 ++-
 test_conformance/math_brute_force/unary_float.cpp         | 2 +-
 .../math_brute_force/unary_two_results_double.cpp         | 2 +-
 .../math_brute_force/unary_two_results_float.cpp          | 4 ++--
 .../math_brute_force/unary_two_results_i_double.cpp       | 2 +-
 .../math_brute_force/unary_two_results_i_float.cpp        | 2 +-
 test_conformance/math_brute_force/unary_u_double.cpp      | 2 +-
 test_conformance/math_brute_force/unary_u_float.cpp       | 2 +-
 24 files changed, 60 insertions(+), 33 deletions(-)

diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp
index a2b7d28b..ec8eb300 100644
--- a/test_conformance/math_brute_force/binary_double.cpp
+++ b/test_conformance/math_brute_force/binary_double.cpp
@@ -297,6 +297,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     float ulps = job->ulps;
     dptr func = job->f->dfunc;
     int ftz = job->ftz;
+    bool relaxedMode = job->relaxedMode;
     MTdata d = tinfo->d;
     cl_int error;
     const char *name = job->f->name;
@@ -481,7 +482,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                 float err = Bruteforce_Ulp_Error_Double(test, correct);
                 int fail = !(fabsf(err) <= ulps);
 
-                if (fail && ftz)
+                if (fail && (ftz || relaxedMode))
                 {
                     // retry per section 6.5.3.2
                     if (IsDoubleResultSubnormal(correct, ulps))
@@ -680,6 +681,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     test_info.f = f;
     test_info.ulps = f->double_ulps;
     test_info.ftz = f->ftz || gForceFTZ;
+    test_info.relaxedMode = relaxedMode;
 
     test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
     test_info.skipNanInf = 0;
diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp
index 97712ee8..a706f772 100644
--- a/test_conformance/math_brute_force/binary_float.cpp
+++ b/test_conformance/math_brute_force/binary_float.cpp
@@ -461,7 +461,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     {
         // Calculate the correctly rounded reference result
         memset(&oldMode, 0, sizeof(oldMode));
-        if (ftz) ForceFTZ(&oldMode);
+        if (ftz || relaxedMode) ForceFTZ(&oldMode);
 
         // Set the rounding mode to match the device
         if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
@@ -546,7 +546,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                     float err = Ulp_Error(test, correct);
                     int fail = !(fabsf(err) <= ulps);
 
-                    if (fail && ftz)
+                    if (fail && (ftz || relaxedMode))
                     {
                         // retry per section 6.5.3.2
                         if (IsFloatResultSubnormal(correct, ulps))
diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp
index f15c21ed..23a729e0 100644
--- a/test_conformance/math_brute_force/binary_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_i_double.cpp
@@ -164,6 +164,8 @@ struct TestInfo
     cl_uint scale; // stride between individual test values
     float ulps; // max_allowed ulps
     int ftz; // non-zero if running in flush to zero mode
+    bool relaxedMode; // True if test is running in relaxed mode, false
+                      // otherwise.
 
     // no special values
 };
@@ -300,6 +302,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     float ulps = job->ulps;
     dptr func = job->f->dfunc;
     int ftz = job->ftz;
+    bool relaxedMode = job->relaxedMode;
     MTdata d = tinfo->d;
     cl_int error;
     const char *name = job->f->name;
@@ -482,7 +485,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                 float err = Bruteforce_Ulp_Error_Double(test, correct);
                 int fail = !(fabsf(err) <= ulps);
 
-                if (fail && ftz)
+                if (fail && (ftz || relaxedMode))
                 {
                     // retry per section 6.5.3.2
                     if (IsDoubleResultSubnormal(correct, ulps))
@@ -601,6 +604,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
     test_info.f = f;
     test_info.ulps = f->double_ulps;
     test_info.ftz = f->ftz || gForceFTZ;
+    test_info.relaxedMode = relaxedMode;
 
     // cl_kernels aren't thread safe, so we make one for each vector size for
     // every thread
diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp
index 9e27b007..0cf7494f 100644
--- a/test_conformance/math_brute_force/binary_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_i_float.cpp
@@ -162,7 +162,8 @@ struct TestInfo
     cl_uint scale; // stride between individual test values
     float ulps; // max_allowed ulps
     int ftz; // non-zero if running in flush to zero mode
-
+    bool relaxedMode; // True if test is running in relaxed mode, false
+                      // otherwise.
     // no special values
 };
 
@@ -291,6 +292,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     fptr func = job->f->func;
     int ftz = job->ftz;
+    bool relaxedMode = job->relaxedMode;
     float ulps = job->ulps;
     MTdata d = tinfo->d;
     cl_int error;
@@ -473,7 +475,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                 float err = Ulp_Error(test, correct);
                 int fail = !(fabsf(err) <= ulps);
 
-                if (fail && ftz)
+                if (fail && (ftz || relaxedMode))
                 {
                     // retry per section 6.5.3.2
                     if (IsFloatResultSubnormal(correct, ulps))
@@ -595,6 +597,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
     test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
     test_info.ftz =
         f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    test_info.relaxedMode = relaxedMode;
 
     // cl_kernels aren't thread safe, so we make one for each vector size for
     // every thread
diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp
index c407fdaa..f90a4d64 100644
--- a/test_conformance/math_brute_force/binary_operator_double.cpp
+++ b/test_conformance/math_brute_force/binary_operator_double.cpp
@@ -294,6 +294,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     float ulps = job->ulps;
     dptr func = job->f->dfunc;
     int ftz = job->ftz;
+    bool relaxedMode = job->relaxedMode;
     MTdata d = tinfo->d;
     cl_int error;
     const char *name = job->f->name;
@@ -476,7 +477,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                 float err = Bruteforce_Ulp_Error_Double(test, correct);
                 int fail = !(fabsf(err) <= ulps);
 
-                if (fail && ftz)
+                if (fail && (ftz || relaxedMode))
                 {
                     // retry per section 6.5.3.2
                     if (IsDoubleResultSubnormal(correct, ulps))
diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp
index 7fbb07c2..535d7209 100644
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ b/test_conformance/math_brute_force/binary_operator_float.cpp
@@ -456,7 +456,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     // Calculate the correctly rounded reference result
     FPU_mode_type oldMode;
     memset(&oldMode, 0, sizeof(oldMode));
-    if (ftz) ForceFTZ(&oldMode);
+    if (ftz || relaxedMode) ForceFTZ(&oldMode);
 
     // Set the rounding mode to match the device
     oldRoundMode = kRoundToNearestEven;
@@ -484,7 +484,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
 
-    if (ftz) RestoreFPState(&oldMode);
+    if (ftz || relaxedMode) RestoreFPState(&oldMode);
 
     // Read the data back -- no need to wait for the first N-1 buffers but wait
     // for the last buffer. This is an in order queue.
@@ -541,7 +541,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                     ((!(fabsf(err) <= ulps)) && (!(fabsf(errB) <= ulps)));
                 if (fabsf(errB) < fabsf(err)) err = errB;
 
-                if (fail && ftz)
+                if (fail && (ftz || relaxedMode))
                 {
                     // retry per section 6.5.3.2
                     if (IsFloatResultSubnormal(correct, ulps))
diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
index 43dc1d30..be7064e4 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
@@ -379,7 +379,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
                 if (iptrUndefined) iErr = 0;
 
                 int fail = !(fabsf(err) <= f->double_ulps && iErr == 0);
-                if (ftz && fail)
+                if ((ftz || relaxedMode) && fail)
                 {
                     // retry per section 6.5.3.2
                     if (IsDoubleResultSubnormal(correct, f->double_ulps))
diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
index 83ceeaab..901c8598 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
@@ -379,7 +379,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
                 if (iptrUndefined) iErr = 0;
 
                 int fail = !(fabsf(err) <= float_ulps && iErr == 0);
-                if (ftz && fail)
+                if ((ftz || relaxedMode) && fail)
                 {
                     // retry per section 6.5.3.2
                     if (IsFloatResultSubnormal(correct, float_ulps))
diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp
index d09e14c1..f07dd78d 100644
--- a/test_conformance/math_brute_force/i_unary_double.cpp
+++ b/test_conformance/math_brute_force/i_unary_double.cpp
@@ -248,7 +248,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
                 // If we aren't getting the correctly rounded result
                 if (t[j] != q[j])
                 {
-                    if (ftz && IsDoubleSubnormal(s[j]))
+                    if ((ftz || relaxedMode) && IsDoubleSubnormal(s[j]))
                     {
                         unsigned int correct0 = f->dfunc.i_f(0.0);
                         unsigned int correct1 = f->dfunc.i_f(-0.0);
diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp
index 89b566d9..c38bdcf9 100644
--- a/test_conformance/math_brute_force/i_unary_float.cpp
+++ b/test_conformance/math_brute_force/i_unary_float.cpp
@@ -245,7 +245,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                 // If we aren't getting the correctly rounded result
                 if (t[j] != q[j])
                 {
-                    if (ftz && IsFloatSubnormal(s[j]))
+                    if ((ftz || relaxedMode) && IsFloatSubnormal(s[j]))
                     {
                         unsigned int correct0 = f->func.i_f(0.0);
                         unsigned int correct1 = f->func.i_f(-0.0);
diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp
index d3e8071f..bb036a24 100644
--- a/test_conformance/math_brute_force/macro_binary_double.cpp
+++ b/test_conformance/math_brute_force/macro_binary_double.cpp
@@ -157,6 +157,8 @@ struct TestInfo
     cl_uint step; // step between each chunk and the next.
     cl_uint scale; // stride between individual test values
     int ftz; // non-zero if running in flush to zero mode
+    bool relaxedMode; // True if test is running in relaxed mode, false
+                      // otherwise.
 };
 
 // A table of more difficult cases to get right
@@ -282,6 +284,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     dptr dfunc = job->f->dfunc;
     int ftz = job->ftz;
+    bool relaxedMode = job->relaxedMode;
     MTdata d = tinfo->d;
     cl_int error;
     const char *name = job->f->name;
@@ -455,7 +458,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         if (gMinVectorSizeIndex == 0 && t[j] != q[j])
         {
             // If we aren't getting the correctly rounded result
-            if (ftz)
+            if (ftz || relaxedMode)
             {
                 if (IsDoubleSubnormal(s[j]))
                 {
@@ -503,7 +506,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             // If we aren't getting the correctly rounded result
             if (-t[j] != q[j])
             {
-                if (ftz)
+                if (ftz || relaxedMode)
                 {
                     if (IsDoubleSubnormal(s[j]))
                     {
@@ -607,6 +610,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
 
     test_info.f = f;
     test_info.ftz = f->ftz || gForceFTZ;
+    test_info.relaxedMode = relaxedMode;
 
     // cl_kernels aren't thread safe, so we make one for each vector size for
     // every thread
diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp
index 6c7c8c05..f8cfc9b7 100644
--- a/test_conformance/math_brute_force/macro_binary_float.cpp
+++ b/test_conformance/math_brute_force/macro_binary_float.cpp
@@ -155,6 +155,8 @@ struct TestInfo
     cl_uint step; // step between each chunk and the next.
     cl_uint scale; // stride between individual test values
     int ftz; // non-zero if running in flush to zero mode
+    bool relaxedMode; // True if test is running in relaxed mode, false
+                      // otherwise.
 };
 
 // A table of more difficult cases to get right
@@ -272,6 +274,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     fptr func = job->f->func;
     int ftz = job->ftz;
+    bool relaxedMode = job->relaxedMode;
     MTdata d = tinfo->d;
     cl_int error;
     const char *name = job->f->name;
@@ -445,7 +448,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
         if (gMinVectorSizeIndex == 0 && t[j] != q[j])
         {
-            if (ftz)
+            if (ftz || relaxedMode)
             {
                 if (IsFloatSubnormal(s[j]))
                 {
@@ -492,7 +495,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             // If we aren't getting the correctly rounded result
             if (-t[j] != q[j])
             {
-                if (ftz)
+                if (ftz || relaxedMode)
                 {
                     if (IsFloatSubnormal(s[j]))
                     {
@@ -596,6 +599,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     test_info.f = f;
     test_info.ftz =
         f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    test_info.relaxedMode = relaxedMode;
 
     // cl_kernels aren't thread safe, so we make one for each vector size for
     // every thread
diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp
index 7f3521c6..0e71f8a0 100644
--- a/test_conformance/math_brute_force/macro_unary_double.cpp
+++ b/test_conformance/math_brute_force/macro_unary_double.cpp
@@ -149,6 +149,8 @@ struct TestInfo
     cl_uint step; // step between each chunk and the next.
     cl_uint scale; // stride between individual test values
     int ftz; // non-zero if running in flush to zero mode
+    bool relaxedMode; // True if test is running in relaxed mode, false
+                      // otherwise.
 };
 
 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
@@ -161,6 +163,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     dptr dfunc = job->f->dfunc;
     int ftz = job->ftz;
+    bool relaxedMode = job->relaxedMode;
     cl_int error;
     const char *name = job->f->name;
 
@@ -286,7 +289,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         if (gMinVectorSizeIndex == 0 && t[j] != q[j])
         {
             // If we aren't getting the correctly rounded result
-            if (ftz)
+            if (ftz || relaxedMode)
             {
                 if (IsDoubleSubnormal(s[j]))
                 {
@@ -311,7 +314,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             // If we aren't getting the correctly rounded result
             if (-t[j] != q[j])
             {
-                if (ftz)
+                if (ftz || relaxedMode)
                 {
                     if (IsDoubleSubnormal(s[j]))
                     {
@@ -392,6 +395,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
 
     test_info.f = f;
     test_info.ftz = f->ftz || gForceFTZ;
+    test_info.relaxedMode = relaxedMode;
 
     // cl_kernels aren't thread safe, so we make one for each vector size for
     // every thread
diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp
index 0cd54de4..3b53bdb0 100644
--- a/test_conformance/math_brute_force/macro_unary_float.cpp
+++ b/test_conformance/math_brute_force/macro_unary_float.cpp
@@ -148,6 +148,8 @@ struct TestInfo
     cl_uint step; // step between each chunk and the next.
     cl_uint scale; // stride between individual test values
     int ftz; // non-zero if running in flush to zero mode
+    bool relaxedMode; // True if test is running in relaxed mode, false
+                      // otherwise.
 };
 
 cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
@@ -160,6 +162,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     fptr func = job->f->func;
     int ftz = job->ftz;
+    bool relaxedMode = job->relaxedMode;
     cl_int error = CL_SUCCESS;
     cl_int ret = CL_SUCCESS;
     const char *name = job->f->name;
@@ -290,7 +293,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (gMinVectorSizeIndex == 0 && t[j] != q[j])
             {
                 // If we aren't getting the correctly rounded result
-                if (ftz)
+                if (ftz || relaxedMode)
                 {
                     if (IsFloatSubnormal(s[j]))
                     {
@@ -316,7 +319,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                 // If we aren't getting the correctly rounded result
                 if (-t[j] != q[j])
                 {
-                    if (ftz)
+                    if (ftz || relaxedMode)
                     {
                         if (IsFloatSubnormal(s[j]))
                         {
@@ -406,6 +409,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
     test_info.f = f;
     test_info.ftz =
         f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    test_info.relaxedMode = relaxedMode;
 
     // cl_kernels aren't thread safe, so we make one for each vector size for
     // every thread
diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp
index 8af136ac..a3db3353 100644
--- a/test_conformance/math_brute_force/ternary_double.cpp
+++ b/test_conformance/math_brute_force/ternary_double.cpp
@@ -391,7 +391,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
                     float err = Bruteforce_Ulp_Error_Double(test, correct);
                     int fail = !(fabsf(err) <= f->double_ulps);
 
-                    if (fail && ftz)
+                    if (fail && (ftz || relaxedMode))
                     {
                         // retry per section 6.5.3.2
                         if (IsDoubleSubnormal(correct))
diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp
index c69083ad..fdcb48c4 100644
--- a/test_conformance/math_brute_force/ternary_float.cpp
+++ b/test_conformance/math_brute_force/ternary_float.cpp
@@ -443,7 +443,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
                     err = Ulp_Error(test, correct);
                     fail = !(fabsf(err) <= float_ulps);
 
-                    if (fail && ftz)
+                    if (fail && (ftz || relaxedMode))
                     {
                         float correct2, err2;
 
diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp
index 2d455047..3430fe34 100644
--- a/test_conformance/math_brute_force/unary_double.cpp
+++ b/test_conformance/math_brute_force/unary_double.cpp
@@ -172,6 +172,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     dptr func = job->f->dfunc;
     cl_int error;
     int ftz = job->ftz;
+    bool relaxedMode = job->relaxedMode;
 
     Force64BitFPUPrecision();
 
@@ -305,7 +306,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
                 if (fail)
                 {
-                    if (ftz)
+                    if (ftz || relaxedMode)
                     {
                         // retry per section 6.5.3.2
                         if (IsDoubleResultSubnormal(correct, ulps))
diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp
index 83d27b0b..02a5c2cf 100644
--- a/test_conformance/math_brute_force/unary_float.cpp
+++ b/test_conformance/math_brute_force/unary_float.cpp
@@ -435,7 +435,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
                 if (fail)
                 {
-                    if (ftz)
+                    if (ftz || relaxedMode)
                     {
                         typedef int (*CheckForSubnormal)(
                             double, float); // If we are in fast relaxed math,
diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp
index 8757fbc4..5556a080 100644
--- a/test_conformance/math_brute_force/unary_two_results_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_double.cpp
@@ -291,7 +291,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
                     float err2 = Bruteforce_Ulp_Error_Double(test2, correct2);
                     int fail = !(fabsf(err) <= f->double_ulps
                                  && fabsf(err2) <= f->double_ulps);
-                    if (ftz)
+                    if (ftz || relaxedMode)
                     {
                         // retry per section 6.5.3.2
                         if (IsDoubleResultSubnormal(correct, f->double_ulps))
diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp
index a54bd024..c95b10d3 100644
--- a/test_conformance/math_brute_force/unary_two_results_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_float.cpp
@@ -258,7 +258,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
         {
             // Calculate the correctly rounded reference result
             memset(&oldMode, 0, sizeof(oldMode));
-            if (ftz) ForceFTZ(&oldMode);
+            if (ftz || relaxedMode) ForceFTZ(&oldMode);
 
             // Set the rounding mode to match the device
             if (gIsInRTZMode)
@@ -385,7 +385,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
                     int fail = !(fabsf(err) <= float_ulps
                                  && fabsf(err2) <= float_ulps);
 
-                    if (ftz)
+                    if (ftz || relaxedMode)
                     {
                         // retry per section 6.5.3.2
                         if ((*isFloatResultSubnormalPtr)(correct, float_ulps))
diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
index 9ed77dce..c976061c 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
@@ -294,7 +294,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
                     cl_long iErr = (long long)q2[j] - (long long)correct2;
                     int fail = !(fabsf(err) <= f->double_ulps
                                  && abs_cl_long(iErr) <= maxiError);
-                    if (ftz)
+                    if (ftz || relaxedMode)
                     {
                         // retry per section 6.5.3.2
                         if (IsDoubleResultSubnormal(correct, f->double_ulps))
diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
index d048220b..7a3cd981 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
@@ -297,7 +297,7 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
                     cl_long iErr = (int64_t)q2[j] - (int64_t)correct2;
                     int fail = !(fabsf(err) <= float_ulps
                                  && abs_cl_long(iErr) <= maxiError);
-                    if (ftz)
+                    if (ftz || relaxedMode)
                     {
                         // retry per section 6.5.3.2
                         if (IsFloatResultSubnormal(correct, float_ulps))
diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp
index 9478d0bc..621ee6bb 100644
--- a/test_conformance/math_brute_force/unary_u_double.cpp
+++ b/test_conformance/math_brute_force/unary_u_double.cpp
@@ -249,7 +249,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
 
                     if (fail)
                     {
-                        if (ftz)
+                        if (ftz || relaxedMode)
                         {
                             // retry per section 6.5.3.2
                             if (IsDoubleResultSubnormal(correct,
diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp
index 848a9bac..0eae2e54 100644
--- a/test_conformance/math_brute_force/unary_u_float.cpp
+++ b/test_conformance/math_brute_force/unary_u_float.cpp
@@ -253,7 +253,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
 
                     if (fail)
                     {
-                        if (ftz)
+                        if (ftz || relaxedMode)
                         {
                             // retry per section 6.5.3.2
                             if (IsFloatResultSubnormal(correct, float_ulps))