From bb6e8eabccffaf797ed0f306d2e23e492ed01856 Mon Sep 17 00:00:00 2001
From: Jeremy Kemp <jeremy@jeremykemp.co.uk>
Date: Mon, 9 Nov 2020 18:47:13 +0000
Subject: [PATCH] Restored the embedded reduction factor to bruteforce.

This change was present on the GitLab branch but was missed during the transition to GitHub.

This change is intentionally as close as possible to the patch on GitLab.

Fixes #1045
---
 test_conformance/math_brute_force/Utility.h            |  1 +
 test_conformance/math_brute_force/binary.cpp           | 10 ++++++++++
 test_conformance/math_brute_force/binaryOperator.cpp   |  8 ++++++++
 test_conformance/math_brute_force/binary_i.cpp         | 10 ++++++++++
 .../math_brute_force/binary_two_results_i.cpp          |  8 ++++++++
 test_conformance/math_brute_force/i_unary.cpp          |  9 +++++++++
 test_conformance/math_brute_force/macro_binary.cpp     |  9 +++++++++
 test_conformance/math_brute_force/macro_unary.cpp      |  9 +++++++++
 test_conformance/math_brute_force/mad.cpp              | 10 ++++++++++
 test_conformance/math_brute_force/ternary.cpp          |  8 ++++++++
 test_conformance/math_brute_force/unary.cpp            | 10 ++++++++++
 .../math_brute_force/unary_two_results.cpp             |  8 ++++++++
 .../math_brute_force/unary_two_results_i.cpp           |  9 +++++++++
 test_conformance/math_brute_force/unary_u.cpp          | 10 ++++++++++
 14 files changed, 119 insertions(+)

diff --git a/test_conformance/math_brute_force/Utility.h b/test_conformance/math_brute_force/Utility.h
index 31256358..7be12874 100644
--- a/test_conformance/math_brute_force/Utility.h
+++ b/test_conformance/math_brute_force/Utility.h
@@ -31,6 +31,7 @@
 #include "harness/conversions.h"
 
 #define BUFFER_SIZE         (1024*1024*2)
+#define EMBEDDED_REDUCTION_FACTOR (64)
 
 #if defined( __GNUC__ )
     #define UNUSED  __attribute__ ((unused))
diff --git a/test_conformance/math_brute_force/binary.cpp b/test_conformance/math_brute_force/binary.cpp
index eb5007c0..2e61d86e 100644
--- a/test_conformance/math_brute_force/binary.cpp
+++ b/test_conformance/math_brute_force/binary.cpp
@@ -283,6 +283,11 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter,
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
         test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+    else if (gIsEmbedded)
+    {
+        test_info.scale *= EMBEDDED_REDUCTION_FACTOR;
+    }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
@@ -1021,6 +1026,11 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d,
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
         test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
+    else if (gIsEmbedded)
+    {
+        test_info.scale *= EMBEDDED_REDUCTION_FACTOR;
+    }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
diff --git a/test_conformance/math_brute_force/binaryOperator.cpp b/test_conformance/math_brute_force/binaryOperator.cpp
index 0742964d..26c7cc4c 100644
--- a/test_conformance/math_brute_force/binaryOperator.cpp
+++ b/test_conformance/math_brute_force/binaryOperator.cpp
@@ -274,6 +274,10 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
         test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+    else if (gIsEmbedded)
+    {
+        test_info.scale *= EMBEDDED_REDUCTION_FACTOR;
+    }
 
     test_info.step = test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
@@ -969,6 +973,10 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
         test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
+    else if (gIsEmbedded)
+    {
+        test_info.scale *= EMBEDDED_REDUCTION_FACTOR;
+    }
 
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
diff --git a/test_conformance/math_brute_force/binary_i.cpp b/test_conformance/math_brute_force/binary_i.cpp
index 6ba0eb58..1f2e9be3 100644
--- a/test_conformance/math_brute_force/binary_i.cpp
+++ b/test_conformance/math_brute_force/binary_i.cpp
@@ -272,6 +272,11 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
         test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+    else if (gIsEmbedded)
+    {
+        test_info.scale *= EMBEDDED_REDUCTION_FACTOR;
+    }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
@@ -786,6 +791,11 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
         test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
+    else if (gIsEmbedded)
+    {
+        test_info.scale *= EMBEDDED_REDUCTION_FACTOR;
+    }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
diff --git a/test_conformance/math_brute_force/binary_two_results_i.cpp b/test_conformance/math_brute_force/binary_two_results_i.cpp
index c5577b9e..5d794010 100644
--- a/test_conformance/math_brute_force/binary_two_results_i.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i.cpp
@@ -297,6 +297,10 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     if(gWimpyMode ){
         step = (1ULL<<32) * gWimpyReductionFactor / (512);
     }
+    else if (gIsEmbedded)
+    {
+        step = (BUFFER_SIZE / sizeof(float)) * EMBEDDED_REDUCTION_FACTOR;
+    }
 
     if( gIsEmbedded )
         float_ulps = f->float_embedded_ulps;
@@ -722,6 +726,10 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     if(gWimpyMode ){
        step = (1ULL<<32) * gWimpyReductionFactor / (512);
     }
+    else if (gIsEmbedded)
+    {
+        step = (BUFFER_SIZE / sizeof(double)) * EMBEDDED_REDUCTION_FACTOR;
+    }
 
 #if defined PARALLEL_REFERENCE
     cl_uint threadCount = GetThreadCount();
diff --git a/test_conformance/math_brute_force/i_unary.cpp b/test_conformance/math_brute_force/i_unary.cpp
index 379d8e35..50e1f1af 100644
--- a/test_conformance/math_brute_force/i_unary.cpp
+++ b/test_conformance/math_brute_force/i_unary.cpp
@@ -199,6 +199,10 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
     {
         step = (1ULL<<32) * gWimpyReductionFactor / (512);
     }
+    else if (gIsEmbedded)
+    {
+        step = (BUFFER_SIZE / sizeof(cl_float)) * EMBEDDED_REDUCTION_FACTOR;
+    }
 
     // This test is not using ThreadPool so we need to disable FTZ here
     // for reference computations
@@ -420,6 +424,11 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
     {
         step = (1ULL<<32) * gWimpyReductionFactor / (512);
     }
+    else if (gIsEmbedded)
+    {
+        step = (BUFFER_SIZE / sizeof(cl_double)) * EMBEDDED_REDUCTION_FACTOR;
+    }
+
     // This test is not using ThreadPool so we need to disable FTZ here
     // for reference computations
     FPU_mode_type oldMode;
diff --git a/test_conformance/math_brute_force/macro_binary.cpp b/test_conformance/math_brute_force/macro_binary.cpp
index b590f50a..a4e4f97b 100644
--- a/test_conformance/math_brute_force/macro_binary.cpp
+++ b/test_conformance/math_brute_force/macro_binary.cpp
@@ -259,6 +259,11 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
         test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+    else if (gIsEmbedded)
+    {
+        test_info.scale *= EMBEDDED_REDUCTION_FACTOR;
+    }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
@@ -771,6 +776,10 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
          test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
+    else if (gIsEmbedded)
+    {
+        test_info.scale *= EMBEDDED_REDUCTION_FACTOR;
+    }
 
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
diff --git a/test_conformance/math_brute_force/macro_unary.cpp b/test_conformance/math_brute_force/macro_unary.cpp
index 872007f1..773bf0b4 100644
--- a/test_conformance/math_brute_force/macro_unary.cpp
+++ b/test_conformance/math_brute_force/macro_unary.cpp
@@ -230,6 +230,11 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
         test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+    else if (gIsEmbedded)
+    {
+        test_info.scale *= EMBEDDED_REDUCTION_FACTOR;
+    }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
@@ -629,6 +634,10 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
         test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
+    else if (gIsEmbedded)
+    {
+        test_info.scale *= EMBEDDED_REDUCTION_FACTOR;
+    }
 
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
diff --git a/test_conformance/math_brute_force/mad.cpp b/test_conformance/math_brute_force/mad.cpp
index 0737afbc..314caaeb 100644
--- a/test_conformance/math_brute_force/mad.cpp
+++ b/test_conformance/math_brute_force/mad.cpp
@@ -213,6 +213,11 @@ int TestFunc_mad(const Func *f, MTdata d, bool relaxedMode)
     {
         step = (1ULL<<32) * gWimpyReductionFactor / (512);
     }
+    else if (gIsEmbedded)
+    {
+        step = (BUFFER_SIZE / sizeof(float)) * EMBEDDED_REDUCTION_FACTOR;
+    }
+
     // Init the kernels
     BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                    f->nameInCode, relaxedMode };
@@ -680,6 +685,11 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
     {
         step = (1ULL<<32) * gWimpyReductionFactor / (512);
     }
+    else if (gIsEmbedded)
+    {
+        step = (BUFFER_SIZE / sizeof(double)) * EMBEDDED_REDUCTION_FACTOR;
+    }
+
     // Init the kernels
     BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
                                    f->nameInCode, relaxedMode };
diff --git a/test_conformance/math_brute_force/ternary.cpp b/test_conformance/math_brute_force/ternary.cpp
index 2c4b503e..4ca38327 100644
--- a/test_conformance/math_brute_force/ternary.cpp
+++ b/test_conformance/math_brute_force/ternary.cpp
@@ -238,6 +238,10 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     {
         step = (1ULL<<32) * gWimpyReductionFactor / (512);
     }
+    else if (gIsEmbedded)
+    {
+        step = (BUFFER_SIZE / sizeof(float)) * EMBEDDED_REDUCTION_FACTOR;
+    }
 
     if( gIsEmbedded )
         float_ulps = f->float_embedded_ulps;
@@ -879,6 +883,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
     {
         step = (1ULL<<32) * gWimpyReductionFactor / (512);
     }
+    else if (gIsEmbedded)
+    {
+        step = (BUFFER_SIZE / sizeof(double)) * EMBEDDED_REDUCTION_FACTOR;
+    }
 
     Force64BitFPUPrecision();
 
diff --git a/test_conformance/math_brute_force/unary.cpp b/test_conformance/math_brute_force/unary.cpp
index 91bc92d9..4605e34f 100644
--- a/test_conformance/math_brute_force/unary.cpp
+++ b/test_conformance/math_brute_force/unary.cpp
@@ -246,6 +246,11 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
         test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
+    else if (gIsEmbedded)
+    {
+        test_info.scale *= EMBEDDED_REDUCTION_FACTOR;
+    }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
@@ -1033,6 +1038,11 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
         test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
+    else if (gIsEmbedded)
+    {
+        test_info.scale *= EMBEDDED_REDUCTION_FACTOR;
+    }
+
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
     if (test_info.step / test_info.subBufferSize != test_info.scale)
     {
diff --git a/test_conformance/math_brute_force/unary_two_results.cpp b/test_conformance/math_brute_force/unary_two_results.cpp
index a86277f1..7ea7d859 100644
--- a/test_conformance/math_brute_force/unary_two_results.cpp
+++ b/test_conformance/math_brute_force/unary_two_results.cpp
@@ -215,6 +215,10 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
     {
         step = (1ULL<<32) * gWimpyReductionFactor / (512);
     }
+    else if (gIsEmbedded)
+    {
+        step = (BUFFER_SIZE / sizeof(float)) * EMBEDDED_REDUCTION_FACTOR;
+    }
 
     // Init the kernels
     BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
@@ -674,6 +678,10 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
     {
         step = (1ULL<<32) * gWimpyReductionFactor / (512);
     }
+    else if (gIsEmbedded)
+    {
+        step = (BUFFER_SIZE / sizeof(double)) * EMBEDDED_REDUCTION_FACTOR;
+    }
 
     Force64BitFPUPrecision();
 
diff --git a/test_conformance/math_brute_force/unary_two_results_i.cpp b/test_conformance/math_brute_force/unary_two_results_i.cpp
index 108be6a4..8d0ecda9 100644
--- a/test_conformance/math_brute_force/unary_two_results_i.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i.cpp
@@ -218,6 +218,11 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
     {
         step = (1ULL<<32) * gWimpyReductionFactor / (512);
     }
+    else if (gIsEmbedded)
+    {
+        step = (BUFFER_SIZE / sizeof(float)) * EMBEDDED_REDUCTION_FACTOR;
+    }
+
     if( gIsEmbedded )
         float_ulps = f->float_embedded_ulps;
     else
@@ -521,6 +526,10 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
     {
         step = (1ULL<<32) * gWimpyReductionFactor / (512);
     }
+    else if (gIsEmbedded)
+    {
+        step = (BUFFER_SIZE / sizeof(double)) * EMBEDDED_REDUCTION_FACTOR;
+    }
 
     Force64BitFPUPrecision();
 
diff --git a/test_conformance/math_brute_force/unary_u.cpp b/test_conformance/math_brute_force/unary_u.cpp
index 87fcae32..bb9b3995 100644
--- a/test_conformance/math_brute_force/unary_u.cpp
+++ b/test_conformance/math_brute_force/unary_u.cpp
@@ -207,6 +207,11 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
     {
         step = (1ULL<<32) * gWimpyReductionFactor / (512);
     }
+    else if (gIsEmbedded)
+    {
+        step = (BUFFER_SIZE / sizeof(float)) * EMBEDDED_REDUCTION_FACTOR;
+    }
+
     if( gIsEmbedded)
         float_ulps = f->float_embedded_ulps;
     else
@@ -480,6 +485,11 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
     {
         step = (1ULL<<32) * gWimpyReductionFactor / (512);
     }
+    else if (gIsEmbedded)
+    {
+        step = (BUFFER_SIZE / sizeof(double)) * EMBEDDED_REDUCTION_FACTOR;
+    }
+
     Force64BitFPUPrecision();
 
     // Init the kernels