Files
OpenCL-CTS/test_conformance/math_brute_force/utility.h
Marco Antognini b7e7a3eb65 Remove unsupported code (#1211)
* Remove code for runtime measurement

The GetTime() and associated functions are not fully implemented on
Linux. This functionality is assumed to be untested, or unused at best.

Reduce differences between tests by removing this unnecessary feature.
It can be (re-)implemented later, if desired, once the math_brute_force
component is in better shape.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>

* Coalesce if-statements

Signed-off-by: Marco Antognini <marco.antognini@arm.com>

* Keep else branch

Address comments.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>
2021-04-13 15:58:44 +01:00

271 lines
7.7 KiB
C

//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef UTILITY_H
#define UTILITY_H
#include "harness/compat.h"
#include "harness/rounding_mode.h"
#include "harness/fpcontrol.h"
#include "harness/testHarness.h"
#include "harness/ThreadPool.h"
#include "harness/conversions.h"
#define BUFFER_SIZE (1024 * 1024 * 2)
#define EMBEDDED_REDUCTION_FACTOR (64)
#if defined(__GNUC__)
#define UNUSED __attribute__((unused))
#else
#define UNUSED
#endif
struct Func;
extern int gWimpyBufferSize;
extern int gWimpyReductionFactor;
#define VECTOR_SIZE_COUNT 6
extern const char *sizeNames[VECTOR_SIZE_COUNT];
extern const int sizeValues[VECTOR_SIZE_COUNT];
extern cl_device_id gDevice;
extern cl_context gContext;
extern cl_command_queue gQueue;
extern void *gIn;
extern void *gIn2;
extern void *gIn3;
extern void *gOut_Ref;
extern void *gOut_Ref2;
extern void *gOut[VECTOR_SIZE_COUNT];
extern void *gOut2[VECTOR_SIZE_COUNT];
extern cl_mem gInBuffer;
extern cl_mem gInBuffer2;
extern cl_mem gInBuffer3;
extern cl_mem gOutBuffer[VECTOR_SIZE_COUNT];
extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT];
extern uint32_t gComputeDevices;
extern uint32_t gSimdSize;
extern int gSkipCorrectnessTesting;
extern int gForceFTZ;
extern int gFastRelaxedDerived;
extern int gWimpyMode;
extern int gHasDouble;
extern int gIsInRTZMode;
extern int gInfNanSupport;
extern int gIsEmbedded;
extern int gVerboseBruteForce;
extern uint32_t gMaxVectorSizeIndex;
extern uint32_t gMinVectorSizeIndex;
extern uint32_t gDeviceFrequency;
extern cl_device_fp_config gFloatCapabilities;
extern cl_device_fp_config gDoubleCapabilities;
#define LOWER_IS_BETTER 0
#define HIGHER_IS_BETTER 1
#include "harness/errorHelpers.h"
#if defined(_MSC_VER)
// Deal with missing scalbn on windows
#define scalbnf(_a, _i) ldexpf(_a, _i)
#define scalbn(_a, _i) ldexp(_a, _i)
#define scalbnl(_a, _i) ldexpl(_a, _i)
#endif
float Abs_Error(float test, double reference);
float Ulp_Error(float test, double reference);
float Bruteforce_Ulp_Error_Double(double test, long double reference);
int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
cl_program *p, bool relaxedMode);
int MakeKernels(const char **c, cl_uint count, const char *name,
cl_uint kernel_count, cl_kernel *k, cl_program *p,
bool relaxedMode);
// used to convert a bucket of bits into a search pattern through double
static inline double DoubleFromUInt32(uint32_t bits);
static inline double DoubleFromUInt32(uint32_t bits)
{
    // Scatter a 32-bit pattern across a 64-bit double so a linear walk of
    // uint32 values sweeps interesting regions of the double space,
    // e.g. 0x89abcdef -> 0x89abc00000000def.
    union {
        uint64_t bits64;
        double value;
    } conv;

    // Keep the low 12 bits at the bottom; move the remaining high bits to
    // the top of the 64-bit word.
    const uint64_t lowSegment = bits & 0xfffU;
    const uint64_t highSegment = (uint64_t)(bits & ~0xfffU) << 32;
    conv.bits64 = highSegment | lowSegment;

    // Sign-extend bit 11 (top bit of the low segment) upward so the middle
    // 32 bits become either all ones or all zeros.
    conv.bits64 -= (uint64_t)(bits & 0x800U) << 1;

    return conv.value;
}
void _LogBuildError(cl_program p, int line, const char *file);
#define LogBuildError(program) _LogBuildError(program, __LINE__, __FILE__)
// The spec is fairly clear that we may enforce a hard cutoff to prevent
// premature flushing to zero.
// However, to avoid conflict for 1.0, we are letting results at TYPE_MIN +
// ulp_limit to be flushed to zero.
// Returns nonzero when |x|, after subtracting an allowance of `ulps` smallest
// float denormals (2^-149), falls below FLT_MIN (2^-126) — i.e. the result is
// close enough to the subnormal range that flush-to-zero must be tolerated.
static inline int IsFloatResultSubnormal(double x, float ulps)
{
    const double allowance =
        MAKE_HEX_DOUBLE(0x1.0p-149, 0x1, -149) * (double)ulps;
    const double reduced = fabs(x) - allowance;
    return reduced < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
}
// Absolute-error variant of IsFloatResultSubnormal: subtracts the allowed
// absolute error directly (no fabs — callers pass the signed result).
static inline int IsFloatResultSubnormalAbsError(double x, float abs_err)
{
    const double reduced = x - abs_err;
    return reduced < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
}
// Double-precision counterpart of IsFloatResultSubnormal: allowance is `ulps`
// smallest double denormals (2^-1074); threshold is DBL_MIN (2^-1022).
static inline int IsDoubleResultSubnormal(long double x, float ulps)
{
    const long double allowance =
        MAKE_HEX_LONG(0x1.0p-1074, 0x1, -1074) * (long double)ulps;
    const long double reduced = fabsl(x) - allowance;
    return reduced < MAKE_HEX_LONG(0x1.0p-1022, 0x1, -1022);
}
// Returns nonzero when x, narrowed to single precision, is +/-infinity.
// Tested via the IEEE-754 binary32 bit pattern with the sign bit masked off.
static inline int IsFloatInfinity(double x)
{
    union {
        float f;
        uint32_t bits;
    } probe;
    probe.f = (float)x;
    return (probe.bits & 0x7fffffffU) == 0x7F800000U;
}
// Returns nonzero when x, narrowed to single precision, is +/-FLT_MAX
// (binary32 pattern 0x7F7FFFFF, sign bit ignored).
static inline int IsFloatMaxFloat(double x)
{
    union {
        float f;
        uint32_t bits;
    } probe;
    probe.f = (float)x;
    return (probe.bits & 0x7fffffffU) == 0x7F7FFFFFU;
}
// Returns nonzero when x, narrowed to single precision, is a NaN: with the
// sign bit masked off, any pattern strictly above 0x7F800000 (infinity).
static inline int IsFloatNaN(double x)
{
    union {
        float f;
        uint32_t bits;
    } probe;
    probe.f = (float)x;
    return (probe.bits & 0x7fffffffU) > 0x7F800000U;
}
extern cl_uint RoundUpToNextPowerOfTwo(cl_uint x);
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
// (that's x87 default state). This causes problems with the tests that
// convert long and ulong to float and double or otherwise deal with values
// that need more precision than 53-bit. So, set the x87 to 64-bit precision.
// Set the x87 Precision Control field to 64-bit (double extended) precision.
// Only has an effect on MinGW and Windows/ICC builds; a no-op elsewhere.
static inline void Force64BitFPUPrecision(void)
{
#if __MINGW32__
    // The usual method is to use _controlfp as follows:
    // #include <float.h>
    // _controlfp(_PC_64, _MCW_PC);
    //
    // _controlfp is available on MinGW32 but not on MinGW64. Instead of having
    // divergent code just use inline assembly which works for both.
    unsigned short int orig_cw = 0;
    unsigned short int new_cw = 0;
    // Read the current x87 control word.
    __asm__ __volatile__("fstcw %0" : "=m"(orig_cw));
    // Bits 8-9 form the Precision Control field; 0x0300 selects 64-bit
    // (double extended) precision.
    new_cw = orig_cw | 0x0300; // set precision to 64-bit
    // Load the modified control word back into the FPU.
    __asm__ __volatile__("fldcw %0" ::"m"(new_cw));
#elif defined(_WIN32) && defined(__INTEL_COMPILER)
    // Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not*
    // work on win.x64: > On the x64 architecture, changing the floating point
    // precision is not supported. (Taken from
    // http://msdn.microsoft.com/en-us/library/e9b52ceh%28v=vs.100%29.aspx)
    int cw;
    __asm { fnstcw cw }
    ; // Get current value of FPU control word.
    cw = cw & 0xfffffcff
        | (3 << 8); // Set Precision Control to Double Extended Precision.
    __asm { fldcw cw }
    ; // Set new value of FPU control word.
#else
    /* Implement for other platforms if needed */
#endif
}
extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes);
typedef union {
int32_t i;
float f;
} int32f_t;
typedef union {
int64_t l;
double d;
} int64d_t;
void MulD(double *rhi, double *rlo, double u, double v);
void AddD(double *rhi, double *rlo, double a, double b);
void MulDD(double *rhi, double *rlo, double xh, double xl, double yh,
double yl);
void AddDD(double *rhi, double *rlo, double xh, double xl, double yh,
double yl);
void DivideDD(double *chi, double *clo, double a, double b);
int compareFloats(float x, float y);
int compareDoubles(double x, double y);
void logFunctionInfo(const char *fname, unsigned int float_size,
unsigned int isFastRelaxed);
float getAllowedUlpError(const Func *f, const bool relaxed);
// Scale factor applied when iterating over the test space: wimpy mode scales
// with the element size and the user-selected reduction factor, embedded
// profiles use a fixed reduction, and the full profile tests everything.
static inline cl_uint getTestScale(size_t typeSize)
{
    if (gWimpyMode) return (cl_uint)typeSize * 2 * gWimpyReductionFactor;
    if (gIsEmbedded) return EMBEDDED_REDUCTION_FACTOR;
    return 1;
}
// Number of values consumed per outer test iteration: a fixed fraction of the
// 2^32 space in wimpy mode, a reduced buffer's worth on embedded profiles,
// and one full buffer's worth of elements otherwise.
static inline uint64_t getTestStep(size_t typeSize, size_t bufferSize)
{
    if (gWimpyMode) return (1ULL << 32) * gWimpyReductionFactor / (512);
    if (gIsEmbedded) return (BUFFER_SIZE / typeSize) * EMBEDDED_REDUCTION_FACTOR;
    return bufferSize / typeSize;
}
#endif /* UTILITY_H */