mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-24 07:59:01 +00:00
Initial open source release of OpenCL 2.0 CTS.
This commit is contained in:
280
test_conformance/math_brute_force/Utility.h
Normal file
280
test_conformance/math_brute_force/Utility.h
Normal file
@@ -0,0 +1,280 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef UTILITY_H
|
||||
#define UTILITY_H
|
||||
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include "../../test_common/harness/rounding_mode.h"
|
||||
#include "../../test_common/harness/fpcontrol.h"
|
||||
|
||||
#if defined( _WIN32) && defined (_MSC_VER)
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/ThreadPool.h"
|
||||
#define BUFFER_SIZE (1024*1024*2)
|
||||
|
||||
#if defined( __GNUC__ )
|
||||
#define UNUSED __attribute__ ((unused))
|
||||
#else
|
||||
#define UNUSED
|
||||
#endif
|
||||
|
||||
extern int gWimpyBufferSize;
|
||||
extern int gWimpyReductionFactor;
|
||||
|
||||
#define VECTOR_SIZE_COUNT 6
|
||||
extern const char *sizeNames[VECTOR_SIZE_COUNT];
|
||||
extern const int sizeValues[VECTOR_SIZE_COUNT];
|
||||
|
||||
extern cl_device_type gDeviceType;
|
||||
extern cl_device_id gDevice;
|
||||
extern cl_context gContext;
|
||||
extern cl_command_queue gQueue;
|
||||
extern void *gIn;
|
||||
extern void *gIn2;
|
||||
extern void *gIn3;
|
||||
extern void *gOut_Ref;
|
||||
extern void *gOut_Ref2;
|
||||
extern void *gOut[VECTOR_SIZE_COUNT];
|
||||
extern void *gOut2[VECTOR_SIZE_COUNT];
|
||||
extern cl_mem gInBuffer;
|
||||
extern cl_mem gInBuffer2;
|
||||
extern cl_mem gInBuffer3;
|
||||
extern cl_mem gOutBuffer[VECTOR_SIZE_COUNT];
|
||||
extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT];
|
||||
extern uint32_t gComputeDevices;
|
||||
extern uint32_t gSimdSize;
|
||||
extern int gSkipCorrectnessTesting;
|
||||
extern int gMeasureTimes;
|
||||
extern int gReportAverageTimes;
|
||||
extern int gForceFTZ;
|
||||
extern volatile int gTestFastRelaxed;
|
||||
extern int gFastRelaxedDerived;
|
||||
extern int gWimpyMode;
|
||||
extern int gHasDouble;
|
||||
extern int gIsInRTZMode;
|
||||
extern int gInfNanSupport;
|
||||
extern int gIsEmbedded;
|
||||
extern int gVerboseBruteForce;
|
||||
extern uint32_t gMaxVectorSizeIndex;
|
||||
extern uint32_t gMinVectorSizeIndex;
|
||||
extern uint32_t gDeviceFrequency;
|
||||
extern cl_device_fp_config gFloatCapabilities;
|
||||
extern cl_device_fp_config gDoubleCapabilities;
|
||||
|
||||
#define LOWER_IS_BETTER 0
|
||||
#define HIGHER_IS_BETTER 1
|
||||
|
||||
#if USE_ATF
|
||||
|
||||
#include <ATF/ATF.h>
|
||||
#define test_start() ATFTestStart()
|
||||
#define test_finish() ATFTestFinish()
|
||||
#define vlog( ... ) ATFLogInfo(__VA_ARGS__)
|
||||
#define vlog_error( ... ) ATFLogError(__VA_ARGS__)
|
||||
#define vlog_perf( _number, _higherIsBetter, _units, _nameFmt, ... ) ATFLogPerformanceNumber(_number, _higherIsBetter, _units, _nameFmt, __VA_ARGS__ )
|
||||
|
||||
#else
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#endif
|
||||
|
||||
#if defined (_MSC_VER )
|
||||
//Deal with missing scalbn on windows
|
||||
#define scalbnf( _a, _i ) ldexpf( _a, _i )
|
||||
#define scalbn( _a, _i ) ldexp( _a, _i )
|
||||
#define scalbnl( _a, _i ) ldexpl( _a, _i )
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
float Abs_Error( float test, double reference );
|
||||
float Ulp_Error( float test, double reference );
|
||||
//float Ulp_Error_Half( float test, double reference );
|
||||
float Ulp_Error_Double( double test, long double reference );
|
||||
#ifdef __cplusplus
|
||||
} //extern "C"
|
||||
#endif
|
||||
|
||||
uint64_t GetTime( void );
|
||||
double SubtractTime( uint64_t endTime, uint64_t startTime );
|
||||
int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p );
|
||||
int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p );
|
||||
|
||||
// used to convert a bucket of bits into a search pattern through double
|
||||
static inline double DoubleFromUInt32( uint32_t bits );
|
||||
static inline double DoubleFromUInt32( uint32_t bits )
|
||||
{
|
||||
union{ uint64_t u; double d;} u;
|
||||
|
||||
// split 0x89abcdef to 0x89abc00000000def
|
||||
u.u = bits & 0xfffU;
|
||||
u.u |= (uint64_t) (bits & ~0xfffU) << 32;
|
||||
|
||||
// sign extend the leading bit of def segment as sign bit so that the middle region consists of either all 1s or 0s
|
||||
u.u -= (bits & 0x800U) << 1;
|
||||
|
||||
// return result
|
||||
return u.d;
|
||||
}
|
||||
|
||||
void _LogBuildError( cl_program p, int line, const char *file );
|
||||
#define LogBuildError( program ) _LogBuildError( program, __LINE__, __FILE__ )
|
||||
|
||||
#define PERF_LOOP_COUNT 100
|
||||
|
||||
// Note: though this takes a double, this is for use with single precision tests
|
||||
static inline int IsFloatSubnormal( double x )
|
||||
{
|
||||
#if 2 == FLT_RADIX
|
||||
// Do this in integer to avoid problems with FTZ behavior
|
||||
union{ float d; uint32_t u;}u;
|
||||
u.d = fabsf((float)x);
|
||||
return (u.u-1) < 0x007fffffU;
|
||||
#else
|
||||
// rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
|
||||
return fabs(x) < (double) FLT_MIN && x != 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static inline int IsDoubleSubnormal( long double x )
|
||||
{
|
||||
#if 2 == FLT_RADIX
|
||||
// Do this in integer to avoid problems with FTZ behavior
|
||||
union{ double d; uint64_t u;}u;
|
||||
u.d = fabs((double) x);
|
||||
return (u.u-1) < 0x000fffffffffffffULL;
|
||||
#else
|
||||
// rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
|
||||
return fabs(x) < (double) DBL_MIN && x != 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
//The spec is fairly clear that we may enforce a hard cutoff to prevent premature flushing to zero.
|
||||
// However, to avoid conflict for 1.0, we are letting results at TYPE_MIN + ulp_limit to be flushed to zero.
|
||||
static inline int IsFloatResultSubnormal( double x, float ulps )
|
||||
{
|
||||
x = fabs(x) - MAKE_HEX_DOUBLE( 0x1.0p-149, 0x1, -149) * (double) ulps;
|
||||
return x < MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 );
|
||||
}
|
||||
|
||||
static inline int IsFloatResultSubnormalAbsError( double x , float abs_err)
|
||||
{
|
||||
x = x - abs_err;
|
||||
return x < MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 );
|
||||
}
|
||||
|
||||
static inline int IsDoubleResultSubnormal( long double x, float ulps )
|
||||
{
|
||||
x = fabsl(x) - MAKE_HEX_LONG( 0x1.0p-1074, 0x1, -1074) * (long double) ulps;
|
||||
return x < MAKE_HEX_LONG( 0x1.0p-1022, 0x1, -1022 );
|
||||
}
|
||||
|
||||
static inline int IsFloatInfinity(double x)
|
||||
{
|
||||
union { cl_float d; cl_uint u; } u;
|
||||
u.d = (cl_float) x;
|
||||
return ((u.u & 0x7fffffffU) == 0x7F800000U);
|
||||
}
|
||||
|
||||
static inline int IsFloatMaxFloat(double x)
|
||||
{
|
||||
union { cl_float d; cl_uint u; } u;
|
||||
u.d = (cl_float) x;
|
||||
return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU);
|
||||
}
|
||||
|
||||
static inline int IsFloatNaN(double x)
|
||||
{
|
||||
union { cl_float d; cl_uint u; } u;
|
||||
u.d = (cl_float) x;
|
||||
return ((u.u & 0x7fffffffU) > 0x7F800000U);
|
||||
}
|
||||
|
||||
extern cl_uint RoundUpToNextPowerOfTwo( cl_uint x );
|
||||
|
||||
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
|
||||
// (that's x87 default state). This causes problems with the tests that
|
||||
// convert long and ulong to float and double or otherwise deal with values
|
||||
// that need more precision than 53-bit. So, set the x87 to 64-bit precision.
|
||||
static inline void Force64BitFPUPrecision(void)
|
||||
{
|
||||
#if __MINGW32__
|
||||
// The usual method is to use _controlfp as follows:
|
||||
// #include <float.h>
|
||||
// _controlfp(_PC_64, _MCW_PC);
|
||||
//
|
||||
// _controlfp is available on MinGW32 but not on MinGW64. Instead of having
|
||||
// divergent code just use inline assembly which works for both.
|
||||
unsigned short int orig_cw = 0;
|
||||
unsigned short int new_cw = 0;
|
||||
__asm__ __volatile__ ("fstcw %0":"=m" (orig_cw));
|
||||
new_cw = orig_cw | 0x0300; // set precision to 64-bit
|
||||
__asm__ __volatile__ ("fldcw %0"::"m" (new_cw));
|
||||
#elif defined( _WIN32 ) && defined( __INTEL_COMPILER )
|
||||
// Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not* work on win.x64:
|
||||
// > On the x64 architecture, changing the floating point precision is not supported.
|
||||
// (Taken from http://msdn.microsoft.com/en-us/library/e9b52ceh%28v=vs.100%29.aspx)
|
||||
int cw;
|
||||
__asm { fnstcw cw }; // Get current value of FPU control word.
|
||||
cw = cw & 0xfffffcff | ( 3 << 8 ); // Set Precision Control to Double Extended Precision.
|
||||
__asm { fldcw cw }; // Set new value of FPU control word.
|
||||
#else
|
||||
/* Implement for other platforms if needed */
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#else
|
||||
extern
|
||||
#endif
|
||||
void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
|
||||
|
||||
typedef union
|
||||
{
|
||||
int32_t i;
|
||||
float f;
|
||||
}int32f_t;
|
||||
|
||||
typedef union
|
||||
{
|
||||
int64_t l;
|
||||
double d;
|
||||
}int64d_t;
|
||||
|
||||
void MulD(double *rhi, double *rlo, double u, double v);
|
||||
void AddD(double *rhi, double *rlo, double a, double b);
|
||||
void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
|
||||
void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
|
||||
void DivideDD(double *chi, double *clo, double a, double b);
|
||||
int compareFloats(float x, float y);
|
||||
int compareDoubles(double x, double y);
|
||||
|
||||
void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed);
|
||||
|
||||
#endif /* UTILITY_H */
|
||||
|
||||
|
||||
Reference in New Issue
Block a user