Initial open source release of OpenCL 2.0 CTS.

2026-03-24 07:59:01 +00:00 · 2017-05-16 18:50:35 +05:30
parent 6911ba5116
commit 3a440d17c8
883 changed files with 318212 additions and 0 deletions
--- a/test_conformance/math_brute_force/Utility.h
+++ b/test_conformance/math_brute_force/Utility.h
@@ -0,0 +1,280 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef UTILITY_H
+#define UTILITY_H
+
+#include "../../test_common/harness/compat.h"
+
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/opencl.h>
+#endif
+#include <stdio.h>
+#include "../../test_common/harness/rounding_mode.h"
+#include "../../test_common/harness/fpcontrol.h"
+
+#if defined( _WIN32) && defined (_MSC_VER)
+#include "../../test_common/harness/testHarness.h"
+#endif
+
+#include "../../test_common/harness/ThreadPool.h"
+#define BUFFER_SIZE         (1024*1024*2)
+
+#if defined( __GNUC__ )
+    #define UNUSED  __attribute__ ((unused))
+#else
+    #define UNUSED
+#endif
+
+extern int gWimpyBufferSize;
+extern int gWimpyReductionFactor;
+
+#define VECTOR_SIZE_COUNT   6
+extern const char *sizeNames[VECTOR_SIZE_COUNT];
+extern const int   sizeValues[VECTOR_SIZE_COUNT];
+
+extern cl_device_type   gDeviceType;
+extern cl_device_id     gDevice;
+extern cl_context       gContext;
+extern cl_command_queue gQueue;
+extern void             *gIn;
+extern void             *gIn2;
+extern void             *gIn3;
+extern void             *gOut_Ref;
+extern void             *gOut_Ref2;
+extern void             *gOut[VECTOR_SIZE_COUNT];
+extern void             *gOut2[VECTOR_SIZE_COUNT];
+extern cl_mem           gInBuffer;
+extern cl_mem           gInBuffer2;
+extern cl_mem           gInBuffer3;
+extern cl_mem           gOutBuffer[VECTOR_SIZE_COUNT];
+extern cl_mem           gOutBuffer2[VECTOR_SIZE_COUNT];
+extern uint32_t         gComputeDevices;
+extern uint32_t         gSimdSize;
+extern int              gSkipCorrectnessTesting;
+extern int              gMeasureTimes;
+extern int              gReportAverageTimes;
+extern int              gForceFTZ;
+extern volatile int     gTestFastRelaxed;
+extern int              gFastRelaxedDerived;
+extern int              gWimpyMode;
+extern int              gHasDouble;
+extern int              gIsInRTZMode;
+extern int              gInfNanSupport;
+extern int              gIsEmbedded;
+extern int              gVerboseBruteForce;
+extern uint32_t         gMaxVectorSizeIndex;
+extern uint32_t         gMinVectorSizeIndex;
+extern uint32_t         gDeviceFrequency;
+extern cl_device_fp_config gFloatCapabilities;
+extern cl_device_fp_config gDoubleCapabilities;
+
+#define LOWER_IS_BETTER     0
+#define HIGHER_IS_BETTER    1
+
+#if USE_ATF
+
+    #include <ATF/ATF.h>
+    #define test_start()        ATFTestStart()
+    #define test_finish()       ATFTestFinish()
+    #define vlog( ... )         ATFLogInfo(__VA_ARGS__)
+    #define vlog_error( ... )   ATFLogError(__VA_ARGS__)
+    #define vlog_perf( _number, _higherIsBetter, _units, _nameFmt, ... )    ATFLogPerformanceNumber(_number, _higherIsBetter, _units, _nameFmt, __VA_ARGS__ )
+
+#else
+    #include "../../test_common/harness/errorHelpers.h"
+#endif
+
+#if defined (_MSC_VER )
+    //Deal with missing scalbn on windows
+    #define scalbnf( _a, _i )       ldexpf( _a, _i )
+    #define scalbn( _a, _i )        ldexp( _a, _i )
+    #define scalbnl( _a, _i )       ldexpl( _a, _i )
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+float Abs_Error( float test, double reference );
+float Ulp_Error( float test, double reference );
+//float Ulp_Error_Half( float test, double reference );
+float Ulp_Error_Double( double test, long double reference );
+#ifdef __cplusplus
+} //extern "C"
+#endif
+
+uint64_t GetTime( void );
+double SubtractTime( uint64_t endTime, uint64_t startTime );
+int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p );
+int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+
+// used to convert a bucket of bits into a search pattern through double
+static inline double DoubleFromUInt32( uint32_t bits );
+static inline double DoubleFromUInt32( uint32_t bits )
+{
+    union{ uint64_t u; double d;} u;
+
+    // split 0x89abcdef to 0x89abc00000000def
+    u.u = bits & 0xfffU;
+    u.u |= (uint64_t) (bits & ~0xfffU) << 32;
+
+    // sign extend the leading bit of def segment as sign bit so that the middle region consists of either all 1s or 0s
+    u.u -= (bits & 0x800U) << 1;
+
+    // return result
+    return u.d;
+}
+
+void _LogBuildError( cl_program p, int line, const char *file );
+#define LogBuildError( program )        _LogBuildError( program, __LINE__, __FILE__ )
+
+#define PERF_LOOP_COUNT 100
+
+// Note: though this takes a double, this is for use with single precision tests
+static inline int IsFloatSubnormal( double x )
+{
+#if 2 == FLT_RADIX
+    // Do this in integer to avoid problems with FTZ behavior
+    union{ float d; uint32_t u;}u;
+    u.d = fabsf((float)x);
+    return (u.u-1) < 0x007fffffU;
+#else
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return fabs(x) < (double) FLT_MIN && x != 0.0;
+#endif
+}
+
+
+static inline int IsDoubleSubnormal( long double x )
+{
+#if 2 == FLT_RADIX
+    // Do this in integer to avoid problems with FTZ behavior
+    union{ double d; uint64_t u;}u;
+    u.d = fabs((double) x);
+    return (u.u-1) < 0x000fffffffffffffULL;
+#else
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return fabs(x) < (double) DBL_MIN && x != 0.0;
+#endif
+}
+
+//The spec is fairly clear that we may enforce a hard cutoff to prevent premature flushing to zero.
+// However, to avoid conflict for 1.0, we are letting results at TYPE_MIN + ulp_limit to be flushed to zero.
+static inline int IsFloatResultSubnormal( double x, float ulps )
+{
+    x = fabs(x) - MAKE_HEX_DOUBLE( 0x1.0p-149, 0x1, -149) * (double) ulps;
+    return x < MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 );
+}
+
+static inline int IsFloatResultSubnormalAbsError( double x , float abs_err)
+{
+  x = x - abs_err;
+  return x < MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 );
+}
+
+static inline int IsDoubleResultSubnormal( long double x, float ulps )
+{
+    x = fabsl(x) - MAKE_HEX_LONG( 0x1.0p-1074, 0x1, -1074) * (long double) ulps;
+    return x < MAKE_HEX_LONG( 0x1.0p-1022, 0x1, -1022 );
+}
+
+static inline int IsFloatInfinity(double x)
+{
+  union { cl_float d; cl_uint u; } u;
+  u.d = (cl_float) x;
+  return ((u.u & 0x7fffffffU) == 0x7F800000U);
+}
+
+static inline int IsFloatMaxFloat(double x)
+{
+  union { cl_float d; cl_uint u; } u;
+  u.d = (cl_float) x;
+  return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU);
+}
+
+static inline int IsFloatNaN(double x)
+{
+  union { cl_float d; cl_uint u; } u;
+  u.d = (cl_float) x;
+  return ((u.u & 0x7fffffffU) > 0x7F800000U);
+}
+
+extern cl_uint RoundUpToNextPowerOfTwo( cl_uint x );
+
+// Windows (since long double got deprecated) sets the x87 to 53-bit precision
+// (that's x87 default state).  This causes problems with the tests that
+// convert long and ulong to float and double or otherwise deal with values
+// that need more precision than 53-bit. So, set the x87 to 64-bit precision.
+static inline void Force64BitFPUPrecision(void)
+{
+#if __MINGW32__
+    // The usual method is to use _controlfp as follows:
+    //     #include <float.h>
+    //     _controlfp(_PC_64, _MCW_PC);
+    //
+    // _controlfp is available on MinGW32 but not on MinGW64. Instead of having
+    // divergent code just use inline assembly which works for both.
+    unsigned short int orig_cw = 0;
+    unsigned short int new_cw = 0;
+    __asm__ __volatile__ ("fstcw %0":"=m" (orig_cw));
+    new_cw = orig_cw | 0x0300;   // set precision to 64-bit
+    __asm__ __volatile__ ("fldcw  %0"::"m" (new_cw));
+#elif defined( _WIN32 ) && defined( __INTEL_COMPILER )
+    // Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not* work on win.x64:
+    // > On the x64 architecture, changing the floating point precision is not supported.
+    // (Taken from http://msdn.microsoft.com/en-us/library/e9b52ceh%28v=vs.100%29.aspx)
+    int cw;
+    __asm { fnstcw cw };    // Get current value of FPU control word.
+    cw = cw & 0xfffffcff | ( 3 << 8 ); // Set Precision Control to Double Extended Precision.
+    __asm { fldcw cw };     // Set new value of FPU control word.
+#else
+    /* Implement for other platforms if needed */
+#endif
+}
+
+#ifdef __cplusplus
+extern "C"
+#else
+extern
+#endif
+void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
+
+typedef union
+{
+    int32_t i;
+    float   f;
+}int32f_t;
+
+typedef union
+{
+    int64_t l;
+    double  d;
+}int64d_t;
+
+void MulD(double *rhi, double *rlo, double u, double v);
+void AddD(double *rhi, double *rlo, double a, double b);
+void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
+void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
+void DivideDD(double *chi, double *clo, double a, double b);
+int compareFloats(float x, float y);
+int compareDoubles(double x, double y);
+
+void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed);
+
+#endif /* UTILITY_H */
+
+