Files
OpenCL-CTS/test_common/harness/rounding_mode.c
2017-05-16 18:50:35 +05:30

242 lines
8.3 KiB
C

//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "rounding_mode.h"
#if (defined( __arm__ ) || defined(__aarch64__))
// Bit definitions for the ARM floating-point control word (FPSCR on 32-bit ARM, FPCR on AArch64).
#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode
#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode field (bits 23:22)
#define _ARM_FE_FTZ 0x1000000
#define _ARM_FE_NFTZ 0x0
// Accessors for the floating-point control word.
//  - The asm statements are marked volatile: the register is mutable hardware state, so a read
//    must never be merged with an earlier read or moved across a write by the optimizer.
//  - The write uses an "r" constraint only: MSR/VMSR take their source from a general-purpose
//    register, so offering the compiler an immediate ("i") could produce unassemblable code.
#if defined(__aarch64__)
#define _FPU_GETCW(cw) __asm__ __volatile__ ("MRS %0,FPCR" : "=r" (cw))
#define _FPU_SETCW(cw) __asm__ __volatile__ ("MSR FPCR,%0" : : "r" (cw))
#else
#define _FPU_GETCW(cw) __asm__ __volatile__ ("VMRS %0,FPSCR" : "=r" (cw))
#define _FPU_SETCW(cw) __asm__ __volatile__ ("VMSR FPSCR,%0" : : "r" (cw))
#endif
#endif
#if (defined( __arm__ ) || defined(__aarch64__)) && defined( __GNUC__ )
// Encodings of the two-bit rounding-mode field selected by FPSCR_ROUND_MASK (bits 23:22).
// The values are already shifted into position, so they can be OR'ed into the control word
// or compared directly against (control word & FPSCR_ROUND_MASK).
#define _ARM_FE_TONEAREST 0x0
#define _ARM_FE_UPWARD 0x400000
#define _ARM_FE_DOWNWARD 0x800000
#define _ARM_FE_TOWARDZERO 0xc00000
// Install a new host rounding mode in the ARM floating-point control word.
//
//   r        index into the per-destination table of hardware rounding encodings
//   outType  destination type; kfloat / kdouble select the floating-point table,
//            anything else selects the integer table
//
// The two tables differ only in entry 0: round-to-nearest for floating-point
// destinations, round-toward-zero for integer destinations.
//
// Returns the rounding mode that was active before the call, as reported by get_round().
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int float_modes[ kRoundingModeCount ] = {
        _ARM_FE_TONEAREST, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO
    };
    static const int integer_modes[ kRoundingModeCount ] = {
        _ARM_FE_TOWARDZERO, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO
    };

    const int *table = ( outType == kfloat || outType == kdouble ) ? float_modes : integer_modes;

    // Capture the outgoing mode before touching the control word.
    const RoundingMode previous = get_round();

    int control = 0;
    _FPU_GETCW( control );

    // Replace only the rounding field; every other control bit is written back unchanged.
    _FPU_SETCW( table[r] | ( control & ~FPSCR_ROUND_MASK ) );

    return previous;
}
// Read the ARM floating-point control word and translate its rounding-mode field
// into the harness RoundingMode enumeration.
RoundingMode get_round( void )
{
    int control;
    _FPU_GETCW( control );

    // Isolate the rounding field; the _ARM_FE_* constants are expressed in the same bit positions.
    const int mode_bits = control & FPSCR_ROUND_MASK;

    if( mode_bits == _ARM_FE_TONEAREST )
        return kRoundToNearestEven;
    if( mode_bits == _ARM_FE_UPWARD )
        return kRoundUp;
    if( mode_bits == _ARM_FE_DOWNWARD )
        return kRoundDown;
    if( mode_bits == _ARM_FE_TOWARDZERO )
        return kRoundTowardZero;

    // Fallback for a value matching none of the encodings above.
    return kDefaultRoundingMode;
}
#elif !(defined(_WIN32) && defined(_MSC_VER))
// Install a new host rounding mode through the <fenv.h> interface.
//
//   r        index into the per-destination table of FE_* rounding constants
//   outType  destination type; kfloat / kdouble select the floating-point table,
//            anything else selects the integer table
//
// The two tables differ only in entry 0: FE_TONEAREST for floating-point
// destinations, FE_TOWARDZERO for integer destinations.
//
// Returns the rounding mode that was active before the call. If fegetround()
// reported a value that is none of the four known FE_* modes, the process is aborted.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int float_modes[ kRoundingModeCount ] = {
        FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO
    };
    static const int integer_modes[ kRoundingModeCount ] = {
        FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO
    };

    const int *table = ( outType == kfloat || outType == kdouble ) ? float_modes : integer_modes;

    // Sample the outgoing mode first, then switch to the requested one.
    const int previous = fegetround();
    fesetround( table[r] );

    if( previous == FE_TONEAREST )
        return kRoundToNearestEven;
    if( previous == FE_UPWARD )
        return kRoundUp;
    if( previous == FE_DOWNWARD )
        return kRoundDown;
    if( previous == FE_TOWARDZERO )
        return kRoundTowardZero;

    // The previous mode was not one of the four recognized values.
    abort();
    return kDefaultRoundingMode; // not reached; keeps the compiler satisfied
}
// Report the current host rounding mode, as returned by fegetround(), in terms of
// the harness RoundingMode enumeration. Values other than the four known FE_*
// modes map to kDefaultRoundingMode.
RoundingMode get_round( void )
{
    RoundingMode current = kDefaultRoundingMode;

    switch( fegetround() )
    {
        case FE_TONEAREST:
            current = kRoundToNearestEven;
            break;
        case FE_UPWARD:
            current = kRoundUp;
            break;
        case FE_DOWNWARD:
            current = kRoundDown;
            break;
        case FE_TOWARDZERO:
            current = kRoundTowardZero;
            break;
        default:
            break;
    }

    return current;
}
#else
// Install a new host rounding mode through the MSVC runtime (_controlfp_s).
//
//   r        index into the per-destination table of _RC_* rounding constants
//   outType  destination type; kfloat / kdouble select the floating-point table,
//            anything else selects the integer table
//
// The two tables differ only in entry 0: _RC_NEAR for floating-point
// destinations, _RC_CHOP for integer destinations.
//
// Returns the rounding mode that was active before the call. If the current
// control word cannot be read, an error is logged, nothing is changed and
// kDefaultRoundingMode is returned. If the new mode cannot be installed, an
// error is logged and the previous mode is still returned.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
    static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
    const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds;

    // A zero mask makes _controlfp_s a pure query of the current control word.
    unsigned int oldRound = 0;
    int err = _controlfp_s(&oldRound, 0, 0);
    if (err) {
        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
        return kDefaultRoundingMode;
    }

    // Keep only the rounding-control bits before translating them.
    oldRound &= _MCW_RC;
    RoundingMode old =
        (oldRound == _RC_NEAR)? kRoundToNearestEven :
        (oldRound == _RC_UP)? kRoundUp :
        (oldRound == _RC_DOWN)? kRoundDown :
        (oldRound == _RC_CHOP)? kRoundTowardZero:
        kDefaultRoundingMode;

    // Install the requested mode; report (rather than silently ignore) a failure.
    err = _controlfp_s(&oldRound, p[r], _MCW_RC);
    if (err) {
        vlog_error("\t\tERROR: -- cannot set rounding mode in %s:%d\n", __FILE__, __LINE__);
    }

    return old;
}
// Report the current host rounding mode, as read through _controlfp_s, in terms
// of the harness RoundingMode enumeration.
//
// If the control word cannot be read, an error is logged and kDefaultRoundingMode
// is returned, mirroring the failure handling in set_round(). Rounding-control
// values other than the four _RC_* constants also map to kDefaultRoundingMode.
RoundingMode get_round( void )
{
    // A zero mask makes _controlfp_s a pure query of the current control word.
    unsigned int oldRound = 0;
    int err = _controlfp_s(&oldRound, 0, 0);
    if (err) {
        // Do not interpret oldRound when the query failed.
        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
        return kDefaultRoundingMode;
    }

    // Keep only the rounding-control bits before translating them.
    oldRound &= _MCW_RC;
    return
        (oldRound == _RC_NEAR)? kRoundToNearestEven :
        (oldRound == _RC_UP)? kRoundUp :
        (oldRound == _RC_DOWN)? kRoundDown :
        (oldRound == _RC_CHOP)? kRoundTowardZero:
        kDefaultRoundingMode;
}
#endif
//
// FlushToZero() sets the host processor into ftz mode. It is intended to have a remote effect on the behavior of the code in
// basic_test_conversions.c. Some host processors may not support this mode, in which case you'll need to do some clamping in
// software by testing against FLT_MIN or DBL_MIN in that file.
//
// Note: IEEE-754 says conversions are basic operations. As such they do *NOT* have the behavior in section 7.5.3 of
// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic
// operators do (e.g. add, subtract, multiply, divide, etc.)
//
// Configuring hardware to FTZ mode varies by platform.
// CAUTION: Some C implementations may also fail to behave properly in this mode.
//
// On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2
// is used for floating point computation! If your OS uses x87, you'll need to figure out how
// to turn that off for the conversions code in basic_test_conversions.c so that they flush to
// zero properly. Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
// in which case, these functions are at liberty to do nothing.
//
#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
#include <xmmintrin.h>
#elif defined( __PPC__ )
#include <fpu_control.h>
#endif
// Switch the host floating-point unit into flush-to-zero mode.
//
//   x86 / x86-64 / MSVC : sets bits 0x8040 in MXCSR and returns the previous MXCSR
//                         value packed into a pointer, for UnFlushToZero() to restore.
//   ARM / AArch64       : sets FPSCR_FZ in the control word; returns NULL.
//   PowerPC             : sets _FPU_MASK_NI in the control word; returns NULL.
//
// Any other operating system or architecture is rejected at compile time.
void *FlushToZero( void )
{
#if !( defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32) )
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#elif defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
    // The union carries the MXCSR image through the void * return value.
    union{ int i; void *p; } saved = { _mm_getcsr() };
    _mm_setcsr( saved.i | 0x8040 ); // 0x8040: the FZ and DAZ bits
    return saved.p;
#elif defined( __arm__ ) || defined(__aarch64__)
    int control;
    _FPU_GETCW( control );
    _FPU_SETCW( control | FPSCR_FZ );
    return NULL;
#elif defined( __PPC__ )
    fpu_control_t control = 0;
    _FPU_GETCW( control );
    control |= _FPU_MASK_NI;
    _FPU_SETCW( control );
    return NULL;
#else
#error Unknown arch
#endif
}
// Leave the flush-to-zero mode entered by FlushToZero().
//
//   p   the value returned by FlushToZero()
//
//   x86 / x86-64 / MSVC : writes the MXCSR value packed in p back to MXCSR.
//   ARM / AArch64       : clears FPSCR_FZ in the control word; p is not used.
//   PowerPC             : clears _FPU_MASK_NI in the control word; p is not used.
//
// Any other operating system or architecture is rejected at compile time.
void UnFlushToZero( void *p)
{
#if !( defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32) )
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#elif defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
    // Unpack the MXCSR image that FlushToZero() stored in the pointer.
    union{ void *p; int i; } saved = { p };
    _mm_setcsr( saved.i );
#elif defined( __arm__ ) || defined(__aarch64__)
    int control;
    _FPU_GETCW( control );
    _FPU_SETCW( control & ~FPSCR_FZ );
#elif defined( __PPC__)
    fpu_control_t control = 0;
    _FPU_GETCW( control );
    control &= ~_FPU_MASK_NI;
    _FPU_SETCW( control );
#else
#error Unknown arch
#endif
}