Files
OpenCL-CTS/test_common/harness/fpcontrol.h
Marco Antognini 17a0d09567 Cleanup usage of static, extern and typedef (#1256)
* Cleanup usage of static, extern and typedef

Remove static on functions defined headers, as it can result in
duplication in binaries.

Remove unnecessary extern keyword on a function declaration, as it is
the default behavior and can be puzzling when reading the code.

Remove the unused declaration of my_ilogb, which is never defined.

Remove unnecessary usage of typedef, as they are only increasing the
cognitive load of the code for no purpose.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>

* Improve usage of inline and static in harness

Functions declared in header as static can trigger unused warnings when
(indirectly) included in translation units that do not use such
functions. Use inline instead, which also avoids duplicating symbols in
binaries.

Signed-off-by: Marco Antognini <marco.antognini@arm.com>
2021-05-18 18:09:46 +01:00

115 lines
3.9 KiB
C

//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _fpcontrol_h
#define _fpcontrol_h
// In order to get tests for correctly rounded operations (e.g. multiply) to
// work properly we need to be able to set the reference hardware to FTZ mode if
// the device hardware is running in that mode. We have explored all other
// options short of writing correctly rounded operations in integer code, and
// have found this is the only way to correctly verify operation.
//
// Non-Apple implementations will need to provide their own implentation for
// these features. If the reference hardware and device are both running in the
// same state (either FTZ or IEEE compliant modes) then these functions may be
// empty. If the device is running in non-default rounding mode (e.g. round
// toward zero), then these functions should also set the reference device into
// that rounding mode.
#if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__) \
|| defined(__MINGW32__)
typedef int FPU_mode_type;
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|| defined(__MINGW32__)
#include <xmmintrin.h>
#elif defined(__PPC__)
#include <fpu_control.h>
extern __thread fpu_control_t fpu_control;
#endif
// Set the reference hardware floating point unit to FTZ mode
inline void ForceFTZ(FPU_mode_type *mode)
{
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|| defined(__MINGW32__)
*mode = _mm_getcsr();
_mm_setcsr(*mode | 0x8040);
#elif defined(__PPC__)
*mode = fpu_control;
fpu_control |= _FPU_MASK_NI;
#elif defined(__arm__)
unsigned fpscr;
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
// Add 64 bit support
#elif defined(__aarch64__)
unsigned fpscr;
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
#else
#error ForceFTZ needs an implentation
#endif
}
// Disable the denorm flush to zero
inline void DisableFTZ(FPU_mode_type *mode)
{
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|| defined(__MINGW32__)
*mode = _mm_getcsr();
_mm_setcsr(*mode & ~0x8040);
#elif defined(__PPC__)
*mode = fpu_control;
fpu_control &= ~_FPU_MASK_NI;
#elif defined(__arm__)
unsigned fpscr;
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
// Add 64 bit support
#elif defined(__aarch64__)
unsigned fpscr;
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
#else
#error DisableFTZ needs an implentation
#endif
}
// Restore the reference hardware to floating point state indicated by *mode
inline void RestoreFPState(FPU_mode_type *mode)
{
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|| defined(__MINGW32__)
_mm_setcsr(*mode);
#elif defined(__PPC__)
fpu_control = *mode;
#elif defined(__arm__)
__asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
// Add 64 bit support
#elif defined(__aarch64__)
__asm__ volatile("msr fpcr, %0" ::"r"(*mode));
#else
#error RestoreFPState needs an implementation
#endif
}
#else
#error ForceFTZ and RestoreFPState need implentations
#endif
#endif