//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _fpcontrol_h
#define _fpcontrol_h

// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
// to FTZ mode if the device hardware is running in that mode.  We have explored all other options short of writing correctly rounded operations
// in integer code, and have found this is the only way to correctly verify operation.
//
// Non-Apple implementations will need to provide their own implentation for these features.  If the reference hardware and device are both
// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty.  If the device is running in non-default
// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
    typedef int     FPU_mode_type;
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ )
    #include <xmmintrin.h>
#elif defined( __PPC__ )
    #include <fpu_control.h>
    extern __thread fpu_control_t fpu_control;
#endif
    // Set the reference hardware floating point unit to FTZ mode
    static inline void ForceFTZ( FPU_mode_type *mode )
    {
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
        *mode = _mm_getcsr();
        _mm_setcsr( *mode | 0x8040);
#elif defined( __PPC__ )
        *mode = fpu_control;
        fpu_control |= _FPU_MASK_NI;
#elif defined ( __arm__ )
        unsigned fpscr;
        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
        *mode = fpscr;
        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
        // Add 64 bit support
#elif defined (__aarch64__)
        unsigned fpcr;
        __asm__ volatile ("mrs %0, fpcr" : "=r"(fpcr));
        *mode = fpcr;
        __asm__ volatile ("msr fpcr, %0" :: "r"(fpcr | (1U << 24)));
#else
        #error ForceFTZ needs an implentation
#endif
    }

    // Disable the denorm flush to zero
    static inline void DisableFTZ( FPU_mode_type *mode )
    {
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
        *mode = _mm_getcsr();
        _mm_setcsr( *mode & ~0x8040);
#elif defined( __PPC__ )
        *mode = fpu_control;
        fpu_control &= ~_FPU_MASK_NI;
#elif defined ( __arm__ )
        unsigned fpscr;
        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
        *mode = fpscr;
        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
        // Add 64 bit support
#elif defined (__aarch64__)
        unsigned fpcr;
        __asm__ volatile ("mrs %0, fpcr" : "=r"(fpcr));
        *mode = fpcr;
        __asm__ volatile ("msr fpcr, %0" :: "r"(fpcr & ~(1U << 24)));
#else
#error DisableFTZ needs an implentation
#endif
    }

    // Restore the reference hardware to floating point state indicated by *mode
    static inline void RestoreFPState( FPU_mode_type *mode )
    {
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
        _mm_setcsr( *mode );
#elif defined( __PPC__)
        fpu_control = *mode;
#elif defined (__arm__)
        __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
        // Add 64 bit support
#elif defined (__aarch64__)
        __asm__ volatile ("msr fpcr, %0" :: "r"(*mode));
#else
        #error RestoreFPState needs an implementation
#endif
    }
#else
        #error ForceFTZ and RestoreFPState need implentations
#endif

#endif