//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "Utility.h"
#include "FunctionList.h"

#if defined(__PPC__)
// Global (per-thread, declared __thread) variable used to hold the FPU control register
// state. The FPSCR register cannot be used because not all Power implementations retain
// or observe the NI (non-IEEE mode) bit.
__thread fpu_control_t fpu_control = 0;
#endif

// Multiplies two doubles and returns the result as a (high, low) pair.
//   *rhi receives the rounded product u*v.
//   *rlo receives a correction term built from the split halves of u and v
//        (Veltkamp-style split using the constant 1+2^27).
// The order of the floating-point operations below is significant; do not
// re-associate them.
void MulD(double *rhi, double *rlo, double u, double v)
{
    const double splitter = 134217729.0; // 1+2^27

    // Break u into a head and a tail such that uHead + uTail == u.
    const double uScaled = u * splitter;
    const double uHead = (u - uScaled) + uScaled;
    const double uTail = u - uHead;

    // Same decomposition for v.
    const double vScaled = v * splitter;
    const double vHead = (v - vScaled) + vScaled;
    const double vTail = v - vHead;

    const double product = u * v;

    // Accumulate the partial products against the rounded product,
    // largest term first.
    double correction = uHead * vHead - product;
    correction = correction + (uHead * vTail);
    correction = correction + (uTail * vHead);
    correction = correction + (uTail * vTail);

    *rhi = product;
    *rlo = correction;
}

// Adds two doubles and returns the result as a (high, low) pair.
//   *rhi receives the rounded sum a + b.
//   *rlo receives the correction term: the operand of smaller magnitude
//        minus (sum - operand of larger magnitude).
// When |a| is not strictly greater than |b| (including ties), b is treated
// as the larger operand.
void AddD(double *rhi, double *rlo, double a, double b)
{
    // Default to the "b is larger" ordering; swap only when |a| > |b|.
    double larger = b;
    double smaller = a;
    if (fabs(a) > fabs(b))
    {
        larger = a;
        smaller = b;
    }

    const double sum = a + b;
    const double correction = smaller - (sum - larger);

    *rhi = sum;
    *rlo = correction;
}

// Multiplies two (high, low) double pairs, (xh, xl) * (yh, yl), and returns
// the result as a (high, low) pair in *rhi / *rlo.
// The product of the high parts is formed with a correction term from the
// split halves (constant 1+2^27); the cross terms xh*yl + xl*yh are then
// folded into that correction and the pair is renormalised. The xl*yl term
// is not included. Operation order is significant.
void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
{
    const double splitter = 134217729.0; // 1+2^27

    // Split xh into head + tail.
    const double xScaled = xh * splitter;
    const double xHead = (xh - xScaled) + xScaled;
    const double xTail = xh - xHead;

    // Split yh into head + tail.
    const double yScaled = yh * splitter;
    const double yHead = (yh - yScaled) + yScaled;
    const double yTail = yh - yHead;

    const double prodHi = xh * yh;

    // Correction for prodHi from the partial products.
    double prodLo = xHead * yHead - prodHi;
    prodLo = prodLo + (xHead * yTail);
    prodLo = prodLo + (xTail * yHead);
    prodLo = prodLo + (xTail * yTail);

    // Fold in the cross terms involving the low input parts.
    prodLo = prodLo + (xh * yl + xl * yh);

    // Renormalise into the output pair.
    const double hi = prodHi + prodLo;
    *rhi = hi;
    *rlo = (prodHi - hi) + prodLo;
}

// Adds two (high, low) double pairs, (xh, xl) + (yh, yl), and returns the
// result as a (high, low) pair in *rhi / *rlo.
// The high parts are summed first; the correction is then accumulated
// starting from whichever high part has the larger magnitude (yh on ties),
// and the pair is renormalised. Operation order is significant.
void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
{
    const double sum = xh + yh;

    double correction;
    if (fabs(xh) > fabs(yh))
    {
        correction = xh - sum;
        correction = correction + yh;
        correction = correction + yl;
        correction = correction + xl;
    }
    else
    {
        correction = yh - sum;
        correction = correction + xh;
        correction = correction + xl;
        correction = correction + yl;
    }

    const double hi = sum + correction;
    *rhi = hi;
    *rlo = (sum - hi) + correction;
}

// Divides a by b and returns the quotient as a (high, low) pair.
//   *chi receives the rounded quotient a / b.
//   *clo receives (a - (*chi) * b) / b, where the product is formed with
//        MulD and the subtraction with AddDD.
// No special handling is done for b == 0.
void DivideDD(double *chi, double *clo, double a, double b)
{
    const double quotient = a / b;
    *chi = quotient;

    // quotient * b as a (high, low) pair.
    double prodHi, prodLo;
    MulD(&prodHi, &prodLo, quotient, b);

    // remainder = a - quotient * b; only its high part is used below.
    double remHi, remLo;
    AddDD(&remHi, &remLo, -prodHi, -prodLo, a, 0.0);

    *clo = remHi / b;
}

// These functions compare two floats/doubles. Since some platforms may choose to
// flush denormals to zero before comparison, a comparison like a < b may give the wrong
// result in certain cases where we need a correct comparison result even when the operands
// are denormals. These functions therefore compare floats/doubles using their signed
// integer (int / long int) representation. In other cases, when flushing to zero is fine,
// these should not be used.
// Also, these functions do not check for NaNs; they assume NaNs are handled separately, as a
// special edge case, by the caller.
// They return 0 if both are equal, 1 if x > y and -1 if x < y.

// Orders two floats by their integer bit patterns rather than with a
// floating-point compare.
// Returns 0 when x and y compare equal, -1 when x < y, and 1 when x > y.
// NaNs are not handled here.
// NOTE(review): assumes int32f_t overlays a float member `f` with a 32-bit
// signed integer member `i` -- confirm against its declaration.
inline
int compareFloats(float x, float y)
{
    int32f_t lhs, rhs;

    lhs.f = x;
    rhs.f = y;

    // A value with the sign bit set is replaced by 0x80000000 minus itself,
    // so negative inputs become negated magnitudes and -0.0 maps to 0.
    if( lhs.i & 0x80000000 )
        lhs.i = 0x80000000 - lhs.i;
    if( rhs.i & 0x80000000 )
        rhs.i = 0x80000000 - rhs.i;

    // Three-way result: exactly one of the two comparisons can be true.
    return (lhs.i > rhs.i) - (lhs.i < rhs.i);
}

// Orders two doubles by their integer bit patterns rather than with a
// floating-point compare.
// Returns 0 when x and y compare equal, -1 when x < y, and 1 when x > y.
// NaNs are not handled here.
// NOTE(review): assumes int64d_t overlays a double member `d` with a 64-bit
// signed integer member `l` -- confirm against its declaration.
inline
int compareDoubles(double x, double y)
{
    int64d_t lhs, rhs;

    lhs.d = x;
    rhs.d = y;

    // A value with the sign bit set is replaced by 0x8000000000000000 minus
    // itself, so negative inputs become negated magnitudes and -0.0 maps to 0.
    if( lhs.l & 0x8000000000000000LL )
        lhs.l = 0x8000000000000000LL - lhs.l;
    if( rhs.l & 0x8000000000000000LL )
        rhs.l = 0x8000000000000000LL - rhs.l;

    // Three-way result: exactly one of the two comparisons can be true.
    return (lhs.l > rhs.l) - (lhs.l < rhs.l);
}

// Logs one line identifying the function under test via vlog().
//   fname         - name of the function, printed right-aligned in 15 columns.
//   float_size    - size in bytes of the floating-point type being tested;
//                   sizeof(cl_double), sizeof(cl_float) and sizeof(cl_half)
//                   are printed as "fp64", "fp32" and "fp16" respectively.
//                   Any other size is printed as "unk".
//   isFastRelaxed - non-zero appends the "rlx" tag, zero leaves it blank.
void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed)
{
    char const *fpSizeStr;
    switch (float_size) {
    case sizeof(cl_double):
        fpSizeStr = "fp64";
        break;
    case sizeof(cl_float):
        fpSizeStr = "fp32";
        break;
    case sizeof(cl_half):
        fpSizeStr = "fp16";
        break;
    default:
        // Never hand a null pointer to "%s": that is undefined behaviour.
        fpSizeStr = "unk";
        break;
    }

    char const *fpFastRelaxedStr = isFastRelaxed ? "rlx" : "";

    vlog("%15s %4s %4s",fname, fpSizeStr, fpFastRelaxedStr);
}

// Returns the ULP error tolerance stored in *f for the current configuration.
// The field is selected by two switches:
//   relaxed      (argument)  - relaxed_* fields vs. float_* fields
//   gIsEmbedded  (global)    - *_embedded_* fields vs. the plain ones
float getAllowedUlpError(const Func *f, const bool relaxed)
{
    float ulp;

    if (gIsEmbedded)
        ulp = relaxed ? f->relaxed_embedded_error : f->float_embedded_ulps;
    else
        ulp = relaxed ? f->relaxed_error : f->float_ulps;

    return ulp;
}