// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "Utility.h" #include "FunctionList.h" #if defined(__PPC__) // Global varaiable used to hold the FPU control register state. The FPSCR register can not // be used because not all Power implementations retain or observed the NI (non-IEEE // mode) bit. __thread fpu_control_t fpu_control = 0; #endif void MulD(double *rhi, double *rlo, double u, double v) { const double c = 134217729.0; // 1+2^27 double up, u1, u2, vp, v1, v2; up = u*c; u1 = (u - up) + up; u2 = u - u1; vp = v*c; v1 = (v - vp) + vp; v2 = v - v1; double rh = u*v; double rl = (((u1*v1 - rh) + (u1*v2)) + (u2*v1)) + (u2*v2); *rhi = rh; *rlo = rl; } void AddD(double *rhi, double *rlo, double a, double b) { double zhi, zlo; zhi = a + b; if(fabs(a) > fabs(b)) { zlo = zhi - a; zlo = b - zlo; } else { zlo = zhi - b; zlo = a - zlo; } *rhi = zhi; *rlo = zlo; } void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl) { double mh, ml; double c = 134217729.0; double up, u1, u2, vp, v1, v2; up = xh*c; u1 = (xh - up) + up; u2 = xh - u1; vp = yh*c; v1 = (yh - vp) + vp; v2 = yh - v1; mh = xh*yh; ml = (((u1*v1 - mh) + (u1*v2)) + (u2*v1)) + (u2*v2); ml += xh*yl + xl*yh; *rhi = mh + ml; *rlo = (mh - (*rhi)) + ml; } void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl) { double r, s; r = xh + yh; s = (fabs(xh) > fabs(yh)) ? (xh - r + yh + yl + xl) : (yh - r + xh + xl + yl); *rhi = r + s; *rlo = (r - (*rhi)) + s; } void DivideDD(double *chi, double *clo, double a, double b) { *chi = a / b; double rhi, rlo; MulD(&rhi, &rlo, *chi, b); AddDD(&rhi, &rlo, -rhi, -rlo, a, 0.0); *clo = rhi / b; } // These functions comapre two floats/doubles. Since some platforms may choose to // flush denormals to zeros before comparison, comparison like a < b may give wrong // result in "certain cases" where we do need correct compasion result when operands // are denormals .... these functions comapre floats/doubles using signed integer/long int // rep. In other cases, when flushing to zeros is fine, these should not be used. // Also these doesn't check for nans and assume nans are handled separately as special edge case // by the caller which calls these functions // return 0 if both are equal, 1 if x > y and -1 if x < y. inline int compareFloats(float x, float y) { int32f_t a, b; a.f = x; b.f = y; if( a.i & 0x80000000 ) a.i = 0x80000000 - a.i; if( b.i & 0x80000000 ) b.i = 0x80000000 - b.i; if( a.i == b.i ) return 0; return a.i < b.i ? -1 : 1; } inline int compareDoubles(double x, double y) { int64d_t a, b; a.d = x; b.d = y; if( a.l & 0x8000000000000000LL ) a.l = 0x8000000000000000LL - a.l; if( b.l & 0x8000000000000000LL ) b.l = 0x8000000000000000LL - b.l; if( a.l == b.l ) return 0; return a.l < b.l ? -1 : 1; } void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed) { char const *fpSizeStr = NULL; char const *fpFastRelaxedStr = ""; switch (float_size) { case sizeof(cl_double): fpSizeStr = "fp64"; break; case sizeof(cl_float): fpSizeStr = "fp32"; break; case sizeof(cl_half): fpSizeStr = "fp16"; break; } if (isFastRelaxed) { fpFastRelaxedStr = "rlx"; } vlog("%15s %4s %4s",fname, fpSizeStr, fpFastRelaxedStr); } float getAllowedUlpError(const Func *f, const bool relaxed) { float ulp; if (relaxed) { if (gIsEmbedded) { ulp = f->relaxed_embedded_error; } else { ulp = f->relaxed_error; } } else { if (gIsEmbedded) { ulp = f->float_embedded_ulps; } else { ulp = f->float_ulps; } } return ulp; }