mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-26 00:39:03 +00:00
Apply clang-format on math_brute_force (#1104)
Signed-off-by: Marco Antognini <marco.antognini@arm.com>
This commit is contained in:
@@ -16,13 +16,13 @@
|
|||||||
#include "FunctionList.h"
|
#include "FunctionList.h"
|
||||||
#include "reference_math.h"
|
#include "reference_math.h"
|
||||||
|
|
||||||
#define FTZ_ON 1
|
#define FTZ_ON 1
|
||||||
#define FTZ_OFF 0
|
#define FTZ_OFF 0
|
||||||
#define EXACT 0.0f
|
#define EXACT 0.0f
|
||||||
#define RELAXED_ON 1
|
#define RELAXED_ON 1
|
||||||
#define RELAXED_OFF 0
|
#define RELAXED_OFF 0
|
||||||
|
|
||||||
#define STRINGIFY( _s) #_s
|
#define STRINGIFY(_s) #_s
|
||||||
|
|
||||||
// Only use ulps information in spir test
|
// Only use ulps information in spir test
|
||||||
#ifdef FUNCTION_LIST_ULPS_ONLY
|
#ifdef FUNCTION_LIST_ULPS_ONLY
|
||||||
@@ -51,25 +51,25 @@
|
|||||||
STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \
|
STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \
|
||||||
_embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \
|
_embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \
|
||||||
}
|
}
|
||||||
#define unaryF NULL
|
#define unaryF NULL
|
||||||
#define i_unaryF NULL
|
#define i_unaryF NULL
|
||||||
#define unaryF_u NULL
|
#define unaryF_u NULL
|
||||||
#define macro_unaryF NULL
|
#define macro_unaryF NULL
|
||||||
#define binaryF NULL
|
#define binaryF NULL
|
||||||
#define binaryF_nextafter NULL
|
#define binaryF_nextafter NULL
|
||||||
#define binaryOperatorF NULL
|
#define binaryOperatorF NULL
|
||||||
#define binaryF_i NULL
|
#define binaryF_i NULL
|
||||||
#define macro_binaryF NULL
|
#define macro_binaryF NULL
|
||||||
#define ternaryF NULL
|
#define ternaryF NULL
|
||||||
#define unaryF_two_results NULL
|
#define unaryF_two_results NULL
|
||||||
#define unaryF_two_results_i NULL
|
#define unaryF_two_results_i NULL
|
||||||
#define binaryF_two_results_i NULL
|
#define binaryF_two_results_i NULL
|
||||||
#define mad_function NULL
|
#define mad_function NULL
|
||||||
|
|
||||||
#define reference_sqrt NULL
|
#define reference_sqrt NULL
|
||||||
#define reference_sqrtl NULL
|
#define reference_sqrtl NULL
|
||||||
#define reference_divide NULL
|
#define reference_divide NULL
|
||||||
#define reference_dividel NULL
|
#define reference_dividel NULL
|
||||||
#define reference_relaxed_divide NULL
|
#define reference_relaxed_divide NULL
|
||||||
|
|
||||||
#else // FUNCTION_LIST_ULPS_ONLY
|
#else // FUNCTION_LIST_ULPS_ONLY
|
||||||
@@ -102,24 +102,27 @@
|
|||||||
_embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \
|
_embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \
|
||||||
}
|
}
|
||||||
|
|
||||||
extern const vtbl _unary; // float foo( float )
|
extern const vtbl _unary; // float foo( float )
|
||||||
extern const vtbl _unary_u; // float foo( uint ), double foo( ulong )
|
extern const vtbl _unary_u; // float foo( uint ), double foo( ulong )
|
||||||
extern const vtbl _i_unary; // int foo( float )
|
extern const vtbl _i_unary; // int foo( float )
|
||||||
extern const vtbl _macro_unary; // int foo( float ), returns {0,1} for scalar, { 0, -1 } for vector
|
extern const vtbl _macro_unary; // int foo( float ), returns {0,1} for scalar,
|
||||||
extern const vtbl _binary; // float foo( float, float )
|
// { 0, -1 } for vector
|
||||||
extern const vtbl _binary_nextafter; // float foo( float, float ), special handling for nextafter
|
extern const vtbl _binary; // float foo( float, float )
|
||||||
extern const vtbl _binary_operator; // float .op. float
|
extern const vtbl _binary_nextafter; // float foo( float, float ), special
|
||||||
extern const vtbl _macro_binary; // int foo( float, float ), returns {0,1} for scalar, { 0, -1 } for vector
|
// handling for nextafter
|
||||||
extern const vtbl _binary_i; // float foo( float, int )
|
extern const vtbl _binary_operator; // float .op. float
|
||||||
extern const vtbl _ternary; // float foo( float, float, float )
|
extern const vtbl _macro_binary; // int foo( float, float ), returns {0,1} for
|
||||||
extern const vtbl _unary_two_results; // float foo( float, float * )
|
// scalar, { 0, -1 } for vector
|
||||||
|
extern const vtbl _binary_i; // float foo( float, int )
|
||||||
|
extern const vtbl _ternary; // float foo( float, float, float )
|
||||||
|
extern const vtbl _unary_two_results; // float foo( float, float * )
|
||||||
extern const vtbl _unary_two_results_i; // float foo( float, int * )
|
extern const vtbl _unary_two_results_i; // float foo( float, int * )
|
||||||
extern const vtbl _binary_two_results_i; // float foo( float, float, int * )
|
extern const vtbl _binary_two_results_i; // float foo( float, float, int * )
|
||||||
extern const vtbl _mad_tbl; // float mad( float, float, float )
|
extern const vtbl _mad_tbl; // float mad( float, float, float )
|
||||||
|
|
||||||
#define unaryF &_unary
|
#define unaryF &_unary
|
||||||
#define i_unaryF &_i_unary
|
#define i_unaryF &_i_unary
|
||||||
#define unaryF_u &_unary_u
|
#define unaryF_u &_unary_u
|
||||||
#define macro_unaryF &_macro_unary
|
#define macro_unaryF &_macro_unary
|
||||||
#define binaryF &_binary
|
#define binaryF &_binary
|
||||||
#define binaryF_nextafter &_binary_nextafter
|
#define binaryF_nextafter &_binary_nextafter
|
||||||
@@ -127,10 +130,10 @@ extern const vtbl _mad_tbl; // float mad( float, float, float )
|
|||||||
#define binaryF_i &_binary_i
|
#define binaryF_i &_binary_i
|
||||||
#define macro_binaryF &_macro_binary
|
#define macro_binaryF &_macro_binary
|
||||||
#define ternaryF &_ternary
|
#define ternaryF &_ternary
|
||||||
#define unaryF_two_results &_unary_two_results
|
#define unaryF_two_results &_unary_two_results
|
||||||
#define unaryF_two_results_i &_unary_two_results_i
|
#define unaryF_two_results_i &_unary_two_results_i
|
||||||
#define binaryF_two_results_i &_binary_two_results_i
|
#define binaryF_two_results_i &_binary_two_results_i
|
||||||
#define mad_function &_mad_tbl
|
#define mad_function &_mad_tbl
|
||||||
|
|
||||||
#endif // FUNCTION_LIST_ULPS_ONLY
|
#endif // FUNCTION_LIST_ULPS_ONLY
|
||||||
|
|
||||||
@@ -325,4 +328,4 @@ const Func functionList[] = {
|
|||||||
OPERATOR_ENTRY(not, "!", 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
OPERATOR_ENTRY(not, "!", 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||||
};
|
};
|
||||||
|
|
||||||
const size_t functionListCount = sizeof( functionList ) / sizeof( functionList[0] );
|
const size_t functionListCount = sizeof(functionList) / sizeof(functionList[0]);
|
||||||
|
|||||||
@@ -22,80 +22,77 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined( __APPLE__ )
|
#if defined(__APPLE__)
|
||||||
#include <OpenCL/opencl.h>
|
#include <OpenCL/opencl.h>
|
||||||
#else
|
#else
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "harness/mt19937.h"
|
#include "harness/mt19937.h"
|
||||||
|
|
||||||
typedef union fptr
|
typedef union fptr {
|
||||||
{
|
void *p;
|
||||||
void *p;
|
double (*f_f)(double);
|
||||||
double (*f_f)(double);
|
double (*f_u)(cl_uint);
|
||||||
double (*f_u)(cl_uint);
|
int (*i_f)(double);
|
||||||
int (*i_f)(double);
|
int (*i_f_f)(float);
|
||||||
int (*i_f_f)(float);
|
float (*f_ff_f)(float, float);
|
||||||
float (*f_ff_f)(float, float);
|
double (*f_ff)(double, double);
|
||||||
double (*f_ff)(double, double);
|
int (*i_ff)(double, double);
|
||||||
int (*i_ff)(double, double);
|
double (*f_fi)(double, int);
|
||||||
double (*f_fi)(double, int);
|
double (*f_fpf)(double, double *);
|
||||||
double (*f_fpf)(double, double*);
|
double (*f_fpI)(double, int *);
|
||||||
double (*f_fpI)(double, int*);
|
double (*f_ffpI)(double, double, int *);
|
||||||
double (*f_ffpI)(double, double, int*);
|
double (*f_fff)(double, double, double);
|
||||||
double (*f_fff)(double, double, double );
|
float (*f_fma)(float, float, float, int);
|
||||||
float (*f_fma)(float, float, float, int);
|
} fptr;
|
||||||
}fptr;
|
|
||||||
|
|
||||||
typedef union dptr
|
typedef union dptr {
|
||||||
{
|
void *p;
|
||||||
void *p;
|
long double (*f_f)(long double);
|
||||||
long double (*f_f)(long double);
|
long double (*f_u)(cl_ulong);
|
||||||
long double (*f_u)(cl_ulong);
|
int (*i_f)(long double);
|
||||||
int (*i_f)(long double);
|
long double (*f_ff)(long double, long double);
|
||||||
long double (*f_ff)(long double, long double);
|
int (*i_ff)(long double, long double);
|
||||||
int (*i_ff)(long double, long double);
|
long double (*f_fi)(long double, int);
|
||||||
long double (*f_fi)(long double, int);
|
long double (*f_fpf)(long double, long double *);
|
||||||
long double (*f_fpf)(long double, long double*);
|
long double (*f_fpI)(long double, int *);
|
||||||
long double (*f_fpI)(long double, int*);
|
long double (*f_ffpI)(long double, long double, int *);
|
||||||
long double (*f_ffpI)(long double, long double, int*);
|
long double (*f_fff)(long double, long double, long double);
|
||||||
long double (*f_fff)(long double, long double, long double);
|
} dptr;
|
||||||
}dptr;
|
|
||||||
|
|
||||||
struct Func;
|
struct Func;
|
||||||
|
|
||||||
typedef struct vtbl
|
typedef struct vtbl
|
||||||
{
|
{
|
||||||
const char *type_name;
|
const char *type_name;
|
||||||
int (*TestFunc)(const struct Func *, MTdata, bool);
|
int (*TestFunc)(const struct Func *, MTdata, bool);
|
||||||
int (*DoubleTestFunc)(
|
int (*DoubleTestFunc)(
|
||||||
const struct Func *, MTdata,
|
const struct Func *, MTdata,
|
||||||
bool); // may be NULL if function is single precision only
|
bool); // may be NULL if function is single precision only
|
||||||
}vtbl;
|
} vtbl;
|
||||||
|
|
||||||
typedef struct Func
|
typedef struct Func
|
||||||
{
|
{
|
||||||
const char *name; // common name, to be used as an argument in the shell
|
const char *name; // common name, to be used as an argument in the shell
|
||||||
const char *nameInCode; // name as it appears in the __kernel, usually the same as name, but different for multiplication
|
const char *nameInCode; // name as it appears in the __kernel, usually the
|
||||||
fptr func;
|
// same as name, but different for multiplication
|
||||||
dptr dfunc;
|
fptr func;
|
||||||
fptr rfunc;
|
dptr dfunc;
|
||||||
float float_ulps;
|
fptr rfunc;
|
||||||
float double_ulps;
|
float float_ulps;
|
||||||
float float_embedded_ulps;
|
float double_ulps;
|
||||||
float relaxed_error;
|
float float_embedded_ulps;
|
||||||
float relaxed_embedded_error;
|
float relaxed_error;
|
||||||
int ftz;
|
float relaxed_embedded_error;
|
||||||
int relaxed;
|
int ftz;
|
||||||
const vtbl *vtbl_ptr;
|
int relaxed;
|
||||||
}Func;
|
const vtbl *vtbl_ptr;
|
||||||
|
} Func;
|
||||||
|
|
||||||
|
|
||||||
extern const Func functionList[];
|
extern const Func functionList[];
|
||||||
|
|
||||||
extern const size_t functionListCount;
|
extern const size_t functionListCount;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
// You may obtain a copy of the License at
|
// You may obtain a copy of the License at
|
||||||
@@ -16,103 +16,94 @@
|
|||||||
#include "Sleep.h"
|
#include "Sleep.h"
|
||||||
#include "Utility.h"
|
#include "Utility.h"
|
||||||
|
|
||||||
#if defined( __APPLE__ )
|
#if defined(__APPLE__)
|
||||||
#include <IOKit/pwr_mgt/IOPMLib.h>
|
#include <IOKit/pwr_mgt/IOPMLib.h>
|
||||||
#include <IOKit/IOMessage.h>
|
#include <IOKit/IOMessage.h>
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
io_connect_t connection;
|
io_connect_t connection;
|
||||||
IONotificationPortRef port;
|
IONotificationPortRef port;
|
||||||
io_object_t iterator;
|
io_object_t iterator;
|
||||||
}sleepInfo;
|
} sleepInfo;
|
||||||
|
|
||||||
void sleepCallback( void * refcon,
|
void sleepCallback(void* refcon, io_service_t service, natural_t messageType,
|
||||||
io_service_t service,
|
void* messageArgument);
|
||||||
natural_t messageType,
|
|
||||||
void * messageArgument );
|
|
||||||
|
|
||||||
void sleepCallback( void * refcon UNUSED,
|
void sleepCallback(void* refcon UNUSED, io_service_t service UNUSED,
|
||||||
io_service_t service UNUSED,
|
natural_t messageType, void* messageArgument)
|
||||||
natural_t messageType,
|
{
|
||||||
void * messageArgument )
|
|
||||||
{
|
|
||||||
|
|
||||||
IOReturn result;
|
IOReturn result;
|
||||||
/*
|
/*
|
||||||
service -- The IOService whose state has changed.
|
service -- The IOService whose state has changed.
|
||||||
messageType -- A messageType enum, defined by IOKit/IOMessage.h or by the IOService's family.
|
messageType -- A messageType enum, defined by IOKit/IOMessage.h or by the
|
||||||
messageArgument -- An argument for the message, dependent on the messageType.
|
IOService's family. messageArgument -- An argument for the message,
|
||||||
|
dependent on the messageType.
|
||||||
*/
|
*/
|
||||||
switch ( messageType )
|
switch (messageType)
|
||||||
{
|
{
|
||||||
case kIOMessageSystemWillSleep:
|
case kIOMessageSystemWillSleep:
|
||||||
// Handle demand sleep (such as sleep caused by running out of
|
// Handle demand sleep (such as sleep caused by running out of
|
||||||
// batteries, closing the lid of a laptop, or selecting
|
// batteries, closing the lid of a laptop, or selecting
|
||||||
// sleep from the Apple menu).
|
// sleep from the Apple menu).
|
||||||
IOAllowPowerChange(sleepInfo.connection,(long)messageArgument);
|
IOAllowPowerChange(sleepInfo.connection, (long)messageArgument);
|
||||||
vlog( "Hard sleep occurred.\n" );
|
vlog("Hard sleep occurred.\n");
|
||||||
break;
|
break;
|
||||||
case kIOMessageCanSystemSleep:
|
case kIOMessageCanSystemSleep:
|
||||||
// In this case, the computer has been idle for several minutes
|
// In this case, the computer has been idle for several minutes
|
||||||
// and will sleep soon so you must either allow or cancel
|
// and will sleep soon so you must either allow or cancel
|
||||||
// this notification. Important: if you don’t respond, there will
|
// this notification. Important: if you don’t respond, there will
|
||||||
// be a 30-second timeout before the computer sleeps.
|
// be a 30-second timeout before the computer sleeps.
|
||||||
// IOCancelPowerChange(root_port,(long)messageArgument);
|
// IOCancelPowerChange(root_port,(long)messageArgument);
|
||||||
result = IOCancelPowerChange(sleepInfo.connection,(long)messageArgument);
|
result = IOCancelPowerChange(sleepInfo.connection,
|
||||||
if( kIOReturnSuccess != result )
|
(long)messageArgument);
|
||||||
vlog( "sleep prevention failed. (%d)\n", result);
|
if (kIOReturnSuccess != result)
|
||||||
|
vlog("sleep prevention failed. (%d)\n", result);
|
||||||
|
break;
|
||||||
|
case kIOMessageSystemHasPoweredOn:
|
||||||
|
// Handle wakeup.
|
||||||
break;
|
break;
|
||||||
case kIOMessageSystemHasPoweredOn:
|
|
||||||
// Handle wakeup.
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
void PreventSleep(void)
|
||||||
|
|
||||||
|
|
||||||
void PreventSleep( void )
|
|
||||||
{
|
{
|
||||||
#if defined( __APPLE__ )
|
#if defined(__APPLE__)
|
||||||
vlog( "Disabling sleep... " );
|
vlog("Disabling sleep... ");
|
||||||
sleepInfo.iterator = (io_object_t) 0;
|
sleepInfo.iterator = (io_object_t)0;
|
||||||
sleepInfo.port = NULL;
|
sleepInfo.port = NULL;
|
||||||
sleepInfo.connection = IORegisterForSystemPower
|
sleepInfo.connection = IORegisterForSystemPower(
|
||||||
(
|
&sleepInfo, // void * refcon,
|
||||||
&sleepInfo, //void * refcon,
|
&sleepInfo.port, // IONotificationPortRef * thePortRef,
|
||||||
&sleepInfo.port, //IONotificationPortRef * thePortRef,
|
sleepCallback, // IOServiceInterestCallback callback,
|
||||||
sleepCallback, //IOServiceInterestCallback callback,
|
&sleepInfo.iterator // io_object_t * notifier
|
||||||
&sleepInfo.iterator //io_object_t * notifier
|
);
|
||||||
);
|
|
||||||
|
|
||||||
if( (io_connect_t) 0 == sleepInfo.connection )
|
if ((io_connect_t)0 == sleepInfo.connection)
|
||||||
vlog( "failed.\n" );
|
vlog("failed.\n");
|
||||||
else
|
else
|
||||||
vlog( "done.\n" );
|
vlog("done.\n");
|
||||||
|
|
||||||
CFRunLoopAddSource(CFRunLoopGetCurrent(),
|
CFRunLoopAddSource(CFRunLoopGetCurrent(),
|
||||||
IONotificationPortGetRunLoopSource(sleepInfo.port),
|
IONotificationPortGetRunLoopSource(sleepInfo.port),
|
||||||
kCFRunLoopDefaultMode);
|
kCFRunLoopDefaultMode);
|
||||||
#else
|
#else
|
||||||
vlog( "*** PreventSleep() is not implemented on this platform.\n" );
|
vlog("*** PreventSleep() is not implemented on this platform.\n");
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void ResumeSleep( void )
|
void ResumeSleep(void)
|
||||||
{
|
{
|
||||||
#if defined( __APPLE__ )
|
#if defined(__APPLE__)
|
||||||
IOReturn result = IODeregisterForSystemPower ( &sleepInfo.iterator );
|
IOReturn result = IODeregisterForSystemPower(&sleepInfo.iterator);
|
||||||
if( 0 != result )
|
if (0 != result)
|
||||||
vlog( "Got error %d restoring sleep \n", result );
|
vlog("Got error %d restoring sleep \n", result);
|
||||||
else
|
else
|
||||||
vlog( "Sleep restored.\n" );
|
vlog("Sleep restored.\n");
|
||||||
#else
|
#else
|
||||||
vlog( "*** ResumeSleep() is not implemented on this platform.\n" );
|
vlog("*** ResumeSleep() is not implemented on this platform.\n");
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
// You may obtain a copy of the License at
|
// You may obtain a copy of the License at
|
||||||
@@ -16,9 +16,7 @@
|
|||||||
#ifndef SLEEP_H
|
#ifndef SLEEP_H
|
||||||
#define SLEEP_H
|
#define SLEEP_H
|
||||||
|
|
||||||
void PreventSleep( void );
|
void PreventSleep(void);
|
||||||
void ResumeSleep( void );
|
void ResumeSleep(void);
|
||||||
|
|
||||||
#endif /* SLEEP_H */
|
#endif /* SLEEP_H */
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -17,9 +17,9 @@
|
|||||||
#include "FunctionList.h"
|
#include "FunctionList.h"
|
||||||
|
|
||||||
#if defined(__PPC__)
|
#if defined(__PPC__)
|
||||||
// Global variable used to hold the FPU control register state. The FPSCR register cannot
|
// Global variable used to hold the FPU control register state. The FPSCR
|
||||||
// be used because not all Power implementations retain or observe the NI (non-IEEE
|
// register cannot be used because not all Power implementations retain or
|
||||||
// mode) bit.
|
// observe the NI (non-IEEE mode) bit.
|
||||||
__thread fpu_control_t fpu_control = 0;
|
__thread fpu_control_t fpu_control = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -28,16 +28,16 @@ void MulD(double *rhi, double *rlo, double u, double v)
|
|||||||
const double c = 134217729.0; // 1+2^27
|
const double c = 134217729.0; // 1+2^27
|
||||||
double up, u1, u2, vp, v1, v2;
|
double up, u1, u2, vp, v1, v2;
|
||||||
|
|
||||||
up = u*c;
|
up = u * c;
|
||||||
u1 = (u - up) + up;
|
u1 = (u - up) + up;
|
||||||
u2 = u - u1;
|
u2 = u - u1;
|
||||||
|
|
||||||
vp = v*c;
|
vp = v * c;
|
||||||
v1 = (v - vp) + vp;
|
v1 = (v - vp) + vp;
|
||||||
v2 = v - v1;
|
v2 = v - v1;
|
||||||
|
|
||||||
double rh = u*v;
|
double rh = u * v;
|
||||||
double rl = (((u1*v1 - rh) + (u1*v2)) + (u2*v1)) + (u2*v2);
|
double rl = (((u1 * v1 - rh) + (u1 * v2)) + (u2 * v1)) + (u2 * v2);
|
||||||
|
|
||||||
*rhi = rh;
|
*rhi = rh;
|
||||||
*rlo = rl;
|
*rlo = rl;
|
||||||
@@ -47,11 +47,13 @@ void AddD(double *rhi, double *rlo, double a, double b)
|
|||||||
{
|
{
|
||||||
double zhi, zlo;
|
double zhi, zlo;
|
||||||
zhi = a + b;
|
zhi = a + b;
|
||||||
if(fabs(a) > fabs(b)) {
|
if (fabs(a) > fabs(b))
|
||||||
|
{
|
||||||
zlo = zhi - a;
|
zlo = zhi - a;
|
||||||
zlo = b - zlo;
|
zlo = b - zlo;
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
{
|
||||||
zlo = zhi - b;
|
zlo = zhi - b;
|
||||||
zlo = a - zlo;
|
zlo = a - zlo;
|
||||||
}
|
}
|
||||||
@@ -66,17 +68,17 @@ void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
|
|||||||
double c = 134217729.0;
|
double c = 134217729.0;
|
||||||
double up, u1, u2, vp, v1, v2;
|
double up, u1, u2, vp, v1, v2;
|
||||||
|
|
||||||
up = xh*c;
|
up = xh * c;
|
||||||
u1 = (xh - up) + up;
|
u1 = (xh - up) + up;
|
||||||
u2 = xh - u1;
|
u2 = xh - u1;
|
||||||
|
|
||||||
vp = yh*c;
|
vp = yh * c;
|
||||||
v1 = (yh - vp) + vp;
|
v1 = (yh - vp) + vp;
|
||||||
v2 = yh - v1;
|
v2 = yh - v1;
|
||||||
|
|
||||||
mh = xh*yh;
|
mh = xh * yh;
|
||||||
ml = (((u1*v1 - mh) + (u1*v2)) + (u2*v1)) + (u2*v2);
|
ml = (((u1 * v1 - mh) + (u1 * v2)) + (u2 * v1)) + (u2 * v2);
|
||||||
ml += xh*yl + xl*yh;
|
ml += xh * yl + xl * yh;
|
||||||
|
|
||||||
*rhi = mh + ml;
|
*rhi = mh + ml;
|
||||||
*rlo = (mh - (*rhi)) + ml;
|
*rlo = (mh - (*rhi)) + ml;
|
||||||
@@ -86,7 +88,8 @@ void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
|
|||||||
{
|
{
|
||||||
double r, s;
|
double r, s;
|
||||||
r = xh + yh;
|
r = xh + yh;
|
||||||
s = (fabs(xh) > fabs(yh)) ? (xh - r + yh + yl + xl) : (yh - r + xh + xl + yl);
|
s = (fabs(xh) > fabs(yh)) ? (xh - r + yh + yl + xl)
|
||||||
|
: (yh - r + xh + xl + yl);
|
||||||
*rhi = r + s;
|
*rhi = r + s;
|
||||||
*rlo = (r - (*rhi)) + s;
|
*rlo = (r - (*rhi)) + s;
|
||||||
}
|
}
|
||||||
@@ -100,72 +103,61 @@ void DivideDD(double *chi, double *clo, double a, double b)
|
|||||||
*clo = rhi / b;
|
*clo = rhi / b;
|
||||||
}
|
}
|
||||||
|
|
||||||
// These functions compare two floats/doubles. Since some platforms may choose to
|
// These functions compare two floats/doubles. Since some platforms may choose
|
||||||
// flush denormals to zeros before comparison, comparison like a < b may give wrong
|
// to flush denormals to zeros before comparison, comparison like a < b may give
|
||||||
// result in "certain cases" where we do need correct comparison result when operands
|
// wrong result in "certain cases" where we do need correct comparison result
|
||||||
// are denormals .... these functions compare floats/doubles using signed integer/long int
|
// when operands are denormals .... these functions compare floats/doubles using
|
||||||
// rep. In other cases, when flushing to zeros is fine, these should not be used.
|
// signed integer/long int rep. In other cases, when flushing to zeros is fine,
|
||||||
// Also these don't check for nans and assume nans are handled separately as special edge case
|
// these should not be used. Also these don't check for nans and assume nans
|
||||||
// by the caller which calls these functions.
|
// are handled separately as special edge case by the caller which calls these
|
||||||
// return 0 if both are equal, 1 if x > y and -1 if x < y.
|
// functions. Return 0 if both are equal, 1 if x > y and -1 if x < y.
|
||||||
|
|
||||||
inline
|
inline int compareFloats(float x, float y)
|
||||||
int compareFloats(float x, float y)
|
|
||||||
{
|
{
|
||||||
int32f_t a, b;
|
int32f_t a, b;
|
||||||
|
|
||||||
a.f = x;
|
a.f = x;
|
||||||
b.f = y;
|
b.f = y;
|
||||||
|
|
||||||
if( a.i & 0x80000000 )
|
if (a.i & 0x80000000) a.i = 0x80000000 - a.i;
|
||||||
a.i = 0x80000000 - a.i;
|
if (b.i & 0x80000000) b.i = 0x80000000 - b.i;
|
||||||
if( b.i & 0x80000000 )
|
|
||||||
b.i = 0x80000000 - b.i;
|
|
||||||
|
|
||||||
if( a.i == b.i )
|
if (a.i == b.i) return 0;
|
||||||
return 0;
|
|
||||||
|
|
||||||
return a.i < b.i ? -1 : 1;
|
return a.i < b.i ? -1 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline
|
inline int compareDoubles(double x, double y)
|
||||||
int compareDoubles(double x, double y)
|
|
||||||
{
|
{
|
||||||
int64d_t a, b;
|
int64d_t a, b;
|
||||||
|
|
||||||
a.d = x;
|
a.d = x;
|
||||||
b.d = y;
|
b.d = y;
|
||||||
|
|
||||||
if( a.l & 0x8000000000000000LL )
|
if (a.l & 0x8000000000000000LL) a.l = 0x8000000000000000LL - a.l;
|
||||||
a.l = 0x8000000000000000LL - a.l;
|
if (b.l & 0x8000000000000000LL) b.l = 0x8000000000000000LL - b.l;
|
||||||
if( b.l & 0x8000000000000000LL )
|
|
||||||
b.l = 0x8000000000000000LL - b.l;
|
|
||||||
|
|
||||||
if( a.l == b.l )
|
if (a.l == b.l) return 0;
|
||||||
return 0;
|
|
||||||
|
|
||||||
return a.l < b.l ? -1 : 1;
|
return a.l < b.l ? -1 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed)
|
void logFunctionInfo(const char *fname, unsigned int float_size,
|
||||||
|
unsigned int isFastRelaxed)
|
||||||
{
|
{
|
||||||
char const *fpSizeStr = NULL;
|
char const *fpSizeStr = NULL;
|
||||||
char const *fpFastRelaxedStr = "";
|
char const *fpFastRelaxedStr = "";
|
||||||
switch (float_size) {
|
switch (float_size)
|
||||||
case sizeof(cl_double):
|
{
|
||||||
fpSizeStr = "fp64";
|
case sizeof(cl_double): fpSizeStr = "fp64"; break;
|
||||||
break;
|
case sizeof(cl_float): fpSizeStr = "fp32"; break;
|
||||||
case sizeof(cl_float):
|
case sizeof(cl_half): fpSizeStr = "fp16"; break;
|
||||||
fpSizeStr = "fp32";
|
|
||||||
break;
|
|
||||||
case sizeof(cl_half):
|
|
||||||
fpSizeStr = "fp16";
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
if (isFastRelaxed) {
|
if (isFastRelaxed)
|
||||||
|
{
|
||||||
fpFastRelaxedStr = "rlx";
|
fpFastRelaxedStr = "rlx";
|
||||||
}
|
}
|
||||||
vlog("%15s %4s %4s",fname, fpSizeStr, fpFastRelaxedStr);
|
vlog("%15s %4s %4s", fname, fpSizeStr, fpFastRelaxedStr);
|
||||||
}
|
}
|
||||||
|
|
||||||
float getAllowedUlpError(const Func *f, const bool relaxed)
|
float getAllowedUlpError(const Func *f, const bool relaxed)
|
||||||
|
|||||||
@@ -30,13 +30,13 @@
|
|||||||
#include "harness/ThreadPool.h"
|
#include "harness/ThreadPool.h"
|
||||||
#include "harness/conversions.h"
|
#include "harness/conversions.h"
|
||||||
|
|
||||||
#define BUFFER_SIZE (1024*1024*2)
|
#define BUFFER_SIZE (1024 * 1024 * 2)
|
||||||
#define EMBEDDED_REDUCTION_FACTOR (64)
|
#define EMBEDDED_REDUCTION_FACTOR (64)
|
||||||
|
|
||||||
#if defined( __GNUC__ )
|
#if defined(__GNUC__)
|
||||||
#define UNUSED __attribute__ ((unused))
|
#define UNUSED __attribute__((unused))
|
||||||
#else
|
#else
|
||||||
#define UNUSED
|
#define UNUSED
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
struct Func;
|
struct Func;
|
||||||
@@ -44,62 +44,62 @@ struct Func;
|
|||||||
extern int gWimpyBufferSize;
|
extern int gWimpyBufferSize;
|
||||||
extern int gWimpyReductionFactor;
|
extern int gWimpyReductionFactor;
|
||||||
|
|
||||||
#define VECTOR_SIZE_COUNT 6
|
#define VECTOR_SIZE_COUNT 6
|
||||||
extern const char *sizeNames[VECTOR_SIZE_COUNT];
|
extern const char *sizeNames[VECTOR_SIZE_COUNT];
|
||||||
extern const int sizeValues[VECTOR_SIZE_COUNT];
|
extern const int sizeValues[VECTOR_SIZE_COUNT];
|
||||||
|
|
||||||
extern cl_device_id gDevice;
|
extern cl_device_id gDevice;
|
||||||
extern cl_context gContext;
|
extern cl_context gContext;
|
||||||
extern cl_command_queue gQueue;
|
extern cl_command_queue gQueue;
|
||||||
extern void *gIn;
|
extern void *gIn;
|
||||||
extern void *gIn2;
|
extern void *gIn2;
|
||||||
extern void *gIn3;
|
extern void *gIn3;
|
||||||
extern void *gOut_Ref;
|
extern void *gOut_Ref;
|
||||||
extern void *gOut_Ref2;
|
extern void *gOut_Ref2;
|
||||||
extern void *gOut[VECTOR_SIZE_COUNT];
|
extern void *gOut[VECTOR_SIZE_COUNT];
|
||||||
extern void *gOut2[VECTOR_SIZE_COUNT];
|
extern void *gOut2[VECTOR_SIZE_COUNT];
|
||||||
extern cl_mem gInBuffer;
|
extern cl_mem gInBuffer;
|
||||||
extern cl_mem gInBuffer2;
|
extern cl_mem gInBuffer2;
|
||||||
extern cl_mem gInBuffer3;
|
extern cl_mem gInBuffer3;
|
||||||
extern cl_mem gOutBuffer[VECTOR_SIZE_COUNT];
|
extern cl_mem gOutBuffer[VECTOR_SIZE_COUNT];
|
||||||
extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT];
|
extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT];
|
||||||
extern uint32_t gComputeDevices;
|
extern uint32_t gComputeDevices;
|
||||||
extern uint32_t gSimdSize;
|
extern uint32_t gSimdSize;
|
||||||
extern int gSkipCorrectnessTesting;
|
extern int gSkipCorrectnessTesting;
|
||||||
extern int gMeasureTimes;
|
extern int gMeasureTimes;
|
||||||
extern int gReportAverageTimes;
|
extern int gReportAverageTimes;
|
||||||
extern int gForceFTZ;
|
extern int gForceFTZ;
|
||||||
extern int gFastRelaxedDerived;
|
extern int gFastRelaxedDerived;
|
||||||
extern int gWimpyMode;
|
extern int gWimpyMode;
|
||||||
extern int gHasDouble;
|
extern int gHasDouble;
|
||||||
extern int gIsInRTZMode;
|
extern int gIsInRTZMode;
|
||||||
extern int gInfNanSupport;
|
extern int gInfNanSupport;
|
||||||
extern int gIsEmbedded;
|
extern int gIsEmbedded;
|
||||||
extern int gVerboseBruteForce;
|
extern int gVerboseBruteForce;
|
||||||
extern uint32_t gMaxVectorSizeIndex;
|
extern uint32_t gMaxVectorSizeIndex;
|
||||||
extern uint32_t gMinVectorSizeIndex;
|
extern uint32_t gMinVectorSizeIndex;
|
||||||
extern uint32_t gDeviceFrequency;
|
extern uint32_t gDeviceFrequency;
|
||||||
extern cl_device_fp_config gFloatCapabilities;
|
extern cl_device_fp_config gFloatCapabilities;
|
||||||
extern cl_device_fp_config gDoubleCapabilities;
|
extern cl_device_fp_config gDoubleCapabilities;
|
||||||
|
|
||||||
#define LOWER_IS_BETTER 0
|
#define LOWER_IS_BETTER 0
|
||||||
#define HIGHER_IS_BETTER 1
|
#define HIGHER_IS_BETTER 1
|
||||||
|
|
||||||
#include "harness/errorHelpers.h"
|
#include "harness/errorHelpers.h"
|
||||||
|
|
||||||
#if defined (_MSC_VER )
|
#if defined(_MSC_VER)
|
||||||
//Deal with missing scalbn on windows
|
// Deal with missing scalbn on windows
|
||||||
#define scalbnf( _a, _i ) ldexpf( _a, _i )
|
#define scalbnf(_a, _i) ldexpf(_a, _i)
|
||||||
#define scalbn( _a, _i ) ldexp( _a, _i )
|
#define scalbn(_a, _i) ldexp(_a, _i)
|
||||||
#define scalbnl( _a, _i ) ldexpl( _a, _i )
|
#define scalbnl(_a, _i) ldexpl(_a, _i)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
float Abs_Error( float test, double reference );
|
float Abs_Error(float test, double reference);
|
||||||
float Ulp_Error( float test, double reference );
|
float Ulp_Error(float test, double reference);
|
||||||
float Bruteforce_Ulp_Error_Double( double test, long double reference );
|
float Bruteforce_Ulp_Error_Double(double test, long double reference);
|
||||||
|
|
||||||
uint64_t GetTime( void );
|
uint64_t GetTime(void);
|
||||||
double SubtractTime( uint64_t endTime, uint64_t startTime );
|
double SubtractTime(uint64_t endTime, uint64_t startTime);
|
||||||
int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
|
int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
|
||||||
cl_program *p, bool relaxedMode);
|
cl_program *p, bool relaxedMode);
|
||||||
int MakeKernels(const char **c, cl_uint count, const char *name,
|
int MakeKernels(const char **c, cl_uint count, const char *name,
|
||||||
@@ -107,69 +107,84 @@ int MakeKernels(const char **c, cl_uint count, const char *name,
|
|||||||
bool relaxedMode);
|
bool relaxedMode);
|
||||||
|
|
||||||
// Used to convert a bucket of bits into a search pattern through double.
//
// The upper 20 bits of `bits` are placed in the top 20 bits of the 64-bit
// pattern and the lower 12 bits stay in the bottom 12 bits, e.g.
// 0x89abcdef is first split to 0x89abc00000000def. The pattern is then
// reinterpreted as a double and returned.
static inline double DoubleFromUInt32(uint32_t bits)
{
    union {
        uint64_t raw;
        double value;
    } pun;

    const uint64_t low12 = bits & 0xfffU;
    const uint64_t high20 = (uint64_t)(bits & ~0xfffU) << 32;
    pun.raw = high20 | low12;

    // Sign extend the leading bit of the low segment: when bit 11 is set,
    // subtracting 0x1000 borrows through the empty middle region (and from
    // the top segment), so the middle region ends up as all 1s; otherwise it
    // stays all 0s.
    pun.raw -= (bits & 0x800U) << 1;

    return pun.value;
}
|
||||||
|
|
||||||
void _LogBuildError( cl_program p, int line, const char *file );
|
void _LogBuildError(cl_program p, int line, const char *file);
|
||||||
#define LogBuildError( program ) _LogBuildError( program, __LINE__, __FILE__ )
|
#define LogBuildError(program) _LogBuildError(program, __LINE__, __FILE__)
|
||||||
|
|
||||||
#define PERF_LOOP_COUNT 100
|
#define PERF_LOOP_COUNT 100
|
||||||
|
|
||||||
// The spec is fairly clear that we may enforce a hard cutoff to prevent
// premature flushing to zero. However, to avoid conflict for 1.0, results up
// to TYPE_MIN + ulp_limit are allowed to be flushed to zero.
//
// Returns nonzero when |x|, reduced by `ulps` multiples of the 0x1.0p-149
// constant, is below the 0x1.0p-126 threshold.
static inline int IsFloatResultSubnormal(double x, float ulps)
{
    const double reduced =
        fabs(x) - MAKE_HEX_DOUBLE(0x1.0p-149, 0x1, -149) * (double)ulps;
    return reduced < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
}
|
||||||
|
|
||||||
// Returns nonzero when x, lowered by the absolute error allowance `abs_err`,
// is below the 0x1.0p-126 threshold.
// NOTE(review): x is used as-is here (no fabs), unlike
// IsFloatResultSubnormal -- confirm that callers pass a magnitude.
static inline int IsFloatResultSubnormalAbsError(double x, float abs_err)
{
    const double lowered = x - abs_err;
    return lowered < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
}
|
||||||
|
|
||||||
// Double-precision counterpart of the float subnormal check: returns nonzero
// when |x|, reduced by `ulps` multiples of the 0x1.0p-1074 constant, is below
// the 0x1.0p-1022 threshold.
static inline int IsDoubleResultSubnormal(long double x, float ulps)
{
    const long double reduced =
        fabsl(x) - MAKE_HEX_LONG(0x1.0p-1074, 0x1, -1074) * (long double)ulps;
    return reduced < MAKE_HEX_LONG(0x1.0p-1022, 0x1, -1022);
}
|
||||||
|
|
||||||
static inline int IsFloatInfinity(double x)
|
static inline int IsFloatInfinity(double x)
|
||||||
{
|
{
|
||||||
union { cl_float d; cl_uint u; } u;
|
union {
|
||||||
u.d = (cl_float) x;
|
cl_float d;
|
||||||
return ((u.u & 0x7fffffffU) == 0x7F800000U);
|
cl_uint u;
|
||||||
|
} u;
|
||||||
|
u.d = (cl_float)x;
|
||||||
|
return ((u.u & 0x7fffffffU) == 0x7F800000U);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int IsFloatMaxFloat(double x)
|
static inline int IsFloatMaxFloat(double x)
|
||||||
{
|
{
|
||||||
union { cl_float d; cl_uint u; } u;
|
union {
|
||||||
u.d = (cl_float) x;
|
cl_float d;
|
||||||
return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU);
|
cl_uint u;
|
||||||
|
} u;
|
||||||
|
u.d = (cl_float)x;
|
||||||
|
return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int IsFloatNaN(double x)
|
static inline int IsFloatNaN(double x)
|
||||||
{
|
{
|
||||||
union { cl_float d; cl_uint u; } u;
|
union {
|
||||||
u.d = (cl_float) x;
|
cl_float d;
|
||||||
return ((u.u & 0x7fffffffU) > 0x7F800000U);
|
cl_uint u;
|
||||||
|
} u;
|
||||||
|
u.d = (cl_float)x;
|
||||||
|
return ((u.u & 0x7fffffffU) > 0x7F800000U);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern cl_uint RoundUpToNextPowerOfTwo( cl_uint x );
|
extern cl_uint RoundUpToNextPowerOfTwo(cl_uint x);
|
||||||
|
|
||||||
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
|
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
|
||||||
// (that's x87 default state). This causes problems with the tests that
|
// (that's x87 default state). This causes problems with the tests that
|
||||||
@@ -186,46 +201,50 @@ static inline void Force64BitFPUPrecision(void)
|
|||||||
// divergent code just use inline assembly which works for both.
|
// divergent code just use inline assembly which works for both.
|
||||||
unsigned short int orig_cw = 0;
|
unsigned short int orig_cw = 0;
|
||||||
unsigned short int new_cw = 0;
|
unsigned short int new_cw = 0;
|
||||||
__asm__ __volatile__ ("fstcw %0":"=m" (orig_cw));
|
__asm__ __volatile__("fstcw %0" : "=m"(orig_cw));
|
||||||
new_cw = orig_cw | 0x0300; // set precision to 64-bit
|
new_cw = orig_cw | 0x0300; // set precision to 64-bit
|
||||||
__asm__ __volatile__ ("fldcw %0"::"m" (new_cw));
|
__asm__ __volatile__("fldcw %0" ::"m"(new_cw));
|
||||||
#elif defined( _WIN32 ) && defined( __INTEL_COMPILER )
|
#elif defined(_WIN32) && defined(__INTEL_COMPILER)
|
||||||
// Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not* work on win.x64:
|
// Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not*
|
||||||
// > On the x64 architecture, changing the floating point precision is not supported.
|
// work on win.x64: > On the x64 architecture, changing the floating point
|
||||||
// (Taken from http://msdn.microsoft.com/en-us/library/e9b52ceh%28v=vs.100%29.aspx)
|
// precision is not supported. (Taken from
|
||||||
|
// http://msdn.microsoft.com/en-us/library/e9b52ceh%28v=vs.100%29.aspx)
|
||||||
int cw;
|
int cw;
|
||||||
__asm { fnstcw cw }; // Get current value of FPU control word.
|
__asm { fnstcw cw }
|
||||||
cw = cw & 0xfffffcff | ( 3 << 8 ); // Set Precision Control to Double Extended Precision.
|
; // Get current value of FPU control word.
|
||||||
__asm { fldcw cw }; // Set new value of FPU control word.
|
cw = cw & 0xfffffcff
|
||||||
|
| (3 << 8); // Set Precision Control to Double Extended Precision.
|
||||||
|
__asm { fldcw cw }
|
||||||
|
; // Set new value of FPU control word.
|
||||||
#else
|
#else
|
||||||
/* Implement for other platforms if needed */
|
/* Implement for other platforms if needed */
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
extern
|
extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes);
|
||||||
void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
|
|
||||||
|
|
||||||
// Overlay of a 32-bit signed integer and a float in the same storage, used to
// view one as the other.
typedef union {
    int32_t i; // integer view
    float f; // floating-point view
} int32f_t;
|
||||||
|
|
||||||
// Overlay of a 64-bit signed integer and a double in the same storage, used
// to view one as the other.
typedef union {
    int64_t l; // integer view
    double d; // floating-point view
} int64d_t;
|
||||||
|
|
||||||
void MulD(double *rhi, double *rlo, double u, double v);
|
void MulD(double *rhi, double *rlo, double u, double v);
|
||||||
void AddD(double *rhi, double *rlo, double a, double b);
|
void AddD(double *rhi, double *rlo, double a, double b);
|
||||||
void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
|
void MulDD(double *rhi, double *rlo, double xh, double xl, double yh,
|
||||||
void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
|
double yl);
|
||||||
|
void AddDD(double *rhi, double *rlo, double xh, double xl, double yh,
|
||||||
|
double yl);
|
||||||
void DivideDD(double *chi, double *clo, double a, double b);
|
void DivideDD(double *chi, double *clo, double a, double b);
|
||||||
int compareFloats(float x, float y);
|
int compareFloats(float x, float y);
|
||||||
int compareDoubles(double x, double y);
|
int compareDoubles(double x, double y);
|
||||||
|
|
||||||
void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed);
|
void logFunctionInfo(const char *fname, unsigned int float_size,
|
||||||
|
unsigned int isFastRelaxed);
|
||||||
|
|
||||||
float getAllowedUlpError(const Func *f, const bool relaxed);
|
float getAllowedUlpError(const Func *f, const bool relaxed);
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
// You may obtain a copy of the License at
|
// You may obtain a copy of the License at
|
||||||
@@ -33,60 +33,77 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
|
|||||||
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
||||||
cl_program *p, bool relaxedMode)
|
cl_program *p, bool relaxedMode)
|
||||||
{
|
{
|
||||||
const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n"
|
const char *c[] = { "__kernel void math_kernel",
|
||||||
"{\n"
|
sizeNames[vectorSize],
|
||||||
" int i = get_global_id(0);\n"
|
"( __global int",
|
||||||
" out[i] = ", name, "( in[i] );\n"
|
sizeNames[vectorSize],
|
||||||
"}\n"
|
"* out, __global float",
|
||||||
};
|
sizeNames[vectorSize],
|
||||||
const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global float* in)\n"
|
"* in)\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" size_t i = get_global_id(0);\n"
|
" int i = get_global_id(0);\n"
|
||||||
" if( i + 1 < get_global_size(0) )\n"
|
" out[i] = ",
|
||||||
" {\n"
|
name,
|
||||||
" float3 f0 = vload3( 0, in + 3 * i );\n"
|
"( in[i] );\n"
|
||||||
" int3 i0 = ", name, "( f0 );\n"
|
"}\n" };
|
||||||
" vstore3( i0, 0, out + 3*i );\n"
|
const char *c3[] = {
|
||||||
" }\n"
|
"__kernel void math_kernel",
|
||||||
" else\n"
|
sizeNames[vectorSize],
|
||||||
" {\n"
|
"( __global int* out, __global float* in)\n"
|
||||||
" size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
|
"{\n"
|
||||||
" float3 f0;\n"
|
" size_t i = get_global_id(0);\n"
|
||||||
" switch( parity )\n"
|
" if( i + 1 < get_global_size(0) )\n"
|
||||||
" {\n"
|
" {\n"
|
||||||
" case 1:\n"
|
" float3 f0 = vload3( 0, in + 3 * i );\n"
|
||||||
" f0 = (float3)( in[3*i], NAN, NAN ); \n"
|
" int3 i0 = ",
|
||||||
" break;\n"
|
name,
|
||||||
" case 0:\n"
|
"( f0 );\n"
|
||||||
" f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
|
" vstore3( i0, 0, out + 3*i );\n"
|
||||||
" break;\n"
|
" }\n"
|
||||||
" }\n"
|
" else\n"
|
||||||
" int3 i0 = ", name, "( f0 );\n"
|
" {\n"
|
||||||
" switch( parity )\n"
|
" size_t parity = i & 1; // Figure out how many elements are "
|
||||||
" {\n"
|
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||||
" case 0:\n"
|
"buffer size \n"
|
||||||
" out[3*i+1] = i0.y; \n"
|
" float3 f0;\n"
|
||||||
" // fall through\n"
|
" switch( parity )\n"
|
||||||
" case 1:\n"
|
" {\n"
|
||||||
" out[3*i] = i0.x; \n"
|
" case 1:\n"
|
||||||
" break;\n"
|
" f0 = (float3)( in[3*i], NAN, NAN ); \n"
|
||||||
" }\n"
|
" break;\n"
|
||||||
" }\n"
|
" case 0:\n"
|
||||||
"}\n"
|
" f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||||
};
|
" break;\n"
|
||||||
|
" }\n"
|
||||||
|
" int3 i0 = ",
|
||||||
|
name,
|
||||||
|
"( f0 );\n"
|
||||||
|
" switch( parity )\n"
|
||||||
|
" {\n"
|
||||||
|
" case 0:\n"
|
||||||
|
" out[3*i+1] = i0.y; \n"
|
||||||
|
" // fall through\n"
|
||||||
|
" case 1:\n"
|
||||||
|
" out[3*i] = i0.x; \n"
|
||||||
|
" break;\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
"}\n"
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
const char **kern = c;
|
const char **kern = c;
|
||||||
size_t kernSize = sizeof(c)/sizeof(c[0]);
|
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||||
|
|
||||||
if( sizeValues[vectorSize] == 3 )
|
if (sizeValues[vectorSize] == 3)
|
||||||
{
|
{
|
||||||
kern = c3;
|
kern = c3;
|
||||||
kernSize = sizeof(c3)/sizeof(c3[0]);
|
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
char testName[32];
|
char testName[32];
|
||||||
snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
|
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||||
|
sizeNames[vectorSize]);
|
||||||
|
|
||||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||||
}
|
}
|
||||||
@@ -95,88 +112,109 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k,
|
|||||||
cl_program *p, bool relaxedMode)
|
cl_program *p, bool relaxedMode)
|
||||||
{
|
{
|
||||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||||
"__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n"
|
"__kernel void math_kernel",
|
||||||
"{\n"
|
sizeNames[vectorSize],
|
||||||
" int i = get_global_id(0);\n"
|
"( __global int",
|
||||||
" out[i] = ", name, "( in[i] );\n"
|
sizeNames[vectorSize],
|
||||||
"}\n"
|
"* out, __global double",
|
||||||
};
|
sizeNames[vectorSize],
|
||||||
|
"* in)\n"
|
||||||
const char *c3[] = {"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
|
||||||
"__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global double* in)\n"
|
|
||||||
"{\n"
|
"{\n"
|
||||||
" size_t i = get_global_id(0);\n"
|
" int i = get_global_id(0);\n"
|
||||||
" if( i + 1 < get_global_size(0) )\n"
|
" out[i] = ",
|
||||||
" {\n"
|
name,
|
||||||
" double3 f0 = vload3( 0, in + 3 * i );\n"
|
"( in[i] );\n"
|
||||||
" int3 i0 = ", name, "( f0 );\n"
|
"}\n" };
|
||||||
" vstore3( i0, 0, out + 3*i );\n"
|
|
||||||
" }\n"
|
const char *c3[] = {
|
||||||
" else\n"
|
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||||
" {\n"
|
"__kernel void math_kernel",
|
||||||
" size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
|
sizeNames[vectorSize],
|
||||||
" double3 f0;\n"
|
"( __global int* out, __global double* in)\n"
|
||||||
" switch( parity )\n"
|
"{\n"
|
||||||
" {\n"
|
" size_t i = get_global_id(0);\n"
|
||||||
" case 1:\n"
|
" if( i + 1 < get_global_size(0) )\n"
|
||||||
" f0 = (double3)( in[3*i], NAN, NAN ); \n"
|
" {\n"
|
||||||
" break;\n"
|
" double3 f0 = vload3( 0, in + 3 * i );\n"
|
||||||
" case 0:\n"
|
" int3 i0 = ",
|
||||||
" f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
|
name,
|
||||||
" break;\n"
|
"( f0 );\n"
|
||||||
" }\n"
|
" vstore3( i0, 0, out + 3*i );\n"
|
||||||
" int3 i0 = ", name, "( f0 );\n"
|
" }\n"
|
||||||
" switch( parity )\n"
|
" else\n"
|
||||||
" {\n"
|
" {\n"
|
||||||
" case 0:\n"
|
" size_t parity = i & 1; // Figure out how many elements are "
|
||||||
" out[3*i+1] = i0.y; \n"
|
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||||
" // fall through\n"
|
"buffer size \n"
|
||||||
" case 1:\n"
|
" double3 f0;\n"
|
||||||
" out[3*i] = i0.x; \n"
|
" switch( parity )\n"
|
||||||
" break;\n"
|
" {\n"
|
||||||
" }\n"
|
" case 1:\n"
|
||||||
" }\n"
|
" f0 = (double3)( in[3*i], NAN, NAN ); \n"
|
||||||
"}\n"
|
" break;\n"
|
||||||
};
|
" case 0:\n"
|
||||||
|
" f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||||
|
" break;\n"
|
||||||
|
" }\n"
|
||||||
|
" int3 i0 = ",
|
||||||
|
name,
|
||||||
|
"( f0 );\n"
|
||||||
|
" switch( parity )\n"
|
||||||
|
" {\n"
|
||||||
|
" case 0:\n"
|
||||||
|
" out[3*i+1] = i0.y; \n"
|
||||||
|
" // fall through\n"
|
||||||
|
" case 1:\n"
|
||||||
|
" out[3*i] = i0.x; \n"
|
||||||
|
" break;\n"
|
||||||
|
" }\n"
|
||||||
|
" }\n"
|
||||||
|
"}\n"
|
||||||
|
};
|
||||||
|
|
||||||
const char **kern = c;
|
const char **kern = c;
|
||||||
size_t kernSize = sizeof(c)/sizeof(c[0]);
|
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||||
|
|
||||||
if( sizeValues[vectorSize] == 3 )
|
if (sizeValues[vectorSize] == 3)
|
||||||
{
|
{
|
||||||
kern = c3;
|
kern = c3;
|
||||||
kernSize = sizeof(c3)/sizeof(c3[0]);
|
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
char testName[32];
|
char testName[32];
|
||||||
snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
|
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||||
|
sizeNames[vectorSize]);
|
||||||
|
|
||||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct BuildKernelInfo
|
typedef struct BuildKernelInfo
|
||||||
{
|
{
|
||||||
cl_uint offset; // the first vector size to build
|
cl_uint offset; // the first vector size to build
|
||||||
cl_kernel *kernels;
|
cl_kernel *kernels;
|
||||||
cl_program *programs;
|
cl_program *programs;
|
||||||
const char *nameInCode;
|
const char *nameInCode;
|
||||||
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
||||||
}BuildKernelInfo;
|
} BuildKernelInfo;
|
||||||
|
|
||||||
// Job callback that builds the float kernel for one vector size.
//
// `p` points to a BuildKernelInfo; the vector-size index is its `offset` plus
// `job_id`, and the matching slots of its `kernels` / `programs` arrays are
// passed to BuildKernel. `thread_id` is not used. Returns BuildKernel's
// result.
static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED,
                                  void *p)
{
    BuildKernelInfo *job = (BuildKernelInfo *)p;
    const cl_uint vectorSizeIndex = job->offset + job_id;
    cl_kernel *kernelSlot = job->kernels + vectorSizeIndex;
    cl_program *programSlot = job->programs + vectorSizeIndex;

    return BuildKernel(job->nameInCode, vectorSizeIndex, kernelSlot,
                       programSlot, job->relaxedMode);
}
|
||||||
|
|
||||||
static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
|
static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
|
||||||
static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
|
void *p);
|
||||||
|
static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED,
|
||||||
|
void *p)
|
||||||
{
|
{
|
||||||
BuildKernelInfo *info = (BuildKernelInfo*) p;
|
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
||||||
cl_uint i = info->offset + job_id;
|
cl_uint i = info->offset + job_id;
|
||||||
return BuildKernelDouble(info->nameInCode, i, info->kernels + i,
|
return BuildKernelDouble(info->nameInCode, i, info->kernels + i,
|
||||||
info->programs + i, info->relaxedMode);
|
info->programs + i, info->relaxedMode);
|
||||||
@@ -187,12 +225,12 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
uint64_t i;
|
uint64_t i;
|
||||||
uint32_t j, k;
|
uint32_t j, k;
|
||||||
int error;
|
int error;
|
||||||
cl_program programs[ VECTOR_SIZE_COUNT ];
|
cl_program programs[VECTOR_SIZE_COUNT];
|
||||||
cl_kernel kernels[ VECTOR_SIZE_COUNT ];
|
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||||
int ftz = f->ftz || 0 == (gFloatCapabilities & CL_FP_DENORM) || gForceFTZ;
|
int ftz = f->ftz || 0 == (gFloatCapabilities & CL_FP_DENORM) || gForceFTZ;
|
||||||
size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE;
|
size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
|
||||||
uint64_t step = getTestStep(sizeof(float), bufferSize);
|
uint64_t step = getTestStep(sizeof(float), bufferSize);
|
||||||
int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
|
int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(float)) + 1);
|
||||||
|
|
||||||
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
||||||
|
|
||||||
@@ -206,191 +244,226 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
// Init the kernels
|
// Init the kernels
|
||||||
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
|
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
|
||||||
f->nameInCode, relaxedMode };
|
f->nameInCode, relaxedMode };
|
||||||
if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
|
if ((error = ThreadPool_Do(BuildKernel_FloatFn,
|
||||||
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||||
|
&build_info)))
|
||||||
return error;
|
return error;
|
||||||
/*
|
/*
|
||||||
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
||||||
if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
|
if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i,
|
||||||
return error;
|
programs + i) ) ) return error;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
for( i = 0; i < (1ULL<<32); i += step )
|
for (i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
//Init input array
|
// Init input array
|
||||||
uint32_t *p = (uint32_t *)gIn;
|
uint32_t *p = (uint32_t *)gIn;
|
||||||
if( gWimpyMode )
|
if (gWimpyMode)
|
||||||
{
|
{
|
||||||
for( j = 0; j < bufferSize / sizeof( float ); j++ )
|
for (j = 0; j < bufferSize / sizeof(float); j++)
|
||||||
p[j] = (uint32_t) i + j * scale;
|
p[j] = (uint32_t)i + j * scale;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for( j = 0; j < bufferSize / sizeof( float ); j++ )
|
for (j = 0; j < bufferSize / sizeof(float); j++)
|
||||||
p[j] = (uint32_t) i + j;
|
p[j] = (uint32_t)i + j;
|
||||||
}
|
}
|
||||||
if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
|
bufferSize, gIn, 0, NULL, NULL)))
|
||||||
{
|
{
|
||||||
vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, bufferSize);
|
memset_pattern4(gOut[j], &pattern, bufferSize);
|
||||||
if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
|
if ((error =
|
||||||
|
clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
|
||||||
|
bufferSize, gOut[j], 0, NULL, NULL)))
|
||||||
{
|
{
|
||||||
vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||||
|
error, j);
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
||||||
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
||||||
if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
|
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||||
if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
|
&gOutBuffer[j])))
|
||||||
|
|
||||||
if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
|
|
||||||
{
|
{
|
||||||
vlog_error( "FAILED -- could not execute kernel\n" );
|
LogBuildError(programs[j]);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
|
||||||
|
&gInBuffer)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error =
|
||||||
|
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||||
|
&localCount, NULL, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get that moving
|
// Get that moving
|
||||||
if( (error = clFlush(gQueue) ))
|
if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
|
||||||
vlog( "clFlush failed\n" );
|
|
||||||
|
|
||||||
//Calculate the correctly rounded reference result
|
// Calculate the correctly rounded reference result
|
||||||
int *r = (int *)gOut_Ref;
|
int *r = (int *)gOut_Ref;
|
||||||
float *s = (float *)gIn;
|
float *s = (float *)gIn;
|
||||||
for( j = 0; j < bufferSize / sizeof( float ); j++ )
|
for (j = 0; j < bufferSize / sizeof(float); j++)
|
||||||
r[j] = f->func.i_f( s[j] );
|
r[j] = f->func.i_f(s[j]);
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
|
if ((error =
|
||||||
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
|
bufferSize, gOut[j], 0, NULL, NULL)))
|
||||||
{
|
{
|
||||||
vlog_error( "ReadArray failed %d\n", error );
|
vlog_error("ReadArray failed %d\n", error);
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if( gSkipCorrectnessTesting )
|
if (gSkipCorrectnessTesting) break;
|
||||||
break;
|
|
||||||
|
|
||||||
//Verify data
|
// Verify data
|
||||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||||
for( j = 0; j < bufferSize / sizeof( float ); j++ )
|
for (j = 0; j < bufferSize / sizeof(float); j++)
|
||||||
{
|
{
|
||||||
for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
|
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
uint32_t *q = (uint32_t *)(gOut[k]);
|
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||||
// If we aren't getting the correctly rounded result
|
// If we aren't getting the correctly rounded result
|
||||||
if( t[j] != q[j] )
|
if (t[j] != q[j])
|
||||||
{
|
{
|
||||||
if( ftz && IsFloatSubnormal(s[j]))
|
if (ftz && IsFloatSubnormal(s[j]))
|
||||||
{
|
{
|
||||||
unsigned int correct0 = f->func.i_f( 0.0 );
|
unsigned int correct0 = f->func.i_f(0.0);
|
||||||
unsigned int correct1 = f->func.i_f( -0.0 );
|
unsigned int correct1 = f->func.i_f(-0.0);
|
||||||
if( q[j] == correct0 || q[j] == correct1 )
|
if (q[j] == correct0 || q[j] == correct1) continue;
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t err = t[j] - q[j];
|
uint32_t err = t[j] - q[j];
|
||||||
if( q[j] > t[j] )
|
if (q[j] > t[j]) err = q[j] - t[j];
|
||||||
err = q[j] - t[j];
|
vlog_error("\nERROR: %s%s: %d ulp error at %a (0x%8.8x): "
|
||||||
vlog_error( "\nERROR: %s%s: %d ulp error at %a (0x%8.8x): *%d vs. %d\n", f->name, sizeNames[k], err, ((float*) gIn)[j], ((cl_uint*) gIn)[j], t[j], q[j] );
|
"*%d vs. %d\n",
|
||||||
error = -1;
|
f->name, sizeNames[k], err, ((float *)gIn)[j],
|
||||||
goto exit;
|
((cl_uint *)gIn)[j], t[j], q[j]);
|
||||||
|
error = -1;
|
||||||
|
goto exit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if( 0 == (i & 0x0fffffff) )
|
if (0 == (i & 0x0fffffff))
|
||||||
{
|
{
|
||||||
if (gVerboseBruteForce)
|
if (gVerboseBruteForce)
|
||||||
{
|
{
|
||||||
vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize);
|
vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
|
||||||
} else
|
bufferSize);
|
||||||
{
|
}
|
||||||
vlog("." );
|
else
|
||||||
}
|
{
|
||||||
fflush(stdout);
|
vlog(".");
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if( ! gSkipCorrectnessTesting )
|
if (!gSkipCorrectnessTesting)
|
||||||
{
|
{
|
||||||
if( gWimpyMode )
|
if (gWimpyMode)
|
||||||
vlog( "Wimp pass" );
|
vlog("Wimp pass");
|
||||||
else
|
else
|
||||||
vlog( "passed" );
|
vlog("passed");
|
||||||
}
|
}
|
||||||
|
|
||||||
if( gMeasureTimes )
|
if (gMeasureTimes)
|
||||||
{
|
{
|
||||||
//Init input array
|
// Init input array
|
||||||
uint32_t *p = (uint32_t *)gIn;
|
uint32_t *p = (uint32_t *)gIn;
|
||||||
for( j = 0; j < bufferSize / sizeof( float ); j++ )
|
for (j = 0; j < bufferSize / sizeof(float); j++)
|
||||||
p[j] = genrand_int32(d);
|
p[j] = genrand_int32(d);
|
||||||
if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
|
bufferSize, gIn, 0, NULL, NULL)))
|
||||||
{
|
{
|
||||||
vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
||||||
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
||||||
if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
|
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||||
if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
|
&gOutBuffer[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
|
||||||
|
&gInBuffer)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
|
||||||
double sum = 0.0;
|
double sum = 0.0;
|
||||||
double bestTime = INFINITY;
|
double bestTime = INFINITY;
|
||||||
for( k = 0; k < PERF_LOOP_COUNT; k++ )
|
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||||
{
|
{
|
||||||
uint64_t startTime = GetTime();
|
uint64_t startTime = GetTime();
|
||||||
if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
|
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||||
|
&localCount, NULL, 0, NULL,
|
||||||
|
NULL)))
|
||||||
{
|
{
|
||||||
vlog_error( "FAILED -- could not execute kernel\n" );
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure OpenCL is done
|
// Make sure OpenCL is done
|
||||||
if( (error = clFinish(gQueue) ) )
|
if ((error = clFinish(gQueue)))
|
||||||
{
|
{
|
||||||
vlog_error( "Error %d at clFinish\n", error );
|
vlog_error("Error %d at clFinish\n", error);
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t endTime = GetTime();
|
uint64_t endTime = GetTime();
|
||||||
double time = SubtractTime( endTime, startTime );
|
double time = SubtractTime(endTime, startTime);
|
||||||
sum += time;
|
sum += time;
|
||||||
if( time < bestTime )
|
if (time < bestTime) bestTime = time;
|
||||||
bestTime = time;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if( gReportAverageTimes )
|
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||||
bestTime = sum / PERF_LOOP_COUNT;
|
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||||
double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
|
* gComputeDevices * gSimdSize * 1e6
|
||||||
vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
|
/ (bufferSize / sizeof(float));
|
||||||
|
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
|
||||||
|
f->name, sizeNames[j]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
vlog( "\n" );
|
vlog("\n");
|
||||||
exit:
|
exit:
|
||||||
RestoreFPState(&oldMode);
|
RestoreFPState(&oldMode);
|
||||||
// Release
|
// Release
|
||||||
for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
|
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
@@ -404,12 +477,12 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
uint64_t i;
|
uint64_t i;
|
||||||
uint32_t j, k;
|
uint32_t j, k;
|
||||||
int error;
|
int error;
|
||||||
cl_program programs[ VECTOR_SIZE_COUNT ];
|
cl_program programs[VECTOR_SIZE_COUNT];
|
||||||
cl_kernel kernels[ VECTOR_SIZE_COUNT ];
|
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
||||||
int ftz = f->ftz || gForceFTZ;
|
int ftz = f->ftz || gForceFTZ;
|
||||||
size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE;
|
size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE;
|
||||||
uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
|
uint64_t step = getTestStep(sizeof(cl_double), bufferSize);
|
||||||
int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1);
|
int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(cl_double)) + 1);
|
||||||
|
|
||||||
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
|
||||||
|
|
||||||
@@ -423,200 +496,231 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
|||||||
// Init the kernels
|
// Init the kernels
|
||||||
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
|
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
|
||||||
f->nameInCode, relaxedMode };
|
f->nameInCode, relaxedMode };
|
||||||
if( (error = ThreadPool_Do( BuildKernel_DoubleFn,
|
if ((error = ThreadPool_Do(BuildKernel_DoubleFn,
|
||||||
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
||||||
&build_info ) ))
|
&build_info)))
|
||||||
{
|
{
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
||||||
if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
|
if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels +
|
||||||
return error;
|
i, programs + i) ) ) return error;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
for( i = 0; i < (1ULL<<32); i += step )
|
for (i = 0; i < (1ULL << 32); i += step)
|
||||||
{
|
{
|
||||||
//Init input array
|
// Init input array
|
||||||
double *p = (double *)gIn;
|
double *p = (double *)gIn;
|
||||||
if( gWimpyMode )
|
if (gWimpyMode)
|
||||||
{
|
{
|
||||||
for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
|
for (j = 0; j < bufferSize / sizeof(cl_double); j++)
|
||||||
p[j] = DoubleFromUInt32( (uint32_t) i + j * scale );
|
p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
|
for (j = 0; j < bufferSize / sizeof(cl_double); j++)
|
||||||
p[j] = DoubleFromUInt32( (uint32_t) i + j );
|
p[j] = DoubleFromUInt32((uint32_t)i + j);
|
||||||
}
|
}
|
||||||
if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
|
bufferSize, gIn, 0, NULL, NULL)))
|
||||||
{
|
{
|
||||||
vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
// write garbage into output arrays
|
// write garbage into output arrays
|
||||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
uint32_t pattern = 0xffffdead;
|
uint32_t pattern = 0xffffdead;
|
||||||
memset_pattern4(gOut[j], &pattern, bufferSize);
|
memset_pattern4(gOut[j], &pattern, bufferSize);
|
||||||
if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
|
if ((error =
|
||||||
|
clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
|
||||||
|
bufferSize, gOut[j], 0, NULL, NULL)))
|
||||||
{
|
{
|
||||||
vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
||||||
|
error, j);
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
||||||
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
||||||
if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
|
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||||
if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
|
&gOutBuffer[j])))
|
||||||
|
|
||||||
if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
|
|
||||||
{
|
{
|
||||||
vlog_error( "FAILED -- could not execute kernel\n" );
|
LogBuildError(programs[j]);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
|
||||||
|
&gInBuffer)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((error =
|
||||||
|
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||||
|
&localCount, NULL, 0, NULL, NULL)))
|
||||||
|
{
|
||||||
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get that moving
|
// Get that moving
|
||||||
if( (error = clFlush(gQueue) ))
|
if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
|
||||||
vlog( "clFlush failed\n" );
|
|
||||||
|
|
||||||
//Calculate the correctly rounded reference result
|
// Calculate the correctly rounded reference result
|
||||||
int *r = (int *)gOut_Ref;
|
int *r = (int *)gOut_Ref;
|
||||||
double *s = (double *)gIn;
|
double *s = (double *)gIn;
|
||||||
for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
|
for (j = 0; j < bufferSize / sizeof(cl_double); j++)
|
||||||
r[j] = f->dfunc.i_f( s[j] );
|
r[j] = f->dfunc.i_f(s[j]);
|
||||||
|
|
||||||
// Read the data back
|
// Read the data back
|
||||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
|
if ((error =
|
||||||
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
||||||
|
bufferSize, gOut[j], 0, NULL, NULL)))
|
||||||
{
|
{
|
||||||
vlog_error( "ReadArray failed %d\n", error );
|
vlog_error("ReadArray failed %d\n", error);
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if( gSkipCorrectnessTesting )
|
if (gSkipCorrectnessTesting) break;
|
||||||
break;
|
|
||||||
|
|
||||||
//Verify data
|
// Verify data
|
||||||
uint32_t *t = (uint32_t *)gOut_Ref;
|
uint32_t *t = (uint32_t *)gOut_Ref;
|
||||||
for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
|
for (j = 0; j < bufferSize / sizeof(cl_double); j++)
|
||||||
{
|
{
|
||||||
for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
|
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
uint32_t *q = (uint32_t *)(gOut[k]);
|
uint32_t *q = (uint32_t *)(gOut[k]);
|
||||||
// If we aren't getting the correctly rounded result
|
// If we aren't getting the correctly rounded result
|
||||||
if( t[j] != q[j] )
|
if (t[j] != q[j])
|
||||||
{
|
{
|
||||||
if( ftz && IsDoubleSubnormal(s[j]))
|
if (ftz && IsDoubleSubnormal(s[j]))
|
||||||
{
|
{
|
||||||
unsigned int correct0 = f->dfunc.i_f( 0.0 );
|
unsigned int correct0 = f->dfunc.i_f(0.0);
|
||||||
unsigned int correct1 = f->dfunc.i_f( -0.0 );
|
unsigned int correct1 = f->dfunc.i_f(-0.0);
|
||||||
if( q[j] == correct0 || q[j] == correct1 )
|
if (q[j] == correct0 || q[j] == correct1) continue;
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t err = t[j] - q[j];
|
uint32_t err = t[j] - q[j];
|
||||||
if( q[j] > t[j] )
|
if (q[j] > t[j]) err = q[j] - t[j];
|
||||||
err = q[j] - t[j];
|
vlog_error(
|
||||||
vlog_error( "\nERROR: %sD%s: %d ulp error at %.13la: *%d vs. %d\n", f->name, sizeNames[k], err, ((double*) gIn)[j], t[j], q[j] );
|
"\nERROR: %sD%s: %d ulp error at %.13la: *%d vs. %d\n",
|
||||||
error = -1;
|
f->name, sizeNames[k], err, ((double *)gIn)[j], t[j],
|
||||||
goto exit;
|
q[j]);
|
||||||
|
error = -1;
|
||||||
|
goto exit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if( 0 == (i & 0x0fffffff) )
|
if (0 == (i & 0x0fffffff))
|
||||||
{
|
{
|
||||||
if (gVerboseBruteForce)
|
if (gVerboseBruteForce)
|
||||||
{
|
{
|
||||||
vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize);
|
vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
|
||||||
} else
|
bufferSize);
|
||||||
{
|
|
||||||
vlog("." );
|
|
||||||
}
|
}
|
||||||
fflush(stdout);
|
else
|
||||||
|
{
|
||||||
|
vlog(".");
|
||||||
|
}
|
||||||
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if( ! gSkipCorrectnessTesting )
|
if (!gSkipCorrectnessTesting)
|
||||||
{
|
{
|
||||||
if( gWimpyMode )
|
if (gWimpyMode)
|
||||||
vlog( "Wimp pass" );
|
vlog("Wimp pass");
|
||||||
else
|
else
|
||||||
vlog( "passed" );
|
vlog("passed");
|
||||||
}
|
}
|
||||||
|
|
||||||
if( gMeasureTimes )
|
if (gMeasureTimes)
|
||||||
{
|
{
|
||||||
//Init input array
|
// Init input array
|
||||||
double *p = (double *)gIn;
|
double *p = (double *)gIn;
|
||||||
for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
|
for (j = 0; j < bufferSize / sizeof(cl_double); j++)
|
||||||
p[j] = DoubleFromUInt32( genrand_int32(d) );
|
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||||
if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||||
|
bufferSize, gIn, 0, NULL, NULL)))
|
||||||
{
|
{
|
||||||
vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Run the kernels
|
// Run the kernels
|
||||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||||
{
|
{
|
||||||
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
||||||
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
||||||
if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
|
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||||
if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
|
&gOutBuffer[j])))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
|
||||||
|
&gInBuffer)))
|
||||||
|
{
|
||||||
|
LogBuildError(programs[j]);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
|
||||||
double sum = 0.0;
|
double sum = 0.0;
|
||||||
double bestTime = INFINITY;
|
double bestTime = INFINITY;
|
||||||
for( k = 0; k < PERF_LOOP_COUNT; k++ )
|
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||||
{
|
{
|
||||||
uint64_t startTime = GetTime();
|
uint64_t startTime = GetTime();
|
||||||
if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
|
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||||
|
&localCount, NULL, 0, NULL,
|
||||||
|
NULL)))
|
||||||
{
|
{
|
||||||
vlog_error( "FAILED -- could not execute kernel\n" );
|
vlog_error("FAILED -- could not execute kernel\n");
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure OpenCL is done
|
// Make sure OpenCL is done
|
||||||
if( (error = clFinish(gQueue) ) )
|
if ((error = clFinish(gQueue)))
|
||||||
{
|
{
|
||||||
vlog_error( "Error %d at clFinish\n", error );
|
vlog_error("Error %d at clFinish\n", error);
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t endTime = GetTime();
|
uint64_t endTime = GetTime();
|
||||||
double time = SubtractTime( endTime, startTime );
|
double time = SubtractTime(endTime, startTime);
|
||||||
sum += time;
|
sum += time;
|
||||||
if( time < bestTime )
|
if (time < bestTime) bestTime = time;
|
||||||
bestTime = time;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if( gReportAverageTimes )
|
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||||
bestTime = sum / PERF_LOOP_COUNT;
|
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||||
double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
|
* gComputeDevices * gSimdSize * 1e6
|
||||||
vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
|
/ (bufferSize / sizeof(double));
|
||||||
|
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
|
||||||
|
f->name, sizeNames[j]);
|
||||||
}
|
}
|
||||||
for( ; j < gMaxVectorSizeIndex; j++ )
|
for (; j < gMaxVectorSizeIndex; j++) vlog("\t -- ");
|
||||||
vlog( "\t -- " );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
vlog( "\n" );
|
vlog("\n");
|
||||||
|
|
||||||
|
|
||||||
exit:
|
exit:
|
||||||
RestoreFPState(&oldMode);
|
RestoreFPState(&oldMode);
|
||||||
// Release
|
// Release
|
||||||
for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
|
for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
||||||
{
|
{
|
||||||
clReleaseKernel(kernels[k]);
|
clReleaseKernel(kernels[k]);
|
||||||
clReleaseProgram(programs[k]);
|
clReleaseProgram(programs[k]);
|
||||||
@@ -624,4 +728,3 @@ exit:
|
|||||||
|
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
// You may obtain a copy of the License at
|
// You may obtain a copy of the License at
|
||||||
@@ -16,223 +16,221 @@
|
|||||||
#ifndef REFERENCE_MATH_H
|
#ifndef REFERENCE_MATH_H
|
||||||
#define REFERENCE_MATH_H
|
#define REFERENCE_MATH_H
|
||||||
|
|
||||||
#if defined( __APPLE__ )
|
#if defined(__APPLE__)
|
||||||
#include <OpenCL/opencl.h>
|
#include <OpenCL/opencl.h>
|
||||||
#else
|
#else
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// -- for testing float --
|
// -- for testing float --
|
||||||
double reference_sinh( double x );
|
double reference_sinh(double x);
|
||||||
double reference_sqrt( double x );
|
double reference_sqrt(double x);
|
||||||
double reference_tanh( double x );
|
double reference_tanh(double x);
|
||||||
double reference_acos( double );
|
double reference_acos(double);
|
||||||
double reference_asin( double );
|
double reference_asin(double);
|
||||||
double reference_atan( double );
|
double reference_atan(double);
|
||||||
double reference_atan2( double, double );
|
double reference_atan2(double, double);
|
||||||
double reference_ceil( double );
|
double reference_ceil(double);
|
||||||
double reference_cosh( double );
|
double reference_cosh(double);
|
||||||
double reference_exp( double );
|
double reference_exp(double);
|
||||||
double reference_fabs( double );
|
double reference_fabs(double);
|
||||||
double reference_acospi( double );
|
double reference_acospi(double);
|
||||||
double reference_asinpi( double );
|
double reference_asinpi(double);
|
||||||
double reference_atanpi( double );
|
double reference_atanpi(double);
|
||||||
double reference_atan2pi( double, double );
|
double reference_atan2pi(double, double);
|
||||||
double reference_cospi( double );
|
double reference_cospi(double);
|
||||||
double reference_divide( double, double );
|
double reference_divide(double, double);
|
||||||
double reference_fract( double, double * );
|
double reference_fract(double, double*);
|
||||||
float reference_fma( float, float, float, int );
|
float reference_fma(float, float, float, int);
|
||||||
double reference_mad( double, double, double );
|
double reference_mad(double, double, double);
|
||||||
double reference_nextafter(double, double );
|
double reference_nextafter(double, double);
|
||||||
double reference_recip( double );
|
double reference_recip(double);
|
||||||
double reference_rootn( double, int );
|
double reference_rootn(double, int);
|
||||||
double reference_rsqrt( double );
|
double reference_rsqrt(double);
|
||||||
double reference_sincos( double, double * );
|
double reference_sincos(double, double*);
|
||||||
double reference_sinpi( double );
|
double reference_sinpi(double);
|
||||||
double reference_tanpi( double );
|
double reference_tanpi(double);
|
||||||
double reference_pow(double x, double y);
|
double reference_pow(double x, double y);
|
||||||
double reference_pown( double, int );
|
double reference_pown(double, int);
|
||||||
double reference_powr( double, double );
|
double reference_powr(double, double);
|
||||||
double reference_cos( double );
|
double reference_cos(double);
|
||||||
double reference_sin( double );
|
double reference_sin(double);
|
||||||
double reference_tan( double );
|
double reference_tan(double);
|
||||||
double reference_log( double );
|
double reference_log(double);
|
||||||
double reference_log10( double );
|
double reference_log10(double);
|
||||||
double reference_modf( double, double *n );
|
double reference_modf(double, double* n);
|
||||||
|
|
||||||
double reference_fdim( double, double );
|
double reference_fdim(double, double);
|
||||||
double reference_add( double, double );
|
double reference_add(double, double);
|
||||||
double reference_subtract( double, double );
|
double reference_subtract(double, double);
|
||||||
double reference_divide( double, double );
|
double reference_divide(double, double);
|
||||||
double reference_multiply( double, double );
|
double reference_multiply(double, double);
|
||||||
double reference_remquo( double, double, int* );
|
double reference_remquo(double, double, int*);
|
||||||
double reference_lgamma_r( double, int* );
|
double reference_lgamma_r(double, int*);
|
||||||
|
|
||||||
int reference_isequal( double, double );
|
int reference_isequal(double, double);
|
||||||
int reference_isfinite( double );
|
int reference_isfinite(double);
|
||||||
int reference_isgreater( double, double );
|
int reference_isgreater(double, double);
|
||||||
int reference_isgreaterequal( double, double );
|
int reference_isgreaterequal(double, double);
|
||||||
int reference_isinf( double );
|
int reference_isinf(double);
|
||||||
int reference_isless( double, double );
|
int reference_isless(double, double);
|
||||||
int reference_islessequal( double, double );
|
int reference_islessequal(double, double);
|
||||||
int reference_islessgreater( double, double );
|
int reference_islessgreater(double, double);
|
||||||
int reference_isnan( double );
|
int reference_isnan(double);
|
||||||
int reference_isnormal( double );
|
int reference_isnormal(double);
|
||||||
int reference_isnotequal( double, double );
|
int reference_isnotequal(double, double);
|
||||||
int reference_isordered( double, double );
|
int reference_isordered(double, double);
|
||||||
int reference_isunordered( double, double );
|
int reference_isunordered(double, double);
|
||||||
int reference_signbit( float );
|
int reference_signbit(float);
|
||||||
|
|
||||||
double reference_acosh( double x );
|
double reference_acosh(double x);
|
||||||
double reference_asinh( double x );
|
double reference_asinh(double x);
|
||||||
double reference_atanh( double x );
|
double reference_atanh(double x);
|
||||||
double reference_cbrt(double x);
|
double reference_cbrt(double x);
|
||||||
float reference_copysign( float x, float y);
|
float reference_copysign(float x, float y);
|
||||||
double reference_copysignd( double x, double y);
|
double reference_copysignd(double x, double y);
|
||||||
double reference_exp10( double );
|
double reference_exp10(double);
|
||||||
double reference_exp2( double x );
|
double reference_exp2(double x);
|
||||||
double reference_expm1( double x );
|
double reference_expm1(double x);
|
||||||
double reference_fmax( double x, double y );
|
double reference_fmax(double x, double y);
|
||||||
double reference_fmin( double x, double y );
|
double reference_fmin(double x, double y);
|
||||||
double reference_hypot( double x, double y );
|
double reference_hypot(double x, double y);
|
||||||
double reference_lgamma( double x);
|
double reference_lgamma(double x);
|
||||||
int reference_ilogb( double );
|
int reference_ilogb(double);
|
||||||
double reference_log2( double x );
|
double reference_log2(double x);
|
||||||
double reference_log1p( double x );
|
double reference_log1p(double x);
|
||||||
double reference_logb( double x );
|
double reference_logb(double x);
|
||||||
double reference_maxmag( double x, double y );
|
double reference_maxmag(double x, double y);
|
||||||
double reference_minmag( double x, double y );
|
double reference_minmag(double x, double y);
|
||||||
double reference_nan( cl_uint x );
|
double reference_nan(cl_uint x);
|
||||||
double reference_reciprocal( double x );
|
double reference_reciprocal(double x);
|
||||||
double reference_remainder( double x, double y );
|
double reference_remainder(double x, double y);
|
||||||
double reference_rint( double x );
|
double reference_rint(double x);
|
||||||
double reference_round( double x );
|
double reference_round(double x);
|
||||||
double reference_trunc( double x );
|
double reference_trunc(double x);
|
||||||
double reference_floor( double x );
|
double reference_floor(double x);
|
||||||
double reference_fmod( double x, double y );
|
double reference_fmod(double x, double y);
|
||||||
double reference_frexp( double x, int *n );
|
double reference_frexp(double x, int* n);
|
||||||
double reference_ldexp( double x, int n );
|
double reference_ldexp(double x, int n);
|
||||||
|
|
||||||
double reference_assignment( double x );
|
double reference_assignment(double x);
|
||||||
int reference_not( double x );
|
int reference_not(double x);
|
||||||
// -- for testing fast-relaxed
|
// -- for testing fast-relaxed
|
||||||
|
|
||||||
double reference_relaxed_acos(double);
|
double reference_relaxed_acos(double);
|
||||||
double reference_relaxed_asin(double);
|
double reference_relaxed_asin(double);
|
||||||
double reference_relaxed_atan(double);
|
double reference_relaxed_atan(double);
|
||||||
double reference_relaxed_mad( double, double, double );
|
double reference_relaxed_mad(double, double, double);
|
||||||
double reference_relaxed_divide( double x, double y );
|
double reference_relaxed_divide(double x, double y);
|
||||||
double reference_relaxed_sin( double x );
|
double reference_relaxed_sin(double x);
|
||||||
double reference_relaxed_sinpi(double x);
|
double reference_relaxed_sinpi(double x);
|
||||||
double reference_relaxed_cos( double x );
|
double reference_relaxed_cos(double x);
|
||||||
double reference_relaxed_cospi(double x);
|
double reference_relaxed_cospi(double x);
|
||||||
double reference_relaxed_sincos( double x, double * y);
|
double reference_relaxed_sincos(double x, double* y);
|
||||||
double reference_relaxed_tan( double x );
|
double reference_relaxed_tan(double x);
|
||||||
double reference_relaxed_exp( double x );
|
double reference_relaxed_exp(double x);
|
||||||
double reference_relaxed_exp2( double x );
|
double reference_relaxed_exp2(double x);
|
||||||
double reference_relaxed_exp10( double x );
|
double reference_relaxed_exp10(double x);
|
||||||
double reference_relaxed_log( double x );
|
double reference_relaxed_log(double x);
|
||||||
double reference_relaxed_log2( double x );
|
double reference_relaxed_log2(double x);
|
||||||
double reference_relaxed_log10(double x);
|
double reference_relaxed_log10(double x);
|
||||||
double reference_relaxed_pow( double x, double y);
|
double reference_relaxed_pow(double x, double y);
|
||||||
double reference_relaxed_reciprocal( double x );
|
double reference_relaxed_reciprocal(double x);
|
||||||
|
|
||||||
// -- for testing double --
|
// -- for testing double --
|
||||||
|
|
||||||
long double reference_sinhl( long double x );
|
long double reference_sinhl(long double x);
|
||||||
long double reference_sqrtl( long double x );
|
long double reference_sqrtl(long double x);
|
||||||
long double reference_tanhl( long double x );
|
long double reference_tanhl(long double x);
|
||||||
long double reference_acosl( long double );
|
long double reference_acosl(long double);
|
||||||
long double reference_asinl( long double );
|
long double reference_asinl(long double);
|
||||||
long double reference_atanl( long double );
|
long double reference_atanl(long double);
|
||||||
long double reference_atan2l( long double, long double );
|
long double reference_atan2l(long double, long double);
|
||||||
long double reference_ceill( long double );
|
long double reference_ceill(long double);
|
||||||
long double reference_coshl( long double );
|
long double reference_coshl(long double);
|
||||||
long double reference_expl( long double );
|
long double reference_expl(long double);
|
||||||
long double reference_fabsl( long double );
|
long double reference_fabsl(long double);
|
||||||
long double reference_acospil( long double );
|
long double reference_acospil(long double);
|
||||||
long double reference_asinpil( long double );
|
long double reference_asinpil(long double);
|
||||||
long double reference_atanpil( long double );
|
long double reference_atanpil(long double);
|
||||||
long double reference_atan2pil( long double, long double );
|
long double reference_atan2pil(long double, long double);
|
||||||
long double reference_cospil( long double );
|
long double reference_cospil(long double);
|
||||||
long double reference_dividel( long double, long double );
|
long double reference_dividel(long double, long double);
|
||||||
long double reference_fractl( long double, long double * );
|
long double reference_fractl(long double, long double*);
|
||||||
long double reference_fmal( long double, long double, long double );
|
long double reference_fmal(long double, long double, long double);
|
||||||
long double reference_madl( long double, long double, long double );
|
long double reference_madl(long double, long double, long double);
|
||||||
long double reference_nextafterl(long double, long double );
|
long double reference_nextafterl(long double, long double);
|
||||||
long double reference_recipl( long double );
|
long double reference_recipl(long double);
|
||||||
long double reference_rootnl( long double, int );
|
long double reference_rootnl(long double, int);
|
||||||
long double reference_rsqrtl( long double );
|
long double reference_rsqrtl(long double);
|
||||||
long double reference_sincosl( long double, long double * );
|
long double reference_sincosl(long double, long double*);
|
||||||
long double reference_sinpil( long double );
|
long double reference_sinpil(long double);
|
||||||
long double reference_tanpil( long double );
|
long double reference_tanpil(long double);
|
||||||
long double reference_powl(long double x, long double y);
|
long double reference_powl(long double x, long double y);
|
||||||
long double reference_pownl( long double, int );
|
long double reference_pownl(long double, int);
|
||||||
long double reference_powrl( long double, long double );
|
long double reference_powrl(long double, long double);
|
||||||
long double reference_cosl( long double );
|
long double reference_cosl(long double);
|
||||||
long double reference_sinl(long double );
|
long double reference_sinl(long double);
|
||||||
long double reference_tanl( long double );
|
long double reference_tanl(long double);
|
||||||
long double reference_logl( long double );
|
long double reference_logl(long double);
|
||||||
long double reference_log10l( long double );
|
long double reference_log10l(long double);
|
||||||
long double reference_modfl( long double, long double *n );
|
long double reference_modfl(long double, long double* n);
|
||||||
|
|
||||||
|
|
||||||
long double reference_fdiml( long double, long double );
|
long double reference_fdiml(long double, long double);
|
||||||
long double reference_addl( long double, long double );
|
long double reference_addl(long double, long double);
|
||||||
long double reference_subtractl( long double, long double );
|
long double reference_subtractl(long double, long double);
|
||||||
long double reference_dividel( long double, long double );
|
long double reference_dividel(long double, long double);
|
||||||
long double reference_multiplyl( long double, long double );
|
long double reference_multiplyl(long double, long double);
|
||||||
long double reference_remquol( long double, long double, int* );
|
long double reference_remquol(long double, long double, int*);
|
||||||
long double reference_lgamma_rl( long double, int* );
|
long double reference_lgamma_rl(long double, int*);
|
||||||
|
|
||||||
|
|
||||||
int reference_isequall( long double, long double );
|
int reference_isequall(long double, long double);
|
||||||
int reference_isfinitel( long double );
|
int reference_isfinitel(long double);
|
||||||
int reference_isgreaterl( long double, long double );
|
int reference_isgreaterl(long double, long double);
|
||||||
int reference_isgreaterequall( long double, long double );
|
int reference_isgreaterequall(long double, long double);
|
||||||
int reference_isinfl( long double );
|
int reference_isinfl(long double);
|
||||||
int reference_islessl( long double, long double );
|
int reference_islessl(long double, long double);
|
||||||
int reference_islessequall( long double, long double );
|
int reference_islessequall(long double, long double);
|
||||||
int reference_islessgreaterl( long double, long double );
|
int reference_islessgreaterl(long double, long double);
|
||||||
int reference_isnanl( long double );
|
int reference_isnanl(long double);
|
||||||
int reference_isnormall( long double );
|
int reference_isnormall(long double);
|
||||||
int reference_isnotequall( long double, long double );
|
int reference_isnotequall(long double, long double);
|
||||||
int reference_isorderedl( long double, long double );
|
int reference_isorderedl(long double, long double);
|
||||||
int reference_isunorderedl( long double, long double );
|
int reference_isunorderedl(long double, long double);
|
||||||
int reference_signbitl( long double );
|
int reference_signbitl(long double);
|
||||||
|
|
||||||
long double reference_acoshl( long double x );
|
long double reference_acoshl(long double x);
|
||||||
long double reference_asinhl( long double x );
|
long double reference_asinhl(long double x);
|
||||||
long double reference_atanhl( long double x );
|
long double reference_atanhl(long double x);
|
||||||
long double reference_cbrtl(long double x);
|
long double reference_cbrtl(long double x);
|
||||||
long double reference_copysignl( long double x, long double y);
|
long double reference_copysignl(long double x, long double y);
|
||||||
long double reference_exp10l( long double );
|
long double reference_exp10l(long double);
|
||||||
long double reference_exp2l( long double x );
|
long double reference_exp2l(long double x);
|
||||||
long double reference_expm1l( long double x );
|
long double reference_expm1l(long double x);
|
||||||
long double reference_fmaxl( long double x, long double y );
|
long double reference_fmaxl(long double x, long double y);
|
||||||
long double reference_fminl( long double x, long double y );
|
long double reference_fminl(long double x, long double y);
|
||||||
long double reference_hypotl( long double x, long double y );
|
long double reference_hypotl(long double x, long double y);
|
||||||
long double reference_lgammal( long double x);
|
long double reference_lgammal(long double x);
|
||||||
int reference_ilogbl( long double );
|
int reference_ilogbl(long double);
|
||||||
long double reference_log2l( long double x );
|
long double reference_log2l(long double x);
|
||||||
long double reference_log1pl( long double x );
|
long double reference_log1pl(long double x);
|
||||||
long double reference_logbl( long double x );
|
long double reference_logbl(long double x);
|
||||||
long double reference_maxmagl( long double x, long double y );
|
long double reference_maxmagl(long double x, long double y);
|
||||||
long double reference_minmagl( long double x, long double y );
|
long double reference_minmagl(long double x, long double y);
|
||||||
long double reference_nanl( cl_ulong x );
|
long double reference_nanl(cl_ulong x);
|
||||||
long double reference_reciprocall( long double x );
|
long double reference_reciprocall(long double x);
|
||||||
long double reference_remainderl( long double x, long double y );
|
long double reference_remainderl(long double x, long double y);
|
||||||
long double reference_rintl( long double x );
|
long double reference_rintl(long double x);
|
||||||
long double reference_roundl( long double x );
|
long double reference_roundl(long double x);
|
||||||
long double reference_truncl( long double x );
|
long double reference_truncl(long double x);
|
||||||
long double reference_floorl( long double x );
|
long double reference_floorl(long double x);
|
||||||
long double reference_fmodl( long double x, long double y );
|
long double reference_fmodl(long double x, long double y);
|
||||||
long double reference_frexpl( long double x, int *n );
|
long double reference_frexpl(long double x, int* n);
|
||||||
long double reference_ldexpl( long double x, int n );
|
long double reference_ldexpl(long double x, int n);
|
||||||
|
|
||||||
long double reference_assignmentl( long double x );
|
long double reference_assignmentl(long double x);
|
||||||
int reference_notl( long double x );
|
int reference_notl(long double x);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user