mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
* Fix enqueue_flags test to use correct barrier type. Currently, enqueue_flags test uses CLK_LOCAL_MEM_FENCE. Use CLK_GLOBAL_MEM_FENCE instead as all threads across work-groups need to wait here. * Add check for support for Read-Wrie images Read-Write images have required OpenCL 2.x. Read-Write image tests are already being skipped for 1.x devices. With OpenCL 3.0, read-write images being optional, the tests should be run or skipped depending on the implementation support. Add a check to decide if Read-Write images are supported or required to be supported depending on OpenCL version and decide if the tests should be run on skipped. Fixes issue #894 * Fix formatting in case of Read-Write image checks. Fix formatting in case of Read-write image checks. Also, combine two ifs into one in case of kerne_read_write tests * Fix some more formatting for RW-image checks Remove unnecessary spaces at various places. Also, fix lengthy lines. * Fix malloc-size calculation in test imagedim unsigned char size is silently assumed to be 1 in imagedim test of test_basic. Pass sizeof(type) in malloc size calculation. Also, change loop variable from signed to unsigned. Add checks for null pointer for malloced memory. * Fix command-line function range for bruteforce Runnning "test_bruteforce N M" is expected to skip first N functions and test M functions after it. When N is 0, the test currently skips M functions and run all functions thereafter. Fix the test to honor semantics of these command-line options to correctly test first M functions when N is 0.
1887 lines
60 KiB
C++
1887 lines
60 KiB
C++
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "Utility.h"
|
|
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <string>
|
|
#include <time.h>
|
|
#include "FunctionList.h"
|
|
#include "Sleep.h"
|
|
|
|
#include "harness/errorHelpers.h"
|
|
#include "harness/kernelHelpers.h"
|
|
#include "harness/parseParameters.h"
|
|
#include "harness/typeWrappers.h"
|
|
|
|
#if defined(__APPLE__)
|
|
#include <sys/sysctl.h>
|
|
#include <sys/mman.h>
|
|
#include <libgen.h>
|
|
#include <sys/time.h>
|
|
#elif defined(__linux__)
|
|
#include <unistd.h>
|
|
#include <sys/syscall.h>
|
|
#include <linux/sysctl.h>
|
|
#include <sys/param.h>
|
|
#endif
|
|
|
|
#if defined(__linux__) || (defined WIN32 && defined __MINGW32__)
|
|
#include <sys/param.h>
|
|
#endif
|
|
|
|
#include "harness/testHarness.h"
|
|
|
|
#define kPageSize 4096
|
|
#define DOUBLE_REQUIRED_FEATURES \
|
|
(CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO \
|
|
| CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)
|
|
|
|
const char **gTestNames = NULL;
|
|
unsigned int gTestNameCount = 0;
|
|
char appName[MAXPATHLEN] = "";
|
|
cl_device_id gDevice = NULL;
|
|
cl_context gContext = NULL;
|
|
cl_command_queue gQueue = NULL;
|
|
static int32_t gStartTestNumber = -1;
|
|
static int32_t gEndTestNumber = -1;
|
|
int gSkipCorrectnessTesting = 0;
|
|
int gStopOnError = 0;
|
|
static bool gSkipRestOfTests;
|
|
#if defined(__APPLE__)
|
|
int gMeasureTimes = 1;
|
|
#else
|
|
int gMeasureTimes = 0;
|
|
#endif
|
|
int gReportAverageTimes = 0;
|
|
int gForceFTZ = 0;
|
|
int gWimpyMode = 0;
|
|
int gHasDouble = 0;
|
|
int gTestFloat = 1;
|
|
// This flag should be 'ON' by default and it can be changed through the command
|
|
// line arguments.
|
|
static int gTestFastRelaxed = 1;
|
|
/*This flag corresponds to defining if the implementation has Derived Fast
|
|
Relaxed functions. The spec does not specify ULP for derived function. The
|
|
derived functions are composed of base functions which are tested for ULP,
|
|
thus when this flag is enabled, Derived functions will not be tested for ULP,
|
|
as per table 7.1 of OpenCL 2.0 spec. Since there is no way of quering the
|
|
device whether it is a derived or non-derived implementation according to
|
|
OpenCL 2.0 spec then it has to be changed through a command line argument.
|
|
*/
|
|
int gFastRelaxedDerived = 1;
|
|
int gToggleCorrectlyRoundedDivideSqrt = 0;
|
|
int gDeviceILogb0 = 1;
|
|
int gDeviceILogbNaN = 1;
|
|
int gCheckTininessBeforeRounding = 1;
|
|
int gIsInRTZMode = 0;
|
|
uint32_t gMaxVectorSizeIndex = VECTOR_SIZE_COUNT;
|
|
uint32_t gMinVectorSizeIndex = 0;
|
|
const char *method[] = { "Best", "Average" };
|
|
void *gIn = NULL;
|
|
void *gIn2 = NULL;
|
|
void *gIn3 = NULL;
|
|
void *gOut_Ref = NULL;
|
|
void *gOut[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
|
|
void *gOut_Ref2 = NULL;
|
|
void *gOut2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
|
|
cl_mem gInBuffer = NULL;
|
|
cl_mem gInBuffer2 = NULL;
|
|
cl_mem gInBuffer3 = NULL;
|
|
cl_mem gOutBuffer[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
|
|
cl_mem gOutBuffer2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
|
|
uint32_t gComputeDevices = 0;
|
|
uint32_t gSimdSize = 1;
|
|
uint32_t gDeviceFrequency = 0;
|
|
static MTdata gMTdata;
|
|
cl_device_fp_config gFloatCapabilities = 0;
|
|
cl_device_fp_config gDoubleCapabilities = 0;
|
|
int gWimpyReductionFactor = 32;
|
|
int gWimpyBufferSize = BUFFER_SIZE;
|
|
int gVerboseBruteForce = 0;
|
|
|
|
static int ParseArgs(int argc, const char **argv);
|
|
static void PrintUsage(void);
|
|
static void PrintFunctions(void);
|
|
test_status InitCL(cl_device_id device);
|
|
static void ReleaseCL(void);
|
|
static int InitILogbConstants(void);
|
|
static int IsTininessDetectedBeforeRounding(void);
|
|
static int
|
|
IsInRTZMode(void); // expensive. Please check gIsInRTZMode global instead.
|
|
|
|
|
|
int doTest(const char *name)
|
|
{
|
|
if (gSkipRestOfTests)
|
|
{
|
|
vlog("Skipping function because of an earlier error.\n");
|
|
return 1;
|
|
}
|
|
|
|
int error = 0;
|
|
const Func *func_data = NULL;
|
|
|
|
for (size_t i = 0; i < functionListCount; i++)
|
|
{
|
|
const Func *const temp_func = functionList + i;
|
|
if (strcmp(temp_func->name, name) == 0)
|
|
{
|
|
if (i < gStartTestNumber || i > gEndTestNumber)
|
|
{
|
|
vlog("Skipping function #%d\n", i);
|
|
return 0;
|
|
}
|
|
|
|
func_data = temp_func;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (func_data == NULL)
|
|
{
|
|
vlog("Function '%s' doesn't exist!\n", name);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
if (func_data->func.p == NULL)
|
|
{
|
|
vlog("'%s' is missing implementation, skipping function.\n",
|
|
func_data->name);
|
|
return 0;
|
|
}
|
|
|
|
// if correctly rounded divide & sqrt are supported by the implementation
|
|
// then test it; otherwise skip the test
|
|
if (strcmp(func_data->name, "sqrt_cr") == 0
|
|
|| strcmp(func_data->name, "divide_cr") == 0)
|
|
{
|
|
if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0)
|
|
{
|
|
vlog("Correctly rounded divide and sqrt are not supported, "
|
|
"skipping function.\n");
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
{
|
|
extern int my_ilogb(double);
|
|
if (0 == strcmp("ilogb", func_data->name))
|
|
{
|
|
InitILogbConstants();
|
|
}
|
|
|
|
if (gTestFastRelaxed && func_data->relaxed)
|
|
{
|
|
if (get_device_cl_version(gDevice) > Version(1, 2))
|
|
{
|
|
gTestCount++;
|
|
vlog("%3d: ", gTestCount);
|
|
// Test with relaxed requirements here.
|
|
if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
|
|
true /* relaxed mode */))
|
|
{
|
|
gFailCount++;
|
|
error++;
|
|
if (gStopOnError)
|
|
{
|
|
gSkipRestOfTests = true;
|
|
return error;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
vlog("Skipping reduced precision testing for device with "
|
|
"version 1.2 or less\n");
|
|
}
|
|
}
|
|
|
|
if (gTestFloat)
|
|
{
|
|
gTestCount++;
|
|
vlog("%3d: ", gTestCount);
|
|
// Don't test with relaxed requirements.
|
|
if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
|
|
false /* relaxed mode */))
|
|
{
|
|
gFailCount++;
|
|
error++;
|
|
if (gStopOnError)
|
|
{
|
|
gSkipRestOfTests = true;
|
|
return error;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc
|
|
&& NULL != func_data->dfunc.p)
|
|
{
|
|
gTestCount++;
|
|
vlog("%3d: ", gTestCount);
|
|
// Don't test with relaxed requirements.
|
|
if (func_data->vtbl_ptr->DoubleTestFunc(func_data, gMTdata,
|
|
false /* relaxed mode*/))
|
|
{
|
|
gFailCount++;
|
|
error++;
|
|
if (gStopOnError)
|
|
{
|
|
gSkipRestOfTests = true;
|
|
return error;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
int test_acos(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("acos");
|
|
}
|
|
int test_acosh(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("acosh");
|
|
}
|
|
int test_acospi(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("acospi");
|
|
}
|
|
int test_asin(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("asin");
|
|
}
|
|
int test_asinh(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("asinh");
|
|
}
|
|
int test_asinpi(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("asinpi");
|
|
}
|
|
int test_atan(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("atan");
|
|
}
|
|
int test_atanh(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("atanh");
|
|
}
|
|
int test_atanpi(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("atanpi");
|
|
}
|
|
int test_atan2(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("atan2");
|
|
}
|
|
int test_atan2pi(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("atan2pi");
|
|
}
|
|
int test_cbrt(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("cbrt");
|
|
}
|
|
int test_ceil(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("ceil");
|
|
}
|
|
int test_copysign(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("copysign");
|
|
}
|
|
int test_cos(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("cos");
|
|
}
|
|
int test_cosh(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("cosh");
|
|
}
|
|
int test_cospi(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("cospi");
|
|
}
|
|
int test_exp(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("exp");
|
|
}
|
|
int test_exp2(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("exp2");
|
|
}
|
|
int test_exp10(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("exp10");
|
|
}
|
|
int test_expm1(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("expm1");
|
|
}
|
|
int test_fabs(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("fabs");
|
|
}
|
|
int test_fdim(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("fdim");
|
|
}
|
|
int test_floor(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("floor");
|
|
}
|
|
int test_fma(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("fma");
|
|
}
|
|
int test_fmax(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("fmax");
|
|
}
|
|
int test_fmin(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("fmin");
|
|
}
|
|
int test_fmod(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("fmod");
|
|
}
|
|
int test_fract(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("fract");
|
|
}
|
|
int test_frexp(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("frexp");
|
|
}
|
|
int test_hypot(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("hypot");
|
|
}
|
|
int test_ilogb(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("ilogb");
|
|
}
|
|
int test_isequal(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("isequal");
|
|
}
|
|
int test_isfinite(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("isfinite");
|
|
}
|
|
int test_isgreater(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("isgreater");
|
|
}
|
|
int test_isgreaterequal(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("isgreaterequal");
|
|
}
|
|
int test_isinf(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("isinf");
|
|
}
|
|
int test_isless(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("isless");
|
|
}
|
|
int test_islessequal(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("islessequal");
|
|
}
|
|
int test_islessgreater(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("islessgreater");
|
|
}
|
|
int test_isnan(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("isnan");
|
|
}
|
|
int test_isnormal(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("isnormal");
|
|
}
|
|
int test_isnotequal(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("isnotequal");
|
|
}
|
|
int test_isordered(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("isordered");
|
|
}
|
|
int test_isunordered(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("isunordered");
|
|
}
|
|
int test_ldexp(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("ldexp");
|
|
}
|
|
int test_lgamma(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("lgamma");
|
|
}
|
|
int test_lgamma_r(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("lgamma_r");
|
|
}
|
|
int test_log(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("log");
|
|
}
|
|
int test_log2(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("log2");
|
|
}
|
|
int test_log10(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("log10");
|
|
}
|
|
int test_log1p(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("log1p");
|
|
}
|
|
int test_logb(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("logb");
|
|
}
|
|
int test_mad(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("mad");
|
|
}
|
|
int test_maxmag(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("maxmag");
|
|
}
|
|
int test_minmag(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("minmag");
|
|
}
|
|
int test_modf(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("modf");
|
|
}
|
|
int test_nan(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("nan");
|
|
}
|
|
int test_nextafter(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("nextafter");
|
|
}
|
|
int test_pow(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("pow");
|
|
}
|
|
int test_pown(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("pown");
|
|
}
|
|
int test_powr(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("powr");
|
|
}
|
|
int test_remainder(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("remainder");
|
|
}
|
|
int test_remquo(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("remquo");
|
|
}
|
|
int test_rint(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("rint");
|
|
}
|
|
int test_rootn(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("rootn");
|
|
}
|
|
int test_round(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("round");
|
|
}
|
|
int test_rsqrt(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("rsqrt");
|
|
}
|
|
int test_signbit(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("signbit");
|
|
}
|
|
int test_sin(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("sin");
|
|
}
|
|
int test_sincos(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("sincos");
|
|
}
|
|
int test_sinh(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("sinh");
|
|
}
|
|
int test_sinpi(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("sinpi");
|
|
}
|
|
int test_sqrt(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("sqrt");
|
|
}
|
|
int test_sqrt_cr(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("sqrt_cr");
|
|
}
|
|
int test_tan(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("tan");
|
|
}
|
|
int test_tanh(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("tanh");
|
|
}
|
|
int test_tanpi(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("tanpi");
|
|
}
|
|
int test_trunc(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("trunc");
|
|
}
|
|
int test_half_cos(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("half_cos");
|
|
}
|
|
int test_half_divide(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("half_divide");
|
|
}
|
|
int test_half_exp(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("half_exp");
|
|
}
|
|
int test_half_exp2(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("half_exp2");
|
|
}
|
|
int test_half_exp10(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("half_exp10");
|
|
}
|
|
int test_half_log(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("half_log");
|
|
}
|
|
int test_half_log2(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("half_log2");
|
|
}
|
|
int test_half_log10(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("half_log10");
|
|
}
|
|
int test_half_powr(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("half_powr");
|
|
}
|
|
int test_half_recip(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("half_recip");
|
|
}
|
|
int test_half_rsqrt(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("half_rsqrt");
|
|
}
|
|
int test_half_sin(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("half_sin");
|
|
}
|
|
int test_half_sqrt(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("half_sqrt");
|
|
}
|
|
int test_half_tan(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("half_tan");
|
|
}
|
|
int test_add(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("add");
|
|
}
|
|
int test_subtract(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("subtract");
|
|
}
|
|
int test_divide(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("divide");
|
|
}
|
|
int test_divide_cr(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("divide_cr");
|
|
}
|
|
int test_multiply(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("multiply");
|
|
}
|
|
int test_assignment(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return doTest("assignment");
|
|
}
|
|
int test_not(cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
return doTest("not");
|
|
}
|
|
|
|
test_definition test_list[] = {
|
|
ADD_TEST(acos), ADD_TEST(acosh), ADD_TEST(acospi),
|
|
ADD_TEST(asin), ADD_TEST(asinh), ADD_TEST(asinpi),
|
|
ADD_TEST(atan), ADD_TEST(atanh), ADD_TEST(atanpi),
|
|
ADD_TEST(atan2), ADD_TEST(atan2pi), ADD_TEST(cbrt),
|
|
ADD_TEST(ceil), ADD_TEST(copysign), ADD_TEST(cos),
|
|
ADD_TEST(cosh), ADD_TEST(cospi), ADD_TEST(exp),
|
|
ADD_TEST(exp2), ADD_TEST(exp10), ADD_TEST(expm1),
|
|
ADD_TEST(fabs), ADD_TEST(fdim), ADD_TEST(floor),
|
|
ADD_TEST(fma), ADD_TEST(fmax), ADD_TEST(fmin),
|
|
ADD_TEST(fmod), ADD_TEST(fract), ADD_TEST(frexp),
|
|
ADD_TEST(hypot), ADD_TEST(ilogb), ADD_TEST(isequal),
|
|
ADD_TEST(isfinite), ADD_TEST(isgreater), ADD_TEST(isgreaterequal),
|
|
ADD_TEST(isinf), ADD_TEST(isless), ADD_TEST(islessequal),
|
|
ADD_TEST(islessgreater), ADD_TEST(isnan), ADD_TEST(isnormal),
|
|
ADD_TEST(isnotequal), ADD_TEST(isordered), ADD_TEST(isunordered),
|
|
ADD_TEST(ldexp), ADD_TEST(lgamma), ADD_TEST(lgamma_r),
|
|
ADD_TEST(log), ADD_TEST(log2), ADD_TEST(log10),
|
|
ADD_TEST(log1p), ADD_TEST(logb), ADD_TEST(mad),
|
|
ADD_TEST(maxmag), ADD_TEST(minmag), ADD_TEST(modf),
|
|
ADD_TEST(nan), ADD_TEST(nextafter), ADD_TEST(pow),
|
|
ADD_TEST(pown), ADD_TEST(powr), ADD_TEST(remainder),
|
|
ADD_TEST(remquo), ADD_TEST(rint), ADD_TEST(rootn),
|
|
ADD_TEST(round), ADD_TEST(rsqrt), ADD_TEST(signbit),
|
|
ADD_TEST(sin), ADD_TEST(sincos), ADD_TEST(sinh),
|
|
ADD_TEST(sinpi), ADD_TEST(sqrt), ADD_TEST(sqrt_cr),
|
|
ADD_TEST(tan), ADD_TEST(tanh), ADD_TEST(tanpi),
|
|
ADD_TEST(trunc), ADD_TEST(half_cos), ADD_TEST(half_divide),
|
|
ADD_TEST(half_exp), ADD_TEST(half_exp2), ADD_TEST(half_exp10),
|
|
ADD_TEST(half_log), ADD_TEST(half_log2), ADD_TEST(half_log10),
|
|
ADD_TEST(half_powr), ADD_TEST(half_recip), ADD_TEST(half_rsqrt),
|
|
ADD_TEST(half_sin), ADD_TEST(half_sqrt), ADD_TEST(half_tan),
|
|
ADD_TEST(add), ADD_TEST(subtract), ADD_TEST(divide),
|
|
ADD_TEST(divide_cr), ADD_TEST(multiply), ADD_TEST(assignment),
|
|
ADD_TEST(not),
|
|
};
|
|
|
|
const int test_num = ARRAY_SIZE(test_list);
|
|
|
|
#pragma mark -
|
|
|
|
int main(int argc, const char *argv[])
|
|
{
|
|
int error;
|
|
|
|
argc = parseCustomParam(argc, argv);
|
|
if (argc == -1)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
#if defined(__APPLE__)
|
|
struct timeval startTime;
|
|
gettimeofday(&startTime, NULL);
|
|
#endif
|
|
|
|
error = ParseArgs(argc, argv);
|
|
if (error) return error;
|
|
|
|
// This takes a while, so prevent the machine from going to sleep.
|
|
PreventSleep();
|
|
atexit(ResumeSleep);
|
|
|
|
if (gSkipCorrectnessTesting)
|
|
vlog("*** Skipping correctness testing! ***\n\n");
|
|
else if (gStopOnError)
|
|
vlog("Stopping at first error.\n");
|
|
|
|
if (gMeasureTimes)
|
|
{
|
|
vlog("%s times are reported at right (cycles per element):\n",
|
|
method[gReportAverageTimes]);
|
|
vlog("\n");
|
|
if (gSkipCorrectnessTesting)
|
|
vlog(" \t ");
|
|
else
|
|
vlog(" \t ");
|
|
if (gWimpyMode) vlog(" ");
|
|
for (int i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
vlog("\t float%s", sizeNames[i]);
|
|
}
|
|
else
|
|
{
|
|
vlog(" \t ");
|
|
if (gWimpyMode) vlog(" ");
|
|
}
|
|
if (!gSkipCorrectnessTesting) vlog("\t max_ulps");
|
|
|
|
vlog("\n-------------------------------------------------------------------"
|
|
"----------------------------------------\n");
|
|
|
|
gMTdata = init_genrand(gRandomSeed);
|
|
if (gEndTestNumber == 0)
|
|
{
|
|
gEndTestNumber = functionListCount;
|
|
}
|
|
|
|
FPU_mode_type oldMode;
|
|
DisableFTZ(&oldMode);
|
|
|
|
int ret = runTestHarnessWithCheck(gTestNameCount, gTestNames, test_num,
|
|
test_list, true, 0, InitCL);
|
|
|
|
RestoreFPState(&oldMode);
|
|
|
|
free_mtdata(gMTdata);
|
|
free(gTestNames);
|
|
|
|
if (gQueue)
|
|
{
|
|
int error_code = clFinish(gQueue);
|
|
if (error_code) vlog_error("clFinish failed:%d\n", error_code);
|
|
}
|
|
|
|
ReleaseCL();
|
|
|
|
#if defined(__APPLE__)
|
|
struct timeval endTime;
|
|
gettimeofday(&endTime, NULL);
|
|
double time = (double)endTime.tv_sec - (double)startTime.tv_sec;
|
|
time += 1e-6 * ((double)endTime.tv_usec - (double)startTime.tv_usec);
|
|
vlog("time: %f s\n", time);
|
|
#endif
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int ParseArgs(int argc, const char **argv)
|
|
{
|
|
int i;
|
|
gTestNames = (const char **)calloc(argc - 1, sizeof(char *));
|
|
if (NULL == gTestNames)
|
|
{
|
|
vlog("Failed to allocate memory for gTestNames array.\n");
|
|
return 1;
|
|
}
|
|
gTestNames[0] = argv[0];
|
|
gTestNameCount = 1;
|
|
int singleThreaded = 0;
|
|
|
|
{ // Extract the app name
|
|
strncpy(appName, argv[0], MAXPATHLEN);
|
|
|
|
#if defined(__APPLE__)
|
|
char baseName[MAXPATHLEN];
|
|
char *base = NULL;
|
|
strncpy(baseName, argv[0], MAXPATHLEN);
|
|
base = basename(baseName);
|
|
if (NULL != base)
|
|
{
|
|
strncpy(appName, base, sizeof(appName));
|
|
appName[sizeof(appName) - 1] = '\0';
|
|
}
|
|
#endif
|
|
}
|
|
|
|
vlog("\n%s\t", appName);
|
|
for (i = 1; i < argc; i++)
|
|
{
|
|
const char *arg = argv[i];
|
|
if (NULL == arg) break;
|
|
|
|
vlog("\t%s", arg);
|
|
int optionFound = 0;
|
|
if (arg[0] == '-')
|
|
{
|
|
while (arg[1] != '\0')
|
|
{
|
|
arg++;
|
|
optionFound = 1;
|
|
switch (*arg)
|
|
{
|
|
case 'a': gReportAverageTimes ^= 1; break;
|
|
|
|
case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break;
|
|
|
|
case 'd': gHasDouble ^= 1; break;
|
|
|
|
case 'e': gFastRelaxedDerived ^= 1; break;
|
|
|
|
case 'f': gTestFloat ^= 1; break;
|
|
|
|
case 'h': PrintUsage(); return -1;
|
|
|
|
case 'p': PrintFunctions(); return -1;
|
|
|
|
case 'l': gSkipCorrectnessTesting ^= 1; break;
|
|
|
|
case 'm': singleThreaded ^= 1; break;
|
|
|
|
case 'r': gTestFastRelaxed ^= 1; break;
|
|
|
|
case 's': gStopOnError ^= 1; break;
|
|
|
|
case 't': gMeasureTimes ^= 1; break;
|
|
|
|
case 'v': gVerboseBruteForce ^= 1; break;
|
|
|
|
case 'w': // wimpy mode
|
|
gWimpyMode ^= 1;
|
|
break;
|
|
|
|
case '[':
|
|
parseWimpyReductionFactor(arg, gWimpyReductionFactor);
|
|
break;
|
|
|
|
case 'z': gForceFTZ ^= 1; break;
|
|
|
|
case '1':
|
|
if (arg[1] == '6')
|
|
{
|
|
gMinVectorSizeIndex = 5;
|
|
gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
|
|
arg++;
|
|
}
|
|
else
|
|
{
|
|
gMinVectorSizeIndex = 0;
|
|
gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
|
|
}
|
|
break;
|
|
case '2':
|
|
gMinVectorSizeIndex = 1;
|
|
gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
|
|
break;
|
|
case '3':
|
|
gMinVectorSizeIndex = 2;
|
|
gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
|
|
break;
|
|
case '4':
|
|
gMinVectorSizeIndex = 3;
|
|
gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
|
|
break;
|
|
case '8':
|
|
gMinVectorSizeIndex = 4;
|
|
gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
|
|
break;
|
|
break;
|
|
|
|
default:
|
|
vlog(" <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg);
|
|
PrintUsage();
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!optionFound)
|
|
{
|
|
char *t = NULL;
|
|
long number = strtol(arg, &t, 0);
|
|
if (t != arg)
|
|
{
|
|
if (-1 == gStartTestNumber)
|
|
gStartTestNumber = (int32_t)number;
|
|
else
|
|
gEndTestNumber = gStartTestNumber + (int32_t)number;
|
|
}
|
|
else
|
|
{
|
|
// Make sure this is a valid name
|
|
unsigned int k;
|
|
for (k = 0; k < functionListCount; k++)
|
|
{
|
|
const Func *f = functionList + k;
|
|
if (strcmp(arg, f->name) == 0)
|
|
{
|
|
gTestNames[gTestNameCount] = arg;
|
|
gTestNameCount++;
|
|
break;
|
|
}
|
|
}
|
|
// If we didn't find it in the list of test names
|
|
if (k >= functionListCount)
|
|
{
|
|
gTestNames[gTestNameCount] = arg;
|
|
gTestNameCount++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check for the wimpy mode environment variable
|
|
if (getenv("CL_WIMPY_MODE"))
|
|
{
|
|
vlog("\n");
|
|
vlog("*** Detected CL_WIMPY_MODE env ***\n");
|
|
gWimpyMode = 1;
|
|
}
|
|
|
|
vlog("\nTest binary built %s %s\n", __DATE__, __TIME__);
|
|
|
|
PrintArch();
|
|
|
|
if (gWimpyMode)
|
|
{
|
|
vlog("\n");
|
|
vlog("*** WARNING: Testing in Wimpy mode! ***\n");
|
|
vlog("*** Wimpy mode is not sufficient to verify correctness. ***\n");
|
|
vlog("*** Wimpy Reduction Factor: %-27u ***\n\n",
|
|
gWimpyReductionFactor);
|
|
}
|
|
|
|
if (singleThreaded) SetThreadCount(1);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void PrintFunctions(void)
|
|
{
|
|
vlog("\nMath function names:\n");
|
|
for (int i = 0; i < functionListCount; i++)
|
|
{
|
|
vlog("\t%s\n", functionList[i].name);
|
|
}
|
|
}
|
|
|
|
static void PrintUsage(void)
|
|
{
|
|
vlog("%s [-acglstz]: <optional: math function names>\n", appName);
|
|
vlog("\toptions:\n");
|
|
vlog("\t\t-a\tReport average times instead of best times\n");
|
|
vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: "
|
|
"off)\n");
|
|
vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
|
|
"on)\n");
|
|
vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
|
|
vlog("\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n");
|
|
vlog("\t\t-e\tToggle test as derived implementations for fast relaxed math "
|
|
"precision. (Default: on)\n");
|
|
vlog("\t\t-h\tPrint this message and quit\n");
|
|
vlog("\t\t-p\tPrint all math function names and quit\n");
|
|
vlog("\t\t-l\tlink check only (make sure functions are present, skip "
|
|
"accuracy checks.)\n");
|
|
vlog("\t\t-m\tToggle run multi-threaded. (Default: on) )\n");
|
|
vlog("\t\t-s\tStop on error\n");
|
|
vlog("\t\t-t\tToggle timing (on by default)\n");
|
|
vlog("\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
|
|
vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
|
|
"1-10, default factor(%u)\n",
|
|
gWimpyReductionFactor);
|
|
vlog("\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by "
|
|
"device capabilities by default.)\n");
|
|
vlog("\t\t-v\tToggle Verbosity (Default: off)\n ");
|
|
vlog("\t\t-#\tTest only vector sizes #, e.g. \"-1\" tests scalar only, "
|
|
"\"-16\" tests 16-wide vectors only.\n");
|
|
vlog("\n\tYou may also pass a number instead of a function name.\n");
|
|
vlog("\tThis causes the first N tests to be skipped. The tests are "
|
|
"numbered.\n");
|
|
vlog("\tIf you pass a second number, that is the number tests to run after "
|
|
"the first one.\n");
|
|
vlog("\tA name list may be used in conjunction with a number range. In "
|
|
"that case,\n");
|
|
vlog("\tonly the named cases in the number range will run.\n");
|
|
vlog("\tYou may also choose to pass no arguments, in which case all tests "
|
|
"will be run.\n");
|
|
vlog("\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the "
|
|
"device.\n");
|
|
vlog("\n");
|
|
}
|
|
|
|
static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo,
|
|
const void *private_info,
|
|
size_t cb, void *user_data)
|
|
{
|
|
vlog("%s (%p, %zd, %p)\n", errinfo, private_info, cb, user_data);
|
|
}
|
|
|
|
test_status InitCL(cl_device_id device)
|
|
{
|
|
int error;
|
|
uint32_t i;
|
|
size_t configSize = sizeof(gComputeDevices);
|
|
cl_device_type device_type;
|
|
|
|
error = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type),
|
|
&device_type, NULL);
|
|
if (error)
|
|
{
|
|
print_error(error, "Unable to get device type");
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
gDevice = device;
|
|
if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_MAX_COMPUTE_UNITS,
|
|
configSize, &gComputeDevices, NULL)))
|
|
gComputeDevices = 1;
|
|
|
|
// Check extensions
|
|
if (is_extension_available(gDevice, "cl_khr_fp64"))
|
|
{
|
|
gHasDouble ^= 1;
|
|
#if defined(CL_DEVICE_DOUBLE_FP_CONFIG)
|
|
if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG,
|
|
sizeof(gDoubleCapabilities),
|
|
&gDoubleCapabilities, NULL)))
|
|
{
|
|
vlog_error("ERROR: Unable to get device "
|
|
"CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n",
|
|
error);
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
if (DOUBLE_REQUIRED_FEATURES
|
|
!= (gDoubleCapabilities & DOUBLE_REQUIRED_FEATURES))
|
|
{
|
|
std::string list;
|
|
if (0 == (gDoubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, ";
|
|
if (0 == (gDoubleCapabilities & CL_FP_ROUND_TO_NEAREST))
|
|
list += "CL_FP_ROUND_TO_NEAREST, ";
|
|
if (0 == (gDoubleCapabilities & CL_FP_ROUND_TO_ZERO))
|
|
list += "CL_FP_ROUND_TO_ZERO, ";
|
|
if (0 == (gDoubleCapabilities & CL_FP_ROUND_TO_INF))
|
|
list += "CL_FP_ROUND_TO_INF, ";
|
|
if (0 == (gDoubleCapabilities & CL_FP_INF_NAN))
|
|
list += "CL_FP_INF_NAN, ";
|
|
if (0 == (gDoubleCapabilities & CL_FP_DENORM))
|
|
list += "CL_FP_DENORM, ";
|
|
vlog_error("ERROR: required double features are missing: %s\n",
|
|
list.c_str());
|
|
|
|
return TEST_FAIL;
|
|
}
|
|
#else
|
|
vlog_error("FAIL: device says it supports cl_khr_fp64 but "
|
|
"CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n");
|
|
return TEST_FAIL;
|
|
#endif
|
|
}
|
|
|
|
configSize = sizeof(gDeviceFrequency);
|
|
if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY,
|
|
configSize, &gDeviceFrequency, NULL)))
|
|
gDeviceFrequency = 0;
|
|
|
|
if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG,
|
|
sizeof(gFloatCapabilities),
|
|
&gFloatCapabilities, NULL)))
|
|
{
|
|
vlog_error(
|
|
"ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n",
|
|
error);
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
gContext = clCreateContext(NULL, 1, &gDevice, bruteforce_notify_callback,
|
|
NULL, &error);
|
|
if (NULL == gContext || error)
|
|
{
|
|
vlog_error("clCreateContext failed. (%d) \n", error);
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
gQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
|
|
if (NULL == gQueue || error)
|
|
{
|
|
vlog_error("clCreateCommandQueue failed. (%d)\n", error);
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
#if defined(__APPLE__)
|
|
// FIXME: use clProtectedArray
|
|
#endif
|
|
// Allocate buffers
|
|
cl_uint min_alignment = 0;
|
|
error = clGetDeviceInfo(gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
|
|
sizeof(cl_uint), (void *)&min_alignment, NULL);
|
|
if (CL_SUCCESS != error)
|
|
{
|
|
vlog_error("clGetDeviceInfo failed. (%d)\n", error);
|
|
return TEST_FAIL;
|
|
}
|
|
min_alignment >>= 3; // convert bits to bytes
|
|
|
|
gIn = align_malloc(BUFFER_SIZE, min_alignment);
|
|
if (NULL == gIn) return TEST_FAIL;
|
|
gIn2 = align_malloc(BUFFER_SIZE, min_alignment);
|
|
if (NULL == gIn2) return TEST_FAIL;
|
|
gIn3 = align_malloc(BUFFER_SIZE, min_alignment);
|
|
if (NULL == gIn3) return TEST_FAIL;
|
|
gOut_Ref = align_malloc(BUFFER_SIZE, min_alignment);
|
|
if (NULL == gOut_Ref) return TEST_FAIL;
|
|
gOut_Ref2 = align_malloc(BUFFER_SIZE, min_alignment);
|
|
if (NULL == gOut_Ref2) return TEST_FAIL;
|
|
|
|
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
{
|
|
gOut[i] = align_malloc(BUFFER_SIZE, min_alignment);
|
|
if (NULL == gOut[i]) return TEST_FAIL;
|
|
gOut2[i] = align_malloc(BUFFER_SIZE, min_alignment);
|
|
if (NULL == gOut2[i]) return TEST_FAIL;
|
|
}
|
|
|
|
cl_mem_flags device_flags = CL_MEM_READ_ONLY;
|
|
// save a copy on the host device to make this go faster
|
|
if (CL_DEVICE_TYPE_CPU == device_type)
|
|
device_flags |= CL_MEM_USE_HOST_PTR;
|
|
else
|
|
device_flags |= CL_MEM_COPY_HOST_PTR;
|
|
|
|
// setup input buffers
|
|
gInBuffer =
|
|
clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn, &error);
|
|
if (gInBuffer == NULL || error)
|
|
{
|
|
vlog_error("clCreateBuffer1 failed for input (%d)\n", error);
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
gInBuffer2 =
|
|
clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn2, &error);
|
|
if (gInBuffer2 == NULL || error)
|
|
{
|
|
vlog_error("clCreateArray2 failed for input (%d)\n", error);
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
gInBuffer3 =
|
|
clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn3, &error);
|
|
if (gInBuffer3 == NULL || error)
|
|
{
|
|
vlog_error("clCreateArray3 failed for input (%d)\n", error);
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
|
|
// setup output buffers
|
|
device_flags = CL_MEM_READ_WRITE;
|
|
// save a copy on the host device to make this go faster
|
|
if (CL_DEVICE_TYPE_CPU == device_type)
|
|
device_flags |= CL_MEM_USE_HOST_PTR;
|
|
else
|
|
device_flags |= CL_MEM_COPY_HOST_PTR;
|
|
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
{
|
|
gOutBuffer[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
|
|
gOut[i], &error);
|
|
if (gOutBuffer[i] == NULL || error)
|
|
{
|
|
vlog_error("clCreateArray failed for output (%d)\n", error);
|
|
return TEST_FAIL;
|
|
}
|
|
gOutBuffer2[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
|
|
gOut2[i], &error);
|
|
if (gOutBuffer2[i] == NULL || error)
|
|
{
|
|
vlog_error("clCreateArray2 failed for output (%d)\n", error);
|
|
return TEST_FAIL;
|
|
}
|
|
}
|
|
|
|
// we are embedded, check current rounding mode
|
|
if (gIsEmbedded)
|
|
{
|
|
gIsInRTZMode = IsInRTZMode();
|
|
}
|
|
|
|
// Check tininess detection
|
|
IsTininessDetectedBeforeRounding();
|
|
|
|
cl_platform_id platform;
|
|
int err = clGetPlatformIDs(1, &platform, NULL);
|
|
if (err)
|
|
{
|
|
print_error(err, "clGetPlatformIDs failed");
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
char c[1024];
|
|
static const char *no_yes[] = { "NO", "YES" };
|
|
vlog("\nCompute Device info:\n");
|
|
clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(c), &c, NULL);
|
|
vlog("\tPlatform Version: %s\n", c);
|
|
clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(c), &c, NULL);
|
|
vlog("\tDevice Name: %s\n", c);
|
|
clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(c), &c, NULL);
|
|
vlog("\tVendor: %s\n", c);
|
|
clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(c), &c, NULL);
|
|
vlog("\tDevice Version: %s\n", c);
|
|
clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL);
|
|
vlog("\tCL C Version: %s\n", c);
|
|
clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(c), &c, NULL);
|
|
vlog("\tDriver Version: %s\n", c);
|
|
vlog("\tDevice Frequency: %d MHz\n", gDeviceFrequency);
|
|
vlog("\tSubnormal values supported for floats? %s\n",
|
|
no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)]);
|
|
vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n",
|
|
no_yes[0
|
|
!= (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
|
|
if (gToggleCorrectlyRoundedDivideSqrt)
|
|
{
|
|
gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
|
|
}
|
|
vlog("\tTesting with correctly rounded float divide and sqrt? %s\n",
|
|
no_yes[0
|
|
!= (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
|
|
vlog("\tTesting with FTZ mode ON for floats? %s\n",
|
|
no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]);
|
|
vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]);
|
|
vlog("\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed]);
|
|
if (gTestFastRelaxed)
|
|
{
|
|
vlog("\tFast relaxed math has derived implementations? %s\n",
|
|
no_yes[0 != gFastRelaxedDerived]);
|
|
}
|
|
vlog("\tTesting double precision? %s\n", no_yes[0 != gHasDouble]);
|
|
if (sizeof(long double) == sizeof(double) && gHasDouble)
|
|
{
|
|
vlog("\n\t\tWARNING: Host system long double does not have better "
|
|
"precision than double!\n");
|
|
vlog("\t\t All double results that do not match the reference "
|
|
"result have their reported\n");
|
|
vlog("\t\t error inflated by 0.5 ulps to account for the fact "
|
|
"that this system\n");
|
|
vlog("\t\t can not accurately represent the right result to an "
|
|
"accuracy closer\n");
|
|
vlog("\t\t than half an ulp. See comments in "
|
|
"Bruteforce_Ulp_Error_Double() for more details.\n\n");
|
|
}
|
|
|
|
vlog("\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded]);
|
|
if (gIsEmbedded)
|
|
vlog("\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode]);
|
|
vlog("\tTininess is detected before rounding? %s\n",
|
|
no_yes[0 != gCheckTininessBeforeRounding]);
|
|
vlog("\tWorker threads: %d\n", GetThreadCount());
|
|
vlog("\tTesting vector sizes:");
|
|
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
vlog("\t%d", sizeValues[i]);
|
|
|
|
vlog("\n");
|
|
vlog("\tVerbose? %s\n", no_yes[0 != gVerboseBruteForce]);
|
|
vlog("\n\n");
|
|
|
|
// Check to see if we are using single threaded mode on other than a 1.0
|
|
// device
|
|
if (getenv("CL_TEST_SINGLE_THREADED"))
|
|
{
|
|
|
|
char device_version[1024] = { 0 };
|
|
clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(device_version),
|
|
device_version, NULL);
|
|
|
|
if (strcmp("OpenCL 1.0 ", device_version))
|
|
{
|
|
vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. "
|
|
"Running single threaded.\n");
|
|
}
|
|
}
|
|
|
|
return TEST_PASS;
|
|
}
|
|
|
|
static void ReleaseCL(void)
|
|
{
|
|
uint32_t i;
|
|
clReleaseMemObject(gInBuffer);
|
|
clReleaseMemObject(gInBuffer2);
|
|
clReleaseMemObject(gInBuffer3);
|
|
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
{
|
|
clReleaseMemObject(gOutBuffer[i]);
|
|
clReleaseMemObject(gOutBuffer2[i]);
|
|
}
|
|
clReleaseCommandQueue(gQueue);
|
|
clReleaseContext(gContext);
|
|
|
|
align_free(gIn);
|
|
align_free(gIn2);
|
|
align_free(gIn3);
|
|
align_free(gOut_Ref);
|
|
align_free(gOut_Ref2);
|
|
|
|
for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
|
{
|
|
align_free(gOut[i]);
|
|
align_free(gOut2[i]);
|
|
}
|
|
}
|
|
|
|
void _LogBuildError(cl_program p, int line, const char *file)
|
|
{
|
|
char the_log[2048] = "";
|
|
|
|
vlog_error("%s:%d: Build Log:\n", file, line);
|
|
if (0
|
|
== clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG,
|
|
sizeof(the_log), the_log, NULL))
|
|
vlog_error("%s", the_log);
|
|
else
|
|
vlog_error("*** Error getting build log for program %p\n", p);
|
|
}
|
|
|
|
int InitILogbConstants(void)
|
|
{
|
|
int error;
|
|
const char *kernelSource =
|
|
R"(__kernel void GetILogBConstants( __global int *out )
|
|
{
|
|
out[0] = FP_ILOGB0;
|
|
out[1] = FP_ILOGBNAN;
|
|
})";
|
|
|
|
clProgramWrapper query;
|
|
clKernelWrapper kernel;
|
|
error = create_single_kernel_helper(gContext, &query, &kernel, 1,
|
|
&kernelSource, "GetILogBConstants");
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
vlog_error("Error: Unable to create kernel to get FP_ILOGB0 and "
|
|
"FP_ILOGBNAN for the device. (%d)",
|
|
error);
|
|
return error;
|
|
}
|
|
|
|
if ((error =
|
|
clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
|
|
&gOutBuffer[gMinVectorSizeIndex])))
|
|
{
|
|
vlog_error("Error: Unable to set kernel arg to get FP_ILOGB0 and "
|
|
"FP_ILOGBNAN for the device. Err = %d",
|
|
error);
|
|
return error;
|
|
}
|
|
|
|
size_t dim = 1;
|
|
if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
|
|
NULL, NULL)))
|
|
{
|
|
vlog_error("Error: Unable to execute kernel to get FP_ILOGB0 and "
|
|
"FP_ILOGBNAN for the device. Err = %d",
|
|
error);
|
|
return error;
|
|
}
|
|
|
|
struct
|
|
{
|
|
cl_int ilogb0, ilogbnan;
|
|
} data;
|
|
if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
|
|
CL_TRUE, 0, sizeof(data), &data, 0, NULL,
|
|
NULL)))
|
|
{
|
|
vlog_error("Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the "
|
|
"device. Err = %d",
|
|
error);
|
|
return error;
|
|
}
|
|
|
|
gDeviceILogb0 = data.ilogb0;
|
|
gDeviceILogbNaN = data.ilogbnan;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int IsTininessDetectedBeforeRounding(void)
|
|
{
|
|
int error;
|
|
const char *kernelSource =
|
|
R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out )
|
|
{
|
|
volatile float a = 0x1.000002p-126f;
|
|
volatile float b = 0x1.fffffcp-1f;
|
|
out[0] = a * b; // product is 0x1.fffffffffff8p-127
|
|
})";
|
|
|
|
clProgramWrapper query;
|
|
clKernelWrapper kernel;
|
|
error =
|
|
create_single_kernel_helper(gContext, &query, &kernel, 1, &kernelSource,
|
|
"IsTininessDetectedBeforeRounding");
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
vlog_error("Error: Unable to create kernel to detect how tininess is "
|
|
"detected for the device. (%d)",
|
|
error);
|
|
return error;
|
|
}
|
|
|
|
if ((error =
|
|
clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
|
|
&gOutBuffer[gMinVectorSizeIndex])))
|
|
{
|
|
vlog_error("Error: Unable to set kernel arg to detect how tininess is "
|
|
"detected for the device. Err = %d",
|
|
error);
|
|
return error;
|
|
}
|
|
|
|
size_t dim = 1;
|
|
if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
|
|
NULL, NULL)))
|
|
{
|
|
vlog_error("Error: Unable to execute kernel to detect how tininess is "
|
|
"detected for the device. Err = %d",
|
|
error);
|
|
return error;
|
|
}
|
|
|
|
struct
|
|
{
|
|
cl_uint f;
|
|
} data;
|
|
if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
|
|
CL_TRUE, 0, sizeof(data), &data, 0, NULL,
|
|
NULL)))
|
|
{
|
|
vlog_error("Error: unable to read result from tininess test from the "
|
|
"device. Err = %d",
|
|
error);
|
|
return error;
|
|
}
|
|
|
|
gCheckTininessBeforeRounding = 0 == (data.f & 0x7fffffff);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
|
|
cl_program *p, bool relaxedMode)
|
|
{
|
|
int error = 0;
|
|
char options[200] = "";
|
|
|
|
if (gForceFTZ)
|
|
{
|
|
strcat(options, " -cl-denorms-are-zero");
|
|
}
|
|
|
|
if (relaxedMode)
|
|
{
|
|
strcat(options, " -cl-fast-relaxed-math");
|
|
}
|
|
|
|
error =
|
|
create_single_kernel_helper(gContext, p, k, count, c, name, options);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
vlog_error("\t\tFAILED -- Failed to create kernel. (%d)\n", error);
|
|
return error;
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
int MakeKernels(const char **c, cl_uint count, const char *name,
|
|
cl_uint kernel_count, cl_kernel *k, cl_program *p,
|
|
bool relaxedMode)
|
|
{
|
|
int error = 0;
|
|
cl_uint i;
|
|
char options[200] = "";
|
|
|
|
if (gForceFTZ)
|
|
{
|
|
strcat(options, " -cl-denorms-are-zero ");
|
|
}
|
|
|
|
if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
|
|
{
|
|
strcat(options, " -cl-fp32-correctly-rounded-divide-sqrt ");
|
|
}
|
|
|
|
if (relaxedMode)
|
|
{
|
|
strcat(options, " -cl-fast-relaxed-math");
|
|
}
|
|
|
|
error =
|
|
create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
|
|
return error;
|
|
}
|
|
|
|
|
|
memset(k, 0, kernel_count * sizeof(*k));
|
|
for (i = 0; i < kernel_count; i++)
|
|
{
|
|
k[i] = clCreateKernel(*p, name, &error);
|
|
if (NULL == k[i] || error)
|
|
{
|
|
char buffer[2048] = "";
|
|
|
|
vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
|
|
clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG,
|
|
sizeof(buffer), buffer, NULL);
|
|
vlog_error("Log: %s\n", buffer);
|
|
clReleaseProgram(*p);
|
|
return error;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
|
|
static int IsInRTZMode(void)
|
|
{
|
|
int error;
|
|
const char *kernelSource =
|
|
R"(__kernel void GetRoundingMode( __global int *out )
|
|
{
|
|
volatile float a = 0x1.0p23f;
|
|
volatile float b = -0x1.0p23f;
|
|
out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b);
|
|
"})";
|
|
|
|
clProgramWrapper query;
|
|
clKernelWrapper kernel;
|
|
error = create_single_kernel_helper(gContext, &query, &kernel, 1,
|
|
&kernelSource, "GetRoundingMode");
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
vlog_error("Error: Unable to create kernel to detect RTZ mode for the "
|
|
"device. (%d)",
|
|
error);
|
|
return error;
|
|
}
|
|
|
|
if ((error =
|
|
clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
|
|
&gOutBuffer[gMinVectorSizeIndex])))
|
|
{
|
|
vlog_error("Error: Unable to set kernel arg to detect RTZ mode for the "
|
|
"device. Err = %d",
|
|
error);
|
|
return error;
|
|
}
|
|
|
|
size_t dim = 1;
|
|
if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
|
|
NULL, NULL)))
|
|
{
|
|
vlog_error("Error: Unable to execute kernel to detect RTZ mode for the "
|
|
"device. Err = %d",
|
|
error);
|
|
return error;
|
|
}
|
|
|
|
struct
|
|
{
|
|
cl_int isRTZ;
|
|
} data;
|
|
if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
|
|
CL_TRUE, 0, sizeof(data), &data, 0, NULL,
|
|
NULL)))
|
|
{
|
|
vlog_error(
|
|
"Error: unable to read RTZ mode data from the device. Err = %d",
|
|
error);
|
|
return error;
|
|
}
|
|
|
|
return data.isRTZ;
|
|
}
|
|
|
|
#pragma mark -
|
|
|
|
const char *sizeNames[VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" };
|
|
const int sizeValues[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
|
|
|
|
// TODO: There is another version of Ulp_Error_Double defined in
|
|
// test_common/harness/errorHelpers.c
|
|
float Bruteforce_Ulp_Error_Double(double test, long double reference)
|
|
{
|
|
// Check for Non-power-of-two and NaN
|
|
|
|
// Note: This function presumes that someone has already tested whether the
|
|
// result is correctly, rounded before calling this function. That test:
|
|
//
|
|
// if( (float) reference == test )
|
|
// return 0.0f;
|
|
//
|
|
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out
|
|
// before we get here. Otherwise, we'll return inf ulp error here, for what
|
|
// are otherwise correctly rounded results.
|
|
|
|
// Deal with long double = double
|
|
// On most systems long double is a higher precision type than double. They
|
|
// provide either a 80-bit or greater floating point type, or they provide a
|
|
// head-tail double double format. That is sufficient to represent the
|
|
// accuracy of a floating point result to many more bits than double and we
|
|
// can calculate sub-ulp errors. This is the standard system for which this
|
|
// test suite is designed.
|
|
//
|
|
// On some systems double and long double are the same thing. Then we run
|
|
// into a problem, because our representation of the infinitely precise
|
|
// result (passed in as reference above) can be off by as much as a half
|
|
// double precision ulp itself. In this case, we inflate the reported error
|
|
// by half an ulp to take this into account. A more correct and permanent
|
|
// fix would be to undertake refactoring the reference code to return
|
|
// results in this format:
|
|
//
|
|
// typedef struct DoubleReference
|
|
// { // true value = correctlyRoundedResult + ulps *
|
|
// ulp(correctlyRoundedResult) (infinitely precise)
|
|
// double correctlyRoundedResult; // as best we can
|
|
// double ulps; // plus a fractional amount to
|
|
// account for the difference
|
|
// }DoubleReference; // between infinitely
|
|
// precise result and correctlyRoundedResult, in units of ulps.
|
|
//
|
|
// This would provide a useful higher-than-double precision format for
|
|
// everyone that we can use, and would solve a few problems with
|
|
// representing absolute errors below DBL_MIN and over DBL_MAX for systems
|
|
// that use a head to tail double double for long double.
|
|
|
|
int x;
|
|
long double testVal = test;
|
|
|
|
// First, handle special reference values
|
|
if (isinf(reference))
|
|
{
|
|
if (reference == testVal) return 0.0f;
|
|
|
|
return INFINITY;
|
|
}
|
|
|
|
if (isnan(reference))
|
|
{
|
|
if (isnan(testVal)) return 0.0f;
|
|
|
|
return INFINITY;
|
|
}
|
|
|
|
if (0.0L != reference && 0.5L != frexpl(reference, &x))
|
|
{ // Non-zero and Non-power of two
|
|
|
|
// allow correctly rounded results to pass through unmolested. (We might
|
|
// add error to it below.) There is something of a performance
|
|
// optimization here.
|
|
if (testVal == reference) return 0.0f;
|
|
|
|
// The unbiased exponent of the ulp unit place
|
|
int ulp_exp =
|
|
DBL_MANT_DIG - 1 - MAX(ilogbl(reference), DBL_MIN_EXP - 1);
|
|
|
|
// Scale the exponent of the error
|
|
float result = (float)scalbnl(testVal - reference, ulp_exp);
|
|
|
|
// account for rounding error in reference result on systems that do not
|
|
// have a higher precision floating point type (see above)
|
|
if (sizeof(long double) == sizeof(double))
|
|
result += copysignf(0.5f, result);
|
|
|
|
return result;
|
|
}
|
|
|
|
// reference is a normal power of two or a zero
|
|
// The unbiased exponent of the ulp unit place
|
|
int ulp_exp =
|
|
DBL_MANT_DIG - 1 - MAX(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
|
|
|
|
// allow correctly rounded results to pass through unmolested. (We might add
|
|
// error to it below.) There is something of a performance optimization here
|
|
// too.
|
|
if (testVal == reference) return 0.0f;
|
|
|
|
// Scale the exponent of the error
|
|
float result = (float)scalbnl(testVal - reference, ulp_exp);
|
|
|
|
// account for rounding error in reference result on systems that do not
|
|
// have a higher precision floating point type (see above)
|
|
if (sizeof(long double) == sizeof(double))
|
|
result += copysignf(0.5f, result);
|
|
|
|
return result;
|
|
}
|
|
|
|
float Abs_Error(float test, double reference)
|
|
{
|
|
if (isnan(test) && isnan(reference)) return 0.0f;
|
|
return fabs((float)(reference - (double)test));
|
|
}
|
|
|
|
#if defined(__APPLE__)
|
|
#include <mach/mach_time.h>
|
|
#endif
|
|
|
|
uint64_t GetTime(void)
|
|
{
|
|
#if defined(__APPLE__)
|
|
return mach_absolute_time();
|
|
#elif defined(_WIN32) && defined(_MSC_VER)
|
|
return ReadTime();
|
|
#else
|
|
// mach_absolute_time is a high precision timer with precision < 1 microsecond.
|
|
#warning need accurate clock here. Times are invalid.
|
|
return 0;
|
|
#endif
|
|
}
|
|
|
|
|
|
#if defined(_WIN32) && defined(_MSC_VER)
|
|
/* function is defined in "compat.h" */
|
|
#else
|
|
double SubtractTime(uint64_t endTime, uint64_t startTime)
|
|
{
|
|
uint64_t diff = endTime - startTime;
|
|
static double conversion = 0.0;
|
|
|
|
if (0.0 == conversion)
|
|
{
|
|
#if defined(__APPLE__)
|
|
mach_timebase_info_data_t info = { 0, 0 };
|
|
kern_return_t err = mach_timebase_info(&info);
|
|
if (0 == err)
|
|
conversion = 1e-9 * (double)info.numer / (double)info.denom;
|
|
#else
|
|
// This function consumes output from GetTime() above, and converts the time to
|
|
// secionds.
|
|
#warning need accurate ticks to seconds conversion factor here. Times are invalid.
|
|
#endif
|
|
}
|
|
|
|
// strictly speaking we should also be subtracting out timer latency here
|
|
return conversion * (double)diff;
|
|
}
|
|
#endif
|
|
|
|
cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
|
|
{
|
|
if (0 == (x & (x - 1))) return x;
|
|
|
|
while (x & (x - 1)) x &= x - 1;
|
|
|
|
return x + x;
|
|
}
|