mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-20 14:39:01 +00:00
Synchronise with Khronos-private GitLab branch
The maintenance of the conformance tests is moving to GitHub. This commit contains all the changes that have been made in GitLab since the first public release of the conformance tests. Signed-off-by: Kevin Petit <kevin.petit@arm.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@@ -28,7 +28,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata);
|
||||
int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C"
|
||||
extern "C"
|
||||
#endif
|
||||
|
||||
const vtbl _binary_operator = { "binaryOperator", TestFunc_Float_Float_Float_Operator, TestFunc_Double_Double_Double_Operator };
|
||||
@@ -38,7 +38,7 @@ static int BuildKernelDouble( const char *name, const char *operator_symbol, int
|
||||
|
||||
static int BuildKernel( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
|
||||
{
|
||||
const char *c[] = {
|
||||
const char *c[] = {
|
||||
"__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
@@ -86,24 +86,24 @@ static int BuildKernel( const char *name, const char *operator_symbol, int vecto
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c)/sizeof(c[0]);
|
||||
|
||||
|
||||
if( sizeValues[vectorSize] == 3 )
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3)/sizeof(c3[0]);
|
||||
}
|
||||
|
||||
|
||||
char testName[32];
|
||||
snprintf( testName, sizeof( testName ) -1, "%s_kernel%s", name, sizeNames[vectorSize] );
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
|
||||
}
|
||||
|
||||
static int BuildKernelDouble( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
|
||||
{
|
||||
const char *c[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
const char *c[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
@@ -111,7 +111,7 @@ static int BuildKernelDouble( const char *name, const char *operator_symbol, int
|
||||
"}\n"
|
||||
};
|
||||
const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
|
||||
"__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2)\n"
|
||||
"__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
@@ -152,18 +152,18 @@ static int BuildKernelDouble( const char *name, const char *operator_symbol, int
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c)/sizeof(c[0]);
|
||||
|
||||
|
||||
if( sizeValues[vectorSize] == 3 )
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3)/sizeof(c3[0]);
|
||||
}
|
||||
|
||||
|
||||
char testName[32];
|
||||
snprintf( testName, sizeof( testName ) -1, "%s_kernel%s", name, sizeNames[vectorSize] );
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
@@ -224,20 +224,20 @@ typedef struct TestInfo
|
||||
static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const float specialValuesFloat[] = {
|
||||
-NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f,
|
||||
-3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
|
||||
static const float specialValuesFloat[] = {
|
||||
-NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f,
|
||||
-3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
|
||||
MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f,
|
||||
|
||||
+NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
|
||||
+3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
|
||||
MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
|
||||
|
||||
+NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
|
||||
+3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
|
||||
MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
|
||||
};
|
||||
|
||||
static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] );
|
||||
@@ -264,7 +264,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for every thread
|
||||
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
||||
{
|
||||
@@ -325,7 +325,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
|
||||
BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->name, f->nameInCode };
|
||||
if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
|
||||
goto exit;
|
||||
|
||||
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
@@ -340,7 +340,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( error )
|
||||
goto exit;
|
||||
|
||||
@@ -349,7 +349,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
|
||||
else
|
||||
vlog( "passed." );
|
||||
}
|
||||
|
||||
|
||||
|
||||
if( gMeasureTimes )
|
||||
{
|
||||
@@ -400,7 +400,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
|
||||
vlog_error( "Error %d at clFinish\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime( endTime, startTime );
|
||||
sum += time;
|
||||
@@ -414,7 +414,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
|
||||
vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( ! gSkipCorrectnessTesting )
|
||||
vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 );
|
||||
vlog( "\n" );
|
||||
@@ -443,7 +443,7 @@ exit:
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
|
||||
free( test_info.tinfo );
|
||||
}
|
||||
|
||||
@@ -476,30 +476,30 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Flush the queue so the commands enqueued above get moving
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush failed\n" );
|
||||
|
||||
|
||||
//Init input array
|
||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
|
||||
int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if( job_id <= (cl_uint)indx )
|
||||
|
||||
int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if( job_id <= (cl_uint)indx )
|
||||
{ // test edge cases
|
||||
float *fp = (float *)p;
|
||||
float *fp2 = (float *)p2;
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesFloatCount;
|
||||
y = (job_id * buffer_elements) / specialValuesFloatCount;
|
||||
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesFloatCount;
|
||||
y = (job_id * buffer_elements) / specialValuesFloatCount;
|
||||
|
||||
for( ; j < buffer_elements; j++ )
|
||||
{
|
||||
fp[j] = specialValuesFloat[x];
|
||||
@@ -520,7 +520,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
}
|
||||
|
||||
|
||||
if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
|
||||
{
|
||||
vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
|
||||
@@ -538,15 +538,15 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
//Wait for the map to finish
|
||||
if( (error = clWaitForEvents(1, e + j) ))
|
||||
{
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
if( (error = clReleaseEvent( e[j] ) ))
|
||||
{
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry over
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(out[j], &pattern, buffer_size);
|
||||
@@ -560,7 +560,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
|
||||
if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
|
||||
@@ -587,33 +587,33 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
memset( &oldMode, 0, sizeof( oldMode ) );
|
||||
if( ftz )
|
||||
ForceFTZ( &oldMode );
|
||||
|
||||
|
||||
// Set the rounding mode to match the device
|
||||
RoundingMode oldRoundMode = kRoundToNearestEven;
|
||||
if (gIsInRTZMode)
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
|
||||
//Calculate the correctly rounded reference result
|
||||
float *r = (float *)gOut_Ref + thread_id * buffer_elements;
|
||||
float *s = (float *)gIn + thread_id * buffer_elements;
|
||||
float *s2 = (float *)gIn2 + thread_id * buffer_elements;
|
||||
if( gInfNanSupport )
|
||||
if( gInfNanSupport )
|
||||
{
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
r[j] = (float) func.f_ff( s[j], s2[j] );
|
||||
}
|
||||
else
|
||||
}
|
||||
else
|
||||
{
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
{
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
r[j] = (float) func.f_ff( s[j], s2[j] );
|
||||
overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (gIsInRTZMode)
|
||||
(void)set_round(oldRoundMode, kfloat);
|
||||
(void)set_round(oldRoundMode, kfloat);
|
||||
|
||||
if( ftz )
|
||||
RestoreFPState( &oldMode );
|
||||
@@ -626,7 +626,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for the last buffer
|
||||
@@ -635,7 +635,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
//Verify data
|
||||
cl_uint *t = (cl_uint *)r;
|
||||
@@ -644,30 +644,30 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
|
||||
{
|
||||
cl_uint *q = out[k];
|
||||
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if( t[j] != q[j] )
|
||||
{
|
||||
float test = ((float*) q)[j];
|
||||
double correct = func.f_ff( s[j], s2[j] );
|
||||
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or output is an infinity or NaN, or if overflow occurs
|
||||
if ( !gInfNanSupport)
|
||||
if ( !gInfNanSupport)
|
||||
{
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( overflow[j] ||
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( overflow[j] ||
|
||||
IsFloatInfinity(correct) || IsFloatNaN(correct) ||
|
||||
IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j]) ||
|
||||
IsFloatInfinity(s[j]) || IsFloatNaN(s[j]) )
|
||||
continue;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Per section 10 paragraph 6, accept embedded devices always returning positive 0.0.
|
||||
if (gIsEmbedded && (t[j] == 0x80000000) && (q[j] == 0x00000000)) continue;
|
||||
// Per section 10 paragraph 6, accept embedded devices always returning positive 0.0.
|
||||
if (gIsEmbedded && (t[j] == 0x80000000) && (q[j] == 0x00000000)) continue;
|
||||
|
||||
float err = Ulp_Error( test, correct );
|
||||
int fail = ! (fabsf(err) <= ulps);
|
||||
|
||||
|
||||
if( fail && ftz )
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
@@ -683,25 +683,25 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
double correct2, correct3;
|
||||
float err2, err3;
|
||||
|
||||
|
||||
if( !gInfNanSupport )
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
|
||||
|
||||
correct2 = func.f_ff( 0.0, s2[j] );
|
||||
correct3 = func.f_ff( -0.0, s2[j] );
|
||||
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or output is an infinity or NaN, or if overflow occurs
|
||||
if( !gInfNanSupport )
|
||||
{
|
||||
if( fetestexcept(FE_OVERFLOW) )
|
||||
continue;
|
||||
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
|
||||
IsFloatInfinity(correct3) || IsFloatNaN(correct3) )
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
err2 = Ulp_Error( test, correct2 );
|
||||
err3 = Ulp_Error( test, correct3 );
|
||||
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
|
||||
@@ -709,7 +709,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = err2;
|
||||
if( fabsf( err3 ) < fabsf(err ) )
|
||||
err = err3;
|
||||
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
|
||||
{
|
||||
@@ -717,13 +717,13 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
if( ! fail )
|
||||
err = 0.0f;
|
||||
}
|
||||
|
||||
|
||||
//try with both args as zero
|
||||
if( IsFloatSubnormal( s2[j] ) )
|
||||
{
|
||||
double correct4, correct5;
|
||||
float err4, err5;
|
||||
|
||||
float err4, err5;
|
||||
|
||||
if( !gInfNanSupport )
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
|
||||
@@ -731,21 +731,21 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
correct3 = func.f_ff( -0.0, 0.0 );
|
||||
correct4 = func.f_ff( 0.0, -0.0 );
|
||||
correct5 = func.f_ff( -0.0, -0.0 );
|
||||
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or output is an infinity or NaN, or if overflow occurs
|
||||
if( !gInfNanSupport )
|
||||
{
|
||||
if( fetestexcept(FE_OVERFLOW) )
|
||||
continue;
|
||||
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
|
||||
IsFloatInfinity(correct3) || IsFloatNaN(correct3) ||
|
||||
IsFloatInfinity(correct4) || IsFloatNaN(correct4) ||
|
||||
IsFloatInfinity(correct5) || IsFloatNaN(correct5) )
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
err2 = Ulp_Error( test, correct2 );
|
||||
err3 = Ulp_Error( test, correct3 );
|
||||
err4 = Ulp_Error( test, correct4 );
|
||||
@@ -778,18 +778,18 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
|
||||
if( !gInfNanSupport )
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
|
||||
|
||||
correct2 = func.f_ff( s[j], 0.0 );
|
||||
correct3 = func.f_ff( s[j], -0.0 );
|
||||
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or output is an infinity or NaN, or if overflow occurs
|
||||
if ( !gInfNanSupport)
|
||||
if ( !gInfNanSupport)
|
||||
{
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( overflow[j] ||
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( overflow[j] ||
|
||||
IsFloatInfinity(correct) || IsFloatNaN(correct) ||
|
||||
IsFloatInfinity(correct2)|| IsFloatNaN(correct2) )
|
||||
continue;
|
||||
continue;
|
||||
}
|
||||
|
||||
err2 = Ulp_Error( test, correct2 );
|
||||
@@ -799,7 +799,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = err2;
|
||||
if( fabsf( err3 ) < fabsf(err ) )
|
||||
err = err3;
|
||||
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
|
||||
{
|
||||
@@ -809,13 +809,13 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
if( fabsf(err ) > tinfo->maxError )
|
||||
{
|
||||
tinfo->maxError = fabsf(err);
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
}
|
||||
if( fail )
|
||||
{
|
||||
@@ -833,9 +833,9 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush 3 failed\n" );
|
||||
|
||||
@@ -848,28 +848,28 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
|
||||
exit:
|
||||
if( overflow )
|
||||
free( overflow );
|
||||
free( overflow );
|
||||
return error;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const double specialValuesDouble[] = {
|
||||
-NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5,
|
||||
-3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
|
||||
static const double specialValuesDouble[] = {
|
||||
-NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5,
|
||||
-3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
|
||||
MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
|
||||
|
||||
+NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5,
|
||||
+3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
|
||||
|
||||
+NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5,
|
||||
+3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
|
||||
MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
|
||||
};
|
||||
|
||||
@@ -897,7 +897,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for every thread
|
||||
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
||||
{
|
||||
@@ -959,7 +959,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
|
||||
BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->name, f->nameInCode };
|
||||
if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
|
||||
goto exit;
|
||||
|
||||
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
@@ -974,7 +974,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( error )
|
||||
goto exit;
|
||||
|
||||
@@ -983,7 +983,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
|
||||
else
|
||||
vlog( "passed." );
|
||||
}
|
||||
|
||||
|
||||
if( gMeasureTimes )
|
||||
{
|
||||
//Init input arrays
|
||||
@@ -1033,7 +1033,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
|
||||
vlog_error( "Error %d at clFinish\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime( endTime, startTime );
|
||||
sum += time;
|
||||
@@ -1049,14 +1049,14 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
|
||||
for( ; j < gMaxVectorSizeIndex; j++ )
|
||||
vlog( "\t -- " );
|
||||
}
|
||||
|
||||
|
||||
if( ! gSkipCorrectnessTesting )
|
||||
vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 );
|
||||
vlog( "\n" );
|
||||
|
||||
|
||||
exit:
|
||||
// Release
|
||||
// Release
|
||||
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
@@ -1079,7 +1079,7 @@ exit:
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
|
||||
free( test_info.tinfo );
|
||||
}
|
||||
|
||||
@@ -1113,29 +1113,29 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush failed\n" );
|
||||
|
||||
|
||||
//Init input array
|
||||
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
|
||||
cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if( job_id <= (cl_uint)indx )
|
||||
int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if( job_id <= (cl_uint)indx )
|
||||
{ // test edge cases
|
||||
cl_double *fp = (cl_double *)p;
|
||||
cl_double *fp2 = (cl_double *)p2;
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesDoubleCount;
|
||||
y = (job_id * buffer_elements) / specialValuesDoubleCount;
|
||||
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesDoubleCount;
|
||||
y = (job_id * buffer_elements) / specialValuesDoubleCount;
|
||||
|
||||
for( ; j < buffer_elements; j++ )
|
||||
{
|
||||
fp[j] = specialValuesDouble[x];
|
||||
@@ -1156,7 +1156,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
p[j] = genrand_int64(d);
|
||||
p2[j] = genrand_int64(d);
|
||||
}
|
||||
|
||||
|
||||
if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
|
||||
{
|
||||
vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
|
||||
@@ -1174,15 +1174,15 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
//Wait for the map to finish
|
||||
if( (error = clWaitForEvents(1, e + j) ))
|
||||
{
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
if( (error = clReleaseEvent( e[j] ) ))
|
||||
{
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry over
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(out[j], &pattern, buffer_size);
|
||||
@@ -1196,7 +1196,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
|
||||
if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
|
||||
@@ -1214,14 +1214,14 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
|
||||
if( gSkipCorrectnessTesting )
|
||||
return CL_SUCCESS;
|
||||
|
||||
|
||||
//Calculate the correctly rounded reference result
|
||||
cl_double *r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
||||
cl_double *s = (cl_double *)gIn + thread_id * buffer_elements;
|
||||
cl_double *s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
r[j] = (cl_double) func.f_ff( s[j], s2[j] );
|
||||
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
|
||||
for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
@@ -1230,7 +1230,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for the last buffer
|
||||
@@ -1239,7 +1239,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
//Verify data
|
||||
cl_ulong *t = (cl_ulong *)r;
|
||||
@@ -1248,7 +1248,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
|
||||
{
|
||||
cl_ulong *q = out[k];
|
||||
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if( t[j] != q[j] )
|
||||
{
|
||||
@@ -1256,7 +1256,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
long double correct = func.f_ff( s[j], s2[j] );
|
||||
float err = Ulp_Error_Double( test, correct );
|
||||
int fail = ! (fabsf(err) <= ulps);
|
||||
|
||||
|
||||
if( fail && ftz )
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
@@ -1267,7 +1267,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = 0.0f;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// retry per section 6.5.3.3
|
||||
if( IsDoubleSubnormal( s[j] ) )
|
||||
{
|
||||
@@ -1280,7 +1280,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = err2;
|
||||
if( fabsf( err3 ) < fabsf(err ) )
|
||||
err = err3;
|
||||
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
|
||||
{
|
||||
@@ -1288,7 +1288,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
if( ! fail )
|
||||
err = 0.0f;
|
||||
}
|
||||
|
||||
|
||||
//try with both args as zero
|
||||
if( IsDoubleSubnormal( s2[j] ) )
|
||||
{
|
||||
@@ -1332,7 +1332,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = err2;
|
||||
if( fabsf( err3 ) < fabsf(err ) )
|
||||
err = err3;
|
||||
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
|
||||
{
|
||||
@@ -1342,12 +1342,12 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( fabsf(err ) > tinfo->maxError )
|
||||
{
|
||||
tinfo->maxError = fabsf(err);
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
}
|
||||
if( fail )
|
||||
{
|
||||
@@ -1358,16 +1358,16 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush 3 failed\n" );
|
||||
|
||||
@@ -1380,7 +1380,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
|
||||
exit:
|
||||
return error;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user