mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Make variables local to loops, with appropriate types. These variables are not read after the loop without being reset first, so this patch doesn't change behaviour. These variables should now be used for one purpose only, making it easier to reason about the code. This will make future refactoring easier. Signed-off-by: Marco Antognini <marco.antognini@arm.com>
301 lines
10 KiB
C++
301 lines
10 KiB
C++
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
#include "function_list.h"
|
|
#include "test_functions.h"
|
|
#include "utility.h"
|
|
|
|
#include <cstring>
|
|
|
|
static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
|
|
cl_program *p, bool relaxedMode)
|
|
{
|
|
const char *c[] = { "__kernel void math_kernel",
|
|
sizeNames[vectorSize],
|
|
"( __global float",
|
|
sizeNames[vectorSize],
|
|
"* out, __global float",
|
|
sizeNames[vectorSize],
|
|
"* in1, __global float",
|
|
sizeNames[vectorSize],
|
|
"* in2, __global float",
|
|
sizeNames[vectorSize],
|
|
"* in3 )\n"
|
|
"{\n"
|
|
" size_t i = get_global_id(0);\n"
|
|
" out[i] = ",
|
|
name,
|
|
"( in1[i], in2[i], in3[i] );\n"
|
|
"}\n" };
|
|
|
|
const char *c3[] = {
|
|
"__kernel void math_kernel",
|
|
sizeNames[vectorSize],
|
|
"( __global float* out, __global float* in, __global float* in2, "
|
|
"__global float* in3)\n"
|
|
"{\n"
|
|
" size_t i = get_global_id(0);\n"
|
|
" if( i + 1 < get_global_size(0) )\n"
|
|
" {\n"
|
|
" float3 f0 = vload3( 0, in + 3 * i );\n"
|
|
" float3 f1 = vload3( 0, in2 + 3 * i );\n"
|
|
" float3 f2 = vload3( 0, in3 + 3 * i );\n"
|
|
" f0 = ",
|
|
name,
|
|
"( f0, f1, f2 );\n"
|
|
" vstore3( f0, 0, out + 3*i );\n"
|
|
" }\n"
|
|
" else\n"
|
|
" {\n"
|
|
" size_t parity = i & 1; // Figure out how many elements are "
|
|
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
|
"buffer size \n"
|
|
" float3 f0;\n"
|
|
" float3 f1;\n"
|
|
" float3 f2;\n"
|
|
" switch( parity )\n"
|
|
" {\n"
|
|
" case 1:\n"
|
|
" f0 = (float3)( in[3*i], NAN, NAN ); \n"
|
|
" f1 = (float3)( in2[3*i], NAN, NAN ); \n"
|
|
" f2 = (float3)( in3[3*i], NAN, NAN ); \n"
|
|
" break;\n"
|
|
" case 0:\n"
|
|
" f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
|
|
" f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
|
|
" f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
|
|
" break;\n"
|
|
" }\n"
|
|
" f0 = ",
|
|
name,
|
|
"( f0, f1, f2 );\n"
|
|
" switch( parity )\n"
|
|
" {\n"
|
|
" case 0:\n"
|
|
" out[3*i+1] = f0.y; \n"
|
|
" // fall through\n"
|
|
" case 1:\n"
|
|
" out[3*i] = f0.x; \n"
|
|
" break;\n"
|
|
" }\n"
|
|
" }\n"
|
|
"}\n"
|
|
};
|
|
|
|
const char **kern = c;
|
|
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
|
|
|
if (sizeValues[vectorSize] == 3)
|
|
{
|
|
kern = c3;
|
|
kernSize = sizeof(c3) / sizeof(c3[0]);
|
|
}
|
|
|
|
char testName[32];
|
|
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
|
sizeNames[vectorSize]);
|
|
|
|
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
|
}
|
|
|
|
typedef struct BuildKernelInfo
|
|
{
|
|
cl_uint offset; // the first vector size to build
|
|
cl_kernel *kernels;
|
|
cl_program *programs;
|
|
const char *nameInCode;
|
|
bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
|
|
} BuildKernelInfo;
|
|
|
|
static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
|
{
|
|
BuildKernelInfo *info = (BuildKernelInfo *)p;
|
|
cl_uint i = info->offset + job_id;
|
|
return BuildKernel(info->nameInCode, i, info->kernels + i,
|
|
info->programs + i, info->relaxedMode);
|
|
}
|
|
|
|
int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
|
{
|
|
int error;
|
|
|
|
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
|
|
|
|
cl_program programs[VECTOR_SIZE_COUNT];
|
|
cl_kernel kernels[VECTOR_SIZE_COUNT];
|
|
float maxError = 0.0f;
|
|
float maxErrorVal = 0.0f;
|
|
float maxErrorVal2 = 0.0f;
|
|
float maxErrorVal3 = 0.0f;
|
|
uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
|
|
|
|
// Init the kernels
|
|
{
|
|
BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
|
|
f->nameInCode, relaxedMode };
|
|
if ((error = ThreadPool_Do(BuildKernelFn,
|
|
gMaxVectorSizeIndex - gMinVectorSizeIndex,
|
|
&build_info)))
|
|
return error;
|
|
}
|
|
|
|
for (uint64_t i = 0; i < (1ULL << 32); i += step)
|
|
{
|
|
// Init input array
|
|
cl_uint *p = (cl_uint *)gIn;
|
|
cl_uint *p2 = (cl_uint *)gIn2;
|
|
cl_uint *p3 = (cl_uint *)gIn3;
|
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
|
{
|
|
p[j] = genrand_int32(d);
|
|
p2[j] = genrand_int32(d);
|
|
p3[j] = genrand_int32(d);
|
|
}
|
|
|
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
|
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
|
{
|
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
|
return error;
|
|
}
|
|
|
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
|
BUFFER_SIZE, gIn2, 0, NULL, NULL)))
|
|
{
|
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
|
return error;
|
|
}
|
|
|
|
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
|
|
BUFFER_SIZE, gIn3, 0, NULL, NULL)))
|
|
{
|
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
|
|
return error;
|
|
}
|
|
|
|
// write garbage into output arrays
|
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
|
{
|
|
uint32_t pattern = 0xffffdead;
|
|
memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
|
|
if ((error =
|
|
clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
|
|
BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
|
|
{
|
|
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
|
|
error, j);
|
|
goto exit;
|
|
}
|
|
}
|
|
|
|
// Run the kernels
|
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
|
{
|
|
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
|
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
|
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
|
|
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
|
&gOutBuffer[j])))
|
|
{
|
|
LogBuildError(programs[j]);
|
|
goto exit;
|
|
}
|
|
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
|
|
&gInBuffer)))
|
|
{
|
|
LogBuildError(programs[j]);
|
|
goto exit;
|
|
}
|
|
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
|
|
&gInBuffer2)))
|
|
{
|
|
LogBuildError(programs[j]);
|
|
goto exit;
|
|
}
|
|
if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
|
|
&gInBuffer3)))
|
|
{
|
|
LogBuildError(programs[j]);
|
|
goto exit;
|
|
}
|
|
|
|
if ((error =
|
|
clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
|
&localCount, NULL, 0, NULL, NULL)))
|
|
{
|
|
vlog_error("FAILED -- could not execute kernel\n");
|
|
goto exit;
|
|
}
|
|
}
|
|
|
|
// Get that moving
|
|
if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
|
|
|
|
// Calculate the correctly rounded reference result
|
|
float *r = (float *)gOut_Ref;
|
|
float *s = (float *)gIn;
|
|
float *s2 = (float *)gIn2;
|
|
float *s3 = (float *)gIn3;
|
|
for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
|
r[j] = (float)f->func.f_fff(s[j], s2[j], s3[j]);
|
|
|
|
// Read the data back
|
|
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
|
{
|
|
if ((error =
|
|
clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
|
|
BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
|
|
{
|
|
vlog_error("ReadArray failed %d\n", error);
|
|
goto exit;
|
|
}
|
|
}
|
|
|
|
if (gSkipCorrectnessTesting) break;
|
|
|
|
// Verify data -- No verification possible.
|
|
// MAD is a random number generator.
|
|
if (0 == (i & 0x0fffffff))
|
|
{
|
|
vlog(".");
|
|
fflush(stdout);
|
|
}
|
|
}
|
|
|
|
if (!gSkipCorrectnessTesting)
|
|
{
|
|
if (gWimpyMode)
|
|
vlog("Wimp pass");
|
|
else
|
|
vlog("passed");
|
|
|
|
vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
|
|
maxErrorVal3);
|
|
}
|
|
|
|
vlog("\n");
|
|
|
|
exit:
|
|
// Release
|
|
for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
|
|
{
|
|
clReleaseKernel(kernels[k]);
|
|
clReleaseProgram(programs[k]);
|
|
}
|
|
|
|
return error;
|
|
}
|