Files
OpenCL-CTS/test_conformance/math_brute_force/common.cpp
Sven van Haastregt 63274f97b7 math_brute_force: Factor out GetUnaryKernel and GetBinaryKernel (#1525)
Use common functions to create the kernel source code for testing
1-argument and 2-argument math builtins.  This reduces code duplication.

Use appropriate patterns to initialise variables to their full bit
widths.  For example, `0xdead` was previously used to initialise 32-bit
integers, while now a larger number spanning all bytes is used.

Co-authored-by: Marco Antognini <marco.antognini@arm.com>
Signed-off-by: Marco Antognini <marco.antognini@arm.com>
Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>

Signed-off-by: Marco Antognini <marco.antognini@arm.com>
Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
Co-authored-by: Marco Antognini <marco.antognini@arm.com>
2022-11-01 13:08:36 -07:00

533 lines
16 KiB
C++

//
// Copyright (c) 2022 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
#include "utility.h" // for sizeNames and sizeValues.
#include <sstream>
#include <string>
namespace {
const char *GetTypeName(ParameterType type)
{
switch (type)
{
case ParameterType::Float: return "float";
case ParameterType::Double: return "double";
case ParameterType::Int: return "int";
case ParameterType::UInt: return "uint";
case ParameterType::Long: return "long";
case ParameterType::ULong: return "ulong";
}
return nullptr;
}
const char *GetUndefValue(ParameterType type)
{
switch (type)
{
case ParameterType::Float:
case ParameterType::Double: return "NAN";
case ParameterType::Int:
case ParameterType::UInt: return "0x12345678";
case ParameterType::Long:
case ParameterType::ULong: return "0x0ddf00dbadc0ffee";
}
return nullptr;
}
void EmitDefineType(std::ostringstream &kernel, const char *name,
ParameterType type, int vector_size_index)
{
kernel << "#define " << name << " " << GetTypeName(type)
<< sizeNames[vector_size_index] << '\n';
kernel << "#define " << name << "_SCALAR " << GetTypeName(type) << '\n';
}
void EmitDefineUndef(std::ostringstream &kernel, const char *name,
ParameterType type)
{
kernel << "#define " << name << " " << GetUndefValue(type) << '\n';
}
void EmitEnableExtension(std::ostringstream &kernel, ParameterType type)
{
switch (type)
{
case ParameterType::Double:
kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
break;
case ParameterType::Float:
case ParameterType::Int:
case ParameterType::UInt:
case ParameterType::Long:
case ParameterType::ULong:
// No extension required.
break;
}
}
} // anonymous namespace
std::string GetKernelName(int vector_size_index)
{
return std::string("math_kernel") + sizeNames[vector_size_index];
}
std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
ParameterType retType, ParameterType type1,
int vector_size_index)
{
// To keep the kernel code readable, use macros for types and undef values.
std::ostringstream kernel;
EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
EmitDefineUndef(kernel, "UNDEF1", type1);
EmitEnableExtension(kernel, type1);
// clang-format off
const char *kernel_nonvec3[] = { R"(
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
__global TYPE1* in1)
{
size_t i = get_global_id(0);
out[i] = )", builtin, R"((in1[i]);
}
)" };
const char *kernel_vec3[] = { R"(
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
__global TYPE1_SCALAR* in1)
{
size_t i = get_global_id(0);
if (i + 1 < get_global_size(0))
{
TYPE1 a = vload3(0, in1 + 3 * i);
RETTYPE res = )", builtin, R"((a);
vstore3(res, 0, out + 3 * i);
}
else
{
// Figure out how many elements are left over after
// BUFFER_SIZE % (3 * sizeof(type)).
// Assume power of two buffer size.
size_t parity = i & 1;
TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
switch (parity)
{
case 0:
a.y = in1[3 * i + 1];
// fall through
case 1:
a.x = in1[3 * i];
break;
}
RETTYPE res = )", builtin, R"((a);
switch (parity)
{
case 0:
out[3 * i + 1] = res.y;
// fall through
case 1:
out[3 * i] = res.x;
break;
}
}
}
)" };
// clang-format on
if (sizeValues[vector_size_index] != 3)
for (const auto &chunk : kernel_nonvec3) kernel << chunk;
else
for (const auto &chunk : kernel_vec3) kernel << chunk;
return kernel.str();
}
std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
ParameterType retType1, ParameterType retType2,
ParameterType type1, int vector_size_index)
{
// To keep the kernel code readable, use macros for types and undef values.
std::ostringstream kernel;
EmitDefineType(kernel, "RETTYPE1", retType1, vector_size_index);
EmitDefineType(kernel, "RETTYPE2", retType2, vector_size_index);
EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
EmitDefineUndef(kernel, "UNDEF1", type1);
EmitDefineUndef(kernel, "UNDEFR2", retType2);
EmitEnableExtension(kernel, type1);
// clang-format off
const char *kernel_nonvec3[] = { R"(
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1* out1,
__global RETTYPE2* out2,
__global TYPE1* in1)
{
size_t i = get_global_id(0);
out1[i] = )", builtin, R"((in1[i], out2 + i);
}
)" };
const char *kernel_vec3[] = { R"(
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1_SCALAR* out1,
__global RETTYPE2_SCALAR* out2,
__global TYPE1_SCALAR* in1)
{
size_t i = get_global_id(0);
if (i + 1 < get_global_size(0))
{
TYPE1 a = vload3(0, in1 + 3 * i);
RETTYPE2 res2 = UNDEFR2;
RETTYPE1 res1 = )", builtin, R"((a, &res2);
vstore3(res1, 0, out1 + 3 * i);
vstore3(res2, 0, out2 + 3 * i);
}
else
{
// Figure out how many elements are left over after
// BUFFER_SIZE % (3 * sizeof(type)).
// Assume power of two buffer size.
size_t parity = i & 1;
TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
switch (parity)
{
case 0:
a.y = in1[3 * i + 1];
// fall through
case 1:
a.x = in1[3 * i];
break;
}
RETTYPE2 res2 = UNDEFR2;
RETTYPE1 res1 = )", builtin, R"((a, &res2);
switch (parity)
{
case 0:
out1[3 * i + 1] = res1.y;
out2[3 * i + 1] = res2.y;
// fall through
case 1:
out1[3 * i] = res1.x;
out2[3 * i] = res2.x;
break;
}
}
}
)" };
// clang-format on
if (sizeValues[vector_size_index] != 3)
for (const auto &chunk : kernel_nonvec3) kernel << chunk;
else
for (const auto &chunk : kernel_vec3) kernel << chunk;
return kernel.str();
}
std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
ParameterType retType, ParameterType type1,
ParameterType type2, int vector_size_index)
{
// To keep the kernel code readable, use macros for types and undef values.
std::ostringstream kernel;
EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
EmitDefineUndef(kernel, "UNDEF1", type1);
EmitDefineUndef(kernel, "UNDEF2", type2);
EmitEnableExtension(kernel, type1);
const bool is_vec3 = sizeValues[vector_size_index] == 3;
std::string invocation;
if (strlen(builtin) == 1)
{
// Assume a single-character builtin is an operator (e.g., +, *, ...).
invocation = is_vec3 ? "a" : "in1[i] ";
invocation += builtin;
invocation += is_vec3 ? "b" : " in2[i]";
}
else
{
// Otherwise call the builtin as a function with two arguments.
invocation = builtin;
invocation += is_vec3 ? "(a, b)" : "(in1[i], in2[i])";
}
// clang-format off
const char *kernel_nonvec3[] = { R"(
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
__global TYPE1* in1,
__global TYPE2* in2)
{
size_t i = get_global_id(0);
out[i] = )", invocation.c_str(), R"(;
}
)" };
const char *kernel_vec3[] = { R"(
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
__global TYPE1_SCALAR* in1,
__global TYPE2_SCALAR* in2)
{
size_t i = get_global_id(0);
if (i + 1 < get_global_size(0))
{
TYPE1 a = vload3(0, in1 + 3 * i);
TYPE2 b = vload3(0, in2 + 3 * i);
RETTYPE res = )", invocation.c_str(), R"(;
vstore3(res, 0, out + 3 * i);
}
else
{
// Figure out how many elements are left over after
// BUFFER_SIZE % (3 * sizeof(type)).
// Assume power of two buffer size.
size_t parity = i & 1;
TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
switch (parity)
{
case 0:
a.y = in1[3 * i + 1];
b.y = in2[3 * i + 1];
// fall through
case 1:
a.x = in1[3 * i];
b.x = in2[3 * i];
break;
}
RETTYPE res = )", invocation.c_str(), R"(;
switch (parity)
{
case 0:
out[3 * i + 1] = res.y;
// fall through
case 1:
out[3 * i] = res.x;
break;
}
}
}
)" };
// clang-format on
if (!is_vec3)
for (const auto &chunk : kernel_nonvec3) kernel << chunk;
else
for (const auto &chunk : kernel_vec3) kernel << chunk;
return kernel.str();
}
std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
ParameterType retType1, ParameterType retType2,
ParameterType type1, ParameterType type2,
int vector_size_index)
{
// To keep the kernel code readable, use macros for types and undef values.
std::ostringstream kernel;
EmitDefineType(kernel, "RETTYPE1", retType1, vector_size_index);
EmitDefineType(kernel, "RETTYPE2", retType2, vector_size_index);
EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
EmitDefineUndef(kernel, "UNDEF1", type1);
EmitDefineUndef(kernel, "UNDEF2", type2);
EmitDefineUndef(kernel, "UNDEFR2", retType2);
EmitEnableExtension(kernel, type1);
// clang-format off
const char *kernel_nonvec3[] = { R"(
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1* out1,
__global RETTYPE2* out2,
__global TYPE1* in1,
__global TYPE2* in2)
{
size_t i = get_global_id(0);
out1[i] = )", builtin, R"((in1[i], in2[i], out2 + i);
}
)" };
const char *kernel_vec3[] = { R"(
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1_SCALAR* out1,
__global RETTYPE2_SCALAR* out2,
__global TYPE1_SCALAR* in1,
__global TYPE2_SCALAR* in2)
{
size_t i = get_global_id(0);
if (i + 1 < get_global_size(0))
{
TYPE1 a = vload3(0, in1 + 3 * i);
TYPE2 b = vload3(0, in2 + 3 * i);
RETTYPE2 res2 = UNDEFR2;
RETTYPE1 res1 = )", builtin, R"((a, b, &res2);
vstore3(res1, 0, out1 + 3 * i);
vstore3(res2, 0, out2 + 3 * i);
}
else
{
// Figure out how many elements are left over after
// BUFFER_SIZE % (3 * sizeof(type)).
// Assume power of two buffer size.
size_t parity = i & 1;
TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
switch (parity)
{
case 0:
a.y = in1[3 * i + 1];
b.y = in2[3 * i + 1];
// fall through
case 1:
a.x = in1[3 * i];
b.x = in2[3 * i];
break;
}
RETTYPE2 res2 = UNDEFR2;
RETTYPE1 res1 = )", builtin, R"((a, b, &res2);
switch (parity)
{
case 0:
out1[3 * i + 1] = res1.y;
out2[3 * i + 1] = res2.y;
// fall through
case 1:
out1[3 * i] = res1.x;
out2[3 * i] = res2.x;
break;
}
}
}
)" };
// clang-format on
if (sizeValues[vector_size_index] != 3)
for (const auto &chunk : kernel_nonvec3) kernel << chunk;
else
for (const auto &chunk : kernel_vec3) kernel << chunk;
return kernel.str();
}
std::string GetTernaryKernel(const std::string &kernel_name,
const char *builtin, ParameterType retType,
ParameterType type1, ParameterType type2,
ParameterType type3, int vector_size_index)
{
// To keep the kernel code readable, use macros for types and undef values.
std::ostringstream kernel;
EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
EmitDefineType(kernel, "TYPE3", type3, vector_size_index);
EmitDefineUndef(kernel, "UNDEF1", type1);
EmitDefineUndef(kernel, "UNDEF2", type2);
EmitDefineUndef(kernel, "UNDEF3", type3);
EmitEnableExtension(kernel, type1);
// clang-format off
const char *kernel_nonvec3[] = { R"(
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
__global TYPE1* in1,
__global TYPE2* in2,
__global TYPE3* in3)
{
size_t i = get_global_id(0);
out[i] = )", builtin, R"((in1[i], in2[i], in3[i]);
}
)" };
const char *kernel_vec3[] = { R"(
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
__global TYPE1_SCALAR* in1,
__global TYPE2_SCALAR* in2,
__global TYPE3_SCALAR* in3)
{
size_t i = get_global_id(0);
if (i + 1 < get_global_size(0))
{
TYPE1 a = vload3(0, in1 + 3 * i);
TYPE2 b = vload3(0, in2 + 3 * i);
TYPE3 c = vload3(0, in3 + 3 * i);
RETTYPE res = )", builtin, R"((a, b, c);
vstore3(res, 0, out + 3 * i);
}
else
{
// Figure out how many elements are left over after
// BUFFER_SIZE % (3 * sizeof(type)).
// Assume power of two buffer size.
size_t parity = i & 1;
TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
TYPE3 c = (TYPE3)(UNDEF3, UNDEF3, UNDEF3);
switch (parity)
{
case 0:
a.y = in1[3 * i + 1];
b.y = in2[3 * i + 1];
c.y = in3[3 * i + 1];
// fall through
case 1:
a.x = in1[3 * i];
b.x = in2[3 * i];
c.x = in3[3 * i];
break;
}
RETTYPE res = )", builtin, R"((a, b, c);
switch (parity)
{
case 0:
out[3 * i + 1] = res.y;
// fall through
case 1:
out[3 * i] = res.x;
break;
}
}
}
)" };
// clang-format on
if (sizeValues[vector_size_index] != 3)
for (const auto &chunk : kernel_nonvec3) kernel << chunk;
else
for (const auto &chunk : kernel_vec3) kernel << chunk;
return kernel.str();
}