mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Factor out GetTernaryKernel (#1511)
Use a common function to create the kernel source code for testing 3-argument math builtins. This reduces code duplication. 1-argument and 2-argument math kernel construction will be factored out in future work. Change the kernels to use preprocessor defines for argument types and undef values, to make the CTS code easier to read. Co-authored-by: Marco Antognini <marco.antognini@arm.com> Signed-off-by: Marco Antognini <marco.antognini@arm.com> Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com> Signed-off-by: Marco Antognini <marco.antognini@arm.com> Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com> Co-authored-by: Marco Antognini <marco.antognini@arm.com>
This commit is contained in:
committed by
GitHub
parent
07b055cd68
commit
d9a938b698
@@ -9,6 +9,7 @@ set(${MODULE_NAME}_SOURCES
|
||||
binary_operator_float.cpp
|
||||
binary_two_results_i_double.cpp
|
||||
binary_two_results_i_float.cpp
|
||||
common.cpp
|
||||
common.h
|
||||
function_list.cpp
|
||||
function_list.h
|
||||
|
||||
170
test_conformance/math_brute_force/common.cpp
Normal file
170
test_conformance/math_brute_force/common.cpp
Normal file
@@ -0,0 +1,170 @@
|
||||
//
|
||||
// Copyright (c) 2022 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#include "utility.h" // for sizeNames and sizeValues.
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace {
|
||||
|
||||
const char *GetTypeName(ParameterType type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case ParameterType::Float: return "float";
|
||||
case ParameterType::Double: return "double";
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const char *GetUndefValue(ParameterType type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case ParameterType::Float:
|
||||
case ParameterType::Double: return "NAN";
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void EmitDefineType(std::ostringstream &kernel, const char *name,
|
||||
ParameterType type, int vector_size_index)
|
||||
{
|
||||
kernel << "#define " << name << " " << GetTypeName(type)
|
||||
<< sizeNames[vector_size_index] << '\n';
|
||||
kernel << "#define " << name << "_SCALAR " << GetTypeName(type) << '\n';
|
||||
}
|
||||
|
||||
void EmitDefineUndef(std::ostringstream &kernel, const char *name,
|
||||
ParameterType type)
|
||||
{
|
||||
kernel << "#define " << name << " " << GetUndefValue(type) << '\n';
|
||||
}
|
||||
|
||||
void EmitEnableExtension(std::ostringstream &kernel, ParameterType type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case ParameterType::Double:
|
||||
kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
|
||||
break;
|
||||
|
||||
case ParameterType::Float:
|
||||
// No extension required.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
std::string GetKernelName(int vector_size_index)
|
||||
{
|
||||
return std::string("math_kernel") + sizeNames[vector_size_index];
|
||||
}
|
||||
|
||||
std::string GetTernaryKernel(const std::string &kernel_name,
|
||||
const char *builtin, ParameterType retType,
|
||||
ParameterType type1, ParameterType type2,
|
||||
ParameterType type3, int vector_size_index)
|
||||
{
|
||||
// To keep the kernel code readable, use macros for types and undef values.
|
||||
std::ostringstream kernel;
|
||||
EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
|
||||
EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
|
||||
EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
|
||||
EmitDefineType(kernel, "TYPE3", type3, vector_size_index);
|
||||
EmitDefineUndef(kernel, "UNDEF1", type1);
|
||||
EmitDefineUndef(kernel, "UNDEF2", type2);
|
||||
EmitDefineUndef(kernel, "UNDEF3", type3);
|
||||
EmitEnableExtension(kernel, type1);
|
||||
|
||||
// clang-format off
|
||||
const char *kernel_nonvec3[] = { R"(
|
||||
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
|
||||
__global TYPE1* in1,
|
||||
__global TYPE2* in2,
|
||||
__global TYPE3* in3)
|
||||
{
|
||||
size_t i = get_global_id(0);
|
||||
out[i] = )", builtin, R"((in1[i], in2[i], in3[i]);
|
||||
}
|
||||
)" };
|
||||
|
||||
const char *kernel_vec3[] = { R"(
|
||||
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
|
||||
__global TYPE1_SCALAR* in1,
|
||||
__global TYPE2_SCALAR* in2,
|
||||
__global TYPE3_SCALAR* in3)
|
||||
{
|
||||
size_t i = get_global_id(0);
|
||||
|
||||
if (i + 1 < get_global_size(0))
|
||||
{
|
||||
TYPE1 a = vload3(0, in1 + 3 * i);
|
||||
TYPE2 b = vload3(0, in2 + 3 * i);
|
||||
TYPE3 c = vload3(0, in3 + 3 * i);
|
||||
RETTYPE res = )", builtin, R"((a, b, c);
|
||||
vstore3(res, 0, out + 3 * i);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Figure out how many elements are left over after
|
||||
// BUFFER_SIZE % (3 * sizeof(type)).
|
||||
// Assume power of two buffer size.
|
||||
size_t parity = i & 1;
|
||||
TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
|
||||
TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
|
||||
TYPE3 c = (TYPE3)(UNDEF3, UNDEF3, UNDEF3);
|
||||
switch (parity)
|
||||
{
|
||||
case 0:
|
||||
a.y = in1[3 * i + 1];
|
||||
b.y = in2[3 * i + 1];
|
||||
c.y = in3[3 * i + 1];
|
||||
// fall through
|
||||
case 1:
|
||||
a.x = in1[3 * i];
|
||||
b.x = in2[3 * i];
|
||||
c.x = in3[3 * i];
|
||||
break;
|
||||
}
|
||||
|
||||
RETTYPE res = )", builtin, R"((a, b, c);
|
||||
|
||||
switch (parity)
|
||||
{
|
||||
case 0:
|
||||
out[3 * i + 1] = res.y;
|
||||
// fall through
|
||||
case 1:
|
||||
out[3 * i] = res.x;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
)" };
|
||||
// clang-format on
|
||||
|
||||
if (sizeValues[vector_size_index] != 3)
|
||||
for (const auto &chunk : kernel_nonvec3) kernel << chunk;
|
||||
else
|
||||
for (const auto &chunk : kernel_vec3) kernel << chunk;
|
||||
|
||||
return kernel.str();
|
||||
}
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "utility.h"
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Array of thread-specific kernels for each vector size.
|
||||
@@ -31,6 +32,22 @@ using Programs = std::array<clProgramWrapper, VECTOR_SIZE_COUNT>;
|
||||
// Array of buffers for each vector size.
|
||||
using Buffers = std::array<clMemWrapper, VECTOR_SIZE_COUNT>;
|
||||
|
||||
// Types supported for kernel code generation.
|
||||
enum class ParameterType
|
||||
{
|
||||
Float,
|
||||
Double,
|
||||
};
|
||||
|
||||
// Return kernel name suffixed with vector size.
|
||||
std::string GetKernelName(int vector_size_index);
|
||||
|
||||
// Generate kernel code for the given builtin function/operator.
|
||||
std::string GetTernaryKernel(const std::string &kernel_name,
|
||||
const char *builtin, ParameterType retType,
|
||||
ParameterType type1, ParameterType type2,
|
||||
ParameterType type3, int vector_size_index);
|
||||
|
||||
// Information to generate OpenCL kernels.
|
||||
struct BuildKernelInfo
|
||||
{
|
||||
|
||||
@@ -26,94 +26,13 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in1, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in2, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in3 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in1[i], in2[i], in3[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double* out, __global double* in, __global double* in2, "
|
||||
"__global double* in3)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" double3 d0 = vload3( 0, in + 3 * i );\n"
|
||||
" double3 d1 = vload3( 0, in2 + 3 * i );\n"
|
||||
" double3 d2 = vload3( 0, in3 + 3 * i );\n"
|
||||
" d0 = ",
|
||||
name,
|
||||
"( d0, d1, d2 );\n"
|
||||
" vstore3( d0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" double3 d0;\n"
|
||||
" double3 d1;\n"
|
||||
" double3 d2;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" d0 = (double3)( in[3*i], NAN, NAN ); \n"
|
||||
" d1 = (double3)( in2[3*i], NAN, NAN ); \n"
|
||||
" d2 = (double3)( in3[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
|
||||
" d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" d0 = ",
|
||||
name,
|
||||
"( d0, d1, d2 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = d0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = d0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double,
|
||||
ParameterType::Double, ParameterType::Double,
|
||||
ParameterType::Double, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
struct BuildKernelInfo2
|
||||
|
||||
@@ -26,92 +26,13 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in1, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in2, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in3 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in1[i], in2[i], in3[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float* out, __global float* in, __global float* in2, "
|
||||
"__global float* in3)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" float3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" float3 f1 = vload3( 0, in2 + 3 * i );\n"
|
||||
" float3 f2 = vload3( 0, in3 + 3 * i );\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, f1, f2 );\n"
|
||||
" vstore3( f0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" float3 f0;\n"
|
||||
" float3 f1;\n"
|
||||
" float3 f2;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (float3)( in[3*i], NAN, NAN ); \n"
|
||||
" f1 = (float3)( in2[3*i], NAN, NAN ); \n"
|
||||
" f2 = (float3)( in3[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
|
||||
" f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, f1, f2 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = f0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = f0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float,
|
||||
ParameterType::Float, ParameterType::Float,
|
||||
ParameterType::Float, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
struct BuildKernelInfo2
|
||||
|
||||
@@ -30,94 +30,13 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in1, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in2, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in3 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in1[i], in2[i], in3[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double* out, __global double* in, __global double* in2, "
|
||||
"__global double* in3)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" double3 d0 = vload3( 0, in + 3 * i );\n"
|
||||
" double3 d1 = vload3( 0, in2 + 3 * i );\n"
|
||||
" double3 d2 = vload3( 0, in3 + 3 * i );\n"
|
||||
" d0 = ",
|
||||
name,
|
||||
"( d0, d1, d2 );\n"
|
||||
" vstore3( d0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" double3 d0;\n"
|
||||
" double3 d1;\n"
|
||||
" double3 d2;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" d0 = (double3)( in[3*i], NAN, NAN ); \n"
|
||||
" d1 = (double3)( in2[3*i], NAN, NAN ); \n"
|
||||
" d2 = (double3)( in3[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
|
||||
" d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" d0 = ",
|
||||
name,
|
||||
"( d0, d1, d2 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = d0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = d0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double,
|
||||
ParameterType::Double, ParameterType::Double,
|
||||
ParameterType::Double, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
struct BuildKernelInfo2
|
||||
|
||||
@@ -30,92 +30,13 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in1, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in2, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in3 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in1[i], in2[i], in3[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float* out, __global float* in, __global float* in2, "
|
||||
"__global float* in3)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" float3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" float3 f1 = vload3( 0, in2 + 3 * i );\n"
|
||||
" float3 f2 = vload3( 0, in3 + 3 * i );\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, f1, f2 );\n"
|
||||
" vstore3( f0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" float3 f0;\n"
|
||||
" float3 f1;\n"
|
||||
" float3 f2;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (float3)( in[3*i], NAN, NAN ); \n"
|
||||
" f1 = (float3)( in2[3*i], NAN, NAN ); \n"
|
||||
" f2 = (float3)( in3[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
|
||||
" f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, f1, f2 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = f0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = f0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float,
|
||||
ParameterType::Float, ParameterType::Float,
|
||||
ParameterType::Float, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
struct BuildKernelInfo2
|
||||
|
||||
Reference in New Issue
Block a user