mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
math_brute_force: Factor out GetUnaryKernel and GetBinaryKernel (#1525)
Use common functions to create the kernel source code for testing 1-argument and 2-argument math builtins. This reduces code duplication. Use appropriate patterns to initialise variables to their full bit widths. For example, `0xdead` was previously used to initialise 32-bit integers, while now a larger number spanning all bytes is used. Co-authored-by: Marco Antognini <marco.antognini@arm.com> Signed-off-by: Marco Antognini <marco.antognini@arm.com> Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com> Signed-off-by: Marco Antognini <marco.antognini@arm.com> Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com> Co-authored-by: Marco Antognini <marco.antognini@arm.com>
This commit is contained in:
committed by
GitHub
parent
44fe72c2b9
commit
63274f97b7
@@ -28,88 +28,13 @@ const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022);
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in1, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in2 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in1[i], in2[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double* out, __global double* in, __global double* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" double3 d0 = vload3( 0, in + 3 * i );\n"
|
||||
" double3 d1 = vload3( 0, in2 + 3 * i );\n"
|
||||
" d0 = ",
|
||||
name,
|
||||
"( d0, d1 );\n"
|
||||
" vstore3( d0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" double3 d0;\n"
|
||||
" double3 d1;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" d0 = (double3)( in[3*i], NAN, NAN ); \n"
|
||||
" d1 = (double3)( in2[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" d0 = ",
|
||||
name,
|
||||
"( d0, d1 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = d0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = d0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
|
||||
relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetBinaryKernel(kernel_name, name, ParameterType::Double,
|
||||
ParameterType::Double, ParameterType::Double,
|
||||
vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
||||
kernel_count, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
@@ -28,86 +28,13 @@ const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in1, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in2 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in1[i], in2[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float* out, __global float* in, __global float* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" float3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" float3 f1 = vload3( 0, in2 + 3 * i );\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, f1 );\n"
|
||||
" vstore3( f0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" float3 f0;\n"
|
||||
" float3 f1;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (float3)( in[3*i], NAN, NAN ); \n"
|
||||
" f1 = (float3)( in2[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, f1 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = f0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = f0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
|
||||
relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source =
|
||||
GetBinaryKernel(kernel_name, name, ParameterType::Float,
|
||||
ParameterType::Float, ParameterType::Float, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
||||
kernel_count, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
@@ -27,88 +27,13 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in1, __global int",
|
||||
sizeNames[vectorSize],
|
||||
"* in2 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in1[i], in2[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double* out, __global double* in, __global int* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" double3 d0 = vload3( 0, in + 3 * i );\n"
|
||||
" int3 i0 = vload3( 0, in2 + 3 * i );\n"
|
||||
" d0 = ",
|
||||
name,
|
||||
"( d0, i0 );\n"
|
||||
" vstore3( d0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" double3 d0;\n"
|
||||
" int3 i0;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" d0 = (double3)( in[3*i], NAN, NAN ); \n"
|
||||
" i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" d0 = ",
|
||||
name,
|
||||
"( d0, i0 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = d0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = d0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
|
||||
relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source =
|
||||
GetBinaryKernel(kernel_name, name, ParameterType::Double,
|
||||
ParameterType::Double, ParameterType::Int, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
||||
kernel_count, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
@@ -27,86 +27,13 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in1, __global int",
|
||||
sizeNames[vectorSize],
|
||||
"* in2 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in1[i], in2[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float* out, __global float* in, __global int* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" float3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" int3 i0 = vload3( 0, in2 + 3 * i );\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, i0 );\n"
|
||||
" vstore3( f0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" float3 f0;\n"
|
||||
" int3 i0;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (float3)( in[3*i], NAN, NAN ); \n"
|
||||
" i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, i0 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = f0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = f0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
|
||||
relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source =
|
||||
GetBinaryKernel(kernel_name, name, ParameterType::Float,
|
||||
ParameterType::Float, ParameterType::Int, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
||||
kernel_count, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
@@ -23,92 +23,16 @@
|
||||
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *operator_symbol, int vectorSize,
|
||||
cl_uint kernel_count, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in1, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in2 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = in1[i] ",
|
||||
operator_symbol,
|
||||
" in2[i];\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double* out, __global double* in, __global double* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" double3 d0 = vload3( 0, in + 3 * i );\n"
|
||||
" double3 d1 = vload3( 0, in2 + 3 * i );\n"
|
||||
" d0 = d0 ",
|
||||
operator_symbol,
|
||||
" d1;\n"
|
||||
" vstore3( d0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" double3 d0;\n"
|
||||
" double3 d1;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" d0 = (double3)( in[3*i], NAN, NAN ); \n"
|
||||
" d1 = (double3)( in2[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" d0 = d0 ",
|
||||
operator_symbol,
|
||||
" d1;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = d0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = d0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
|
||||
relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetBinaryKernel(kernel_name, name, ParameterType::Double,
|
||||
ParameterType::Double, ParameterType::Double,
|
||||
vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
||||
kernel_count, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
@@ -23,90 +23,16 @@
|
||||
|
||||
namespace {
|
||||
|
||||
int BuildKernel(const char *operator_symbol, int vectorSize,
|
||||
cl_uint kernel_count, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in1, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in2 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = in1[i] ",
|
||||
operator_symbol,
|
||||
" in2[i];\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float* out, __global float* in, __global float* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" float3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" float3 f1 = vload3( 0, in2 + 3 * i );\n"
|
||||
" f0 = f0 ",
|
||||
operator_symbol,
|
||||
" f1;\n"
|
||||
" vstore3( f0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" float3 f0;\n"
|
||||
" float3 f1;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (float3)( in[3*i], NAN, NAN ); \n"
|
||||
" f1 = (float3)( in2[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" f0 = f0 ",
|
||||
operator_symbol,
|
||||
" f1;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = f0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = f0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
|
||||
relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source =
|
||||
GetBinaryKernel(kernel_name, name, ParameterType::Float,
|
||||
ParameterType::Float, ParameterType::Float, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
||||
kernel_count, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
@@ -28,95 +28,13 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global int",
|
||||
sizeNames[vectorSize],
|
||||
"* out2, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in1, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in2 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in1[i], in2[i], out2 + i );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double* out, __global int* out2, __global double* in, "
|
||||
"__global double* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" double3 d0 = vload3( 0, in + 3 * i );\n"
|
||||
" double3 d1 = vload3( 0, in2 + 3 * i );\n"
|
||||
" int3 i0 = 0xdeaddead;\n"
|
||||
" d0 = ",
|
||||
name,
|
||||
"( d0, d1, &i0 );\n"
|
||||
" vstore3( d0, 0, out + 3*i );\n"
|
||||
" vstore3( i0, 0, out2 + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" double3 d0;\n"
|
||||
" double3 d1;\n"
|
||||
" int3 i0 = 0xdeaddead;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" d0 = (double3)( in[3*i], NAN, NAN ); \n"
|
||||
" d1 = (double3)( in2[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" d0 = ",
|
||||
name,
|
||||
"( d0, d1, &i0 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = d0.y; \n"
|
||||
" out2[3*i+1] = i0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = d0.x; \n"
|
||||
" out2[3*i] = i0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetBinaryKernel(kernel_name, name, ParameterType::Double,
|
||||
ParameterType::Int, ParameterType::Double,
|
||||
ParameterType::Double, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
struct BuildKernelInfo2
|
||||
|
||||
@@ -28,93 +28,13 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global int",
|
||||
sizeNames[vectorSize],
|
||||
"* out2, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in1, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in2 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in1[i], in2[i], out2 + i );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float* out, __global int* out2, __global float* in, "
|
||||
"__global float* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" float3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" float3 f1 = vload3( 0, in2 + 3 * i );\n"
|
||||
" int3 i0 = 0xdeaddead;\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, f1, &i0 );\n"
|
||||
" vstore3( f0, 0, out + 3*i );\n"
|
||||
" vstore3( i0, 0, out2 + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" float3 f0;\n"
|
||||
" float3 f1;\n"
|
||||
" int3 i0 = 0xdeaddead;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (float3)( in[3*i], NAN, NAN ); \n"
|
||||
" f1 = (float3)( in2[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, f1, &i0 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = f0.y; \n"
|
||||
" out2[3*i+1] = i0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = f0.x; \n"
|
||||
" out2[3*i] = i0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetBinaryKernel(kernel_name, name, ParameterType::Float,
|
||||
ParameterType::Int, ParameterType::Float,
|
||||
ParameterType::Float, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
struct BuildKernelInfo2
|
||||
|
||||
@@ -29,6 +29,10 @@ const char *GetTypeName(ParameterType type)
|
||||
{
|
||||
case ParameterType::Float: return "float";
|
||||
case ParameterType::Double: return "double";
|
||||
case ParameterType::Int: return "int";
|
||||
case ParameterType::UInt: return "uint";
|
||||
case ParameterType::Long: return "long";
|
||||
case ParameterType::ULong: return "ulong";
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@@ -39,6 +43,12 @@ const char *GetUndefValue(ParameterType type)
|
||||
{
|
||||
case ParameterType::Float:
|
||||
case ParameterType::Double: return "NAN";
|
||||
|
||||
case ParameterType::Int:
|
||||
case ParameterType::UInt: return "0x12345678";
|
||||
|
||||
case ParameterType::Long:
|
||||
case ParameterType::ULong: return "0x0ddf00dbadc0ffee";
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@@ -66,6 +76,10 @@ void EmitEnableExtension(std::ostringstream &kernel, ParameterType type)
|
||||
break;
|
||||
|
||||
case ParameterType::Float:
|
||||
case ParameterType::Int:
|
||||
case ParameterType::UInt:
|
||||
case ParameterType::Long:
|
||||
case ParameterType::ULong:
|
||||
// No extension required.
|
||||
break;
|
||||
}
|
||||
@@ -78,6 +92,354 @@ std::string GetKernelName(int vector_size_index)
|
||||
return std::string("math_kernel") + sizeNames[vector_size_index];
|
||||
}
|
||||
|
||||
// Build the OpenCL C source text of a test kernel that applies a one-argument
// builtin to the elements of an input buffer and stores the results.
//
// Parameters:
//   kernel_name       - name given to the generated __kernel function.
//   builtin           - text spliced into the kernel as the callee, producing
//                       `builtin(x)` in the generated source.
//   retType           - parameter type used to define the RETTYPE macro.
//   type1             - parameter type used to define the TYPE1 and UNDEF1
//                       macros.
//   vector_size_index - index into sizeValues; a size of 3 selects the
//                       dedicated 3-element kernel variant.
//
// Returns the accumulated source: everything the Emit* helpers wrote to the
// stream, followed by the selected kernel body.
//
// NOTE(review): the 3-element variant also refers to RETTYPE_SCALAR and
// TYPE1_SCALAR; presumably EmitDefineType defines those alongside the macro
// named in its second argument - confirm against its definition.
std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
|
||||
ParameterType retType, ParameterType type1,
|
||||
int vector_size_index)
|
||||
{
|
||||
// To keep the kernel code readable, use macros for types and undef values.
|
||||
std::ostringstream kernel;
|
||||
EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
|
||||
EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
|
||||
EmitDefineUndef(kernel, "UNDEF1", type1);
|
||||
// NOTE(review): only type1 is passed here. If retType can require an
// extension that type1 does not (e.g. a double result from a non-double
// argument), confirm the generated source still enables it.
EmitEnableExtension(kernel, type1);
|
||||
|
||||
// clang-format off
|
||||
// Kernel used for every vector size other than 3: one work-item handles one
// RETTYPE/TYPE1 element. The array alternates fixed source text with the
// spliced-in kernel name and builtin name.
const char *kernel_nonvec3[] = { R"(
|
||||
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
|
||||
__global TYPE1* in1)
|
||||
{
|
||||
size_t i = get_global_id(0);
|
||||
out[i] = )", builtin, R"((in1[i]);
|
||||
}
|
||||
)" };
|
||||
|
||||
// Kernel used for vector size 3: buffers are addressed through scalar
// pointers. Every work-item except the last uses vload3/vstore3; the last one
// reads and writes only the one or two remaining scalars (chosen by the
// parity of its id) and leaves the other input lanes at UNDEF1.
const char *kernel_vec3[] = { R"(
|
||||
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
|
||||
__global TYPE1_SCALAR* in1)
|
||||
{
|
||||
size_t i = get_global_id(0);
|
||||
|
||||
if (i + 1 < get_global_size(0))
|
||||
{
|
||||
TYPE1 a = vload3(0, in1 + 3 * i);
|
||||
RETTYPE res = )", builtin, R"((a);
|
||||
vstore3(res, 0, out + 3 * i);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Figure out how many elements are left over after
|
||||
// BUFFER_SIZE % (3 * sizeof(type)).
|
||||
// Assume power of two buffer size.
|
||||
size_t parity = i & 1;
|
||||
TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
|
||||
switch (parity)
|
||||
{
|
||||
case 0:
|
||||
a.y = in1[3 * i + 1];
|
||||
// fall through
|
||||
case 1:
|
||||
a.x = in1[3 * i];
|
||||
break;
|
||||
}
|
||||
|
||||
RETTYPE res = )", builtin, R"((a);
|
||||
|
||||
switch (parity)
|
||||
{
|
||||
case 0:
|
||||
out[3 * i + 1] = res.y;
|
||||
// fall through
|
||||
case 1:
|
||||
out[3 * i] = res.x;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
)" };
|
||||
// clang-format on
|
||||
|
||||
// Append the chunks of the variant matching the requested vector size, in
// order, after the macro definitions already written to the stream.
if (sizeValues[vector_size_index] != 3)
|
||||
for (const auto &chunk : kernel_nonvec3) kernel << chunk;
|
||||
else
|
||||
for (const auto &chunk : kernel_vec3) kernel << chunk;
|
||||
|
||||
return kernel.str();
|
||||
}
|
||||
|
||||
// Build the OpenCL C source text of a test kernel for a one-argument builtin
// that produces two results: one returned by value and one written through a
// pointer passed as the builtin's second argument.
//
// Parameters:
//   kernel_name       - name given to the generated __kernel function.
//   builtin           - text spliced into the kernel as the callee, producing
//                       `builtin(x, ptr)` in the generated source.
//   retType1          - parameter type used to define the RETTYPE1 macro
//                       (the value stored to out1).
//   retType2          - parameter type used to define the RETTYPE2 and
//                       UNDEFR2 macros (the value stored to out2).
//   type1             - parameter type used to define the TYPE1 and UNDEF1
//                       macros.
//   vector_size_index - index into sizeValues; a size of 3 selects the
//                       dedicated 3-element kernel variant.
//
// Returns the accumulated source: everything the Emit* helpers wrote to the
// stream, followed by the selected kernel body.
//
// NOTE(review): the 3-element variant also refers to RETTYPE1_SCALAR,
// RETTYPE2_SCALAR and TYPE1_SCALAR; presumably EmitDefineType defines those
// alongside the macro named in its second argument - confirm against its
// definition.
std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
|
||||
ParameterType retType1, ParameterType retType2,
|
||||
ParameterType type1, int vector_size_index)
|
||||
{
|
||||
// To keep the kernel code readable, use macros for types and undef values.
|
||||
std::ostringstream kernel;
|
||||
EmitDefineType(kernel, "RETTYPE1", retType1, vector_size_index);
|
||||
EmitDefineType(kernel, "RETTYPE2", retType2, vector_size_index);
|
||||
EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
|
||||
EmitDefineUndef(kernel, "UNDEF1", type1);
|
||||
EmitDefineUndef(kernel, "UNDEFR2", retType2);
|
||||
// NOTE(review): only type1 is passed here. If retType1 or retType2 can
// require an extension that type1 does not, confirm the generated source
// still enables it.
EmitEnableExtension(kernel, type1);
|
||||
|
||||
// clang-format off
|
||||
// Kernel used for every vector size other than 3: one work-item handles one
// element, and the builtin writes its second result directly to out2 + i.
const char *kernel_nonvec3[] = { R"(
|
||||
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1* out1,
|
||||
__global RETTYPE2* out2,
|
||||
__global TYPE1* in1)
|
||||
{
|
||||
size_t i = get_global_id(0);
|
||||
out1[i] = )", builtin, R"((in1[i], out2 + i);
|
||||
}
|
||||
)" };
|
||||
|
||||
// Kernel used for vector size 3: buffers are addressed through scalar
// pointers. The second result goes to a local (res2, pre-set to UNDEFR2) and
// is then stored. Every work-item except the last uses vload3/vstore3; the
// last one reads and writes only the one or two remaining scalars (chosen by
// the parity of its id) and leaves the other input lanes at UNDEF1.
const char *kernel_vec3[] = { R"(
|
||||
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1_SCALAR* out1,
|
||||
__global RETTYPE2_SCALAR* out2,
|
||||
__global TYPE1_SCALAR* in1)
|
||||
{
|
||||
size_t i = get_global_id(0);
|
||||
|
||||
if (i + 1 < get_global_size(0))
|
||||
{
|
||||
TYPE1 a = vload3(0, in1 + 3 * i);
|
||||
RETTYPE2 res2 = UNDEFR2;
|
||||
RETTYPE1 res1 = )", builtin, R"((a, &res2);
|
||||
vstore3(res1, 0, out1 + 3 * i);
|
||||
vstore3(res2, 0, out2 + 3 * i);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Figure out how many elements are left over after
|
||||
// BUFFER_SIZE % (3 * sizeof(type)).
|
||||
// Assume power of two buffer size.
|
||||
size_t parity = i & 1;
|
||||
TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
|
||||
switch (parity)
|
||||
{
|
||||
case 0:
|
||||
a.y = in1[3 * i + 1];
|
||||
// fall through
|
||||
case 1:
|
||||
a.x = in1[3 * i];
|
||||
break;
|
||||
}
|
||||
|
||||
RETTYPE2 res2 = UNDEFR2;
|
||||
RETTYPE1 res1 = )", builtin, R"((a, &res2);
|
||||
|
||||
switch (parity)
|
||||
{
|
||||
case 0:
|
||||
out1[3 * i + 1] = res1.y;
|
||||
out2[3 * i + 1] = res2.y;
|
||||
// fall through
|
||||
case 1:
|
||||
out1[3 * i] = res1.x;
|
||||
out2[3 * i] = res2.x;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
)" };
|
||||
// clang-format on
|
||||
|
||||
// Append the chunks of the variant matching the requested vector size, in
// order, after the macro definitions already written to the stream.
if (sizeValues[vector_size_index] != 3)
|
||||
for (const auto &chunk : kernel_nonvec3) kernel << chunk;
|
||||
else
|
||||
for (const auto &chunk : kernel_vec3) kernel << chunk;
|
||||
|
||||
return kernel.str();
|
||||
}
|
||||
|
||||
std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
|
||||
ParameterType retType, ParameterType type1,
|
||||
ParameterType type2, int vector_size_index)
|
||||
{
|
||||
// To keep the kernel code readable, use macros for types and undef values.
|
||||
std::ostringstream kernel;
|
||||
EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
|
||||
EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
|
||||
EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
|
||||
EmitDefineUndef(kernel, "UNDEF1", type1);
|
||||
EmitDefineUndef(kernel, "UNDEF2", type2);
|
||||
EmitEnableExtension(kernel, type1);
|
||||
|
||||
const bool is_vec3 = sizeValues[vector_size_index] == 3;
|
||||
|
||||
std::string invocation;
|
||||
if (strlen(builtin) == 1)
|
||||
{
|
||||
// Assume a single-character builtin is an operator (e.g., +, *, ...).
|
||||
invocation = is_vec3 ? "a" : "in1[i] ";
|
||||
invocation += builtin;
|
||||
invocation += is_vec3 ? "b" : " in2[i]";
|
||||
}
|
||||
else
|
||||
{
|
||||
// Otherwise call the builtin as a function with two arguments.
|
||||
invocation = builtin;
|
||||
invocation += is_vec3 ? "(a, b)" : "(in1[i], in2[i])";
|
||||
}
|
||||
|
||||
// clang-format off
|
||||
const char *kernel_nonvec3[] = { R"(
|
||||
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
|
||||
__global TYPE1* in1,
|
||||
__global TYPE2* in2)
|
||||
{
|
||||
size_t i = get_global_id(0);
|
||||
out[i] = )", invocation.c_str(), R"(;
|
||||
}
|
||||
)" };
|
||||
|
||||
const char *kernel_vec3[] = { R"(
|
||||
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
|
||||
__global TYPE1_SCALAR* in1,
|
||||
__global TYPE2_SCALAR* in2)
|
||||
{
|
||||
size_t i = get_global_id(0);
|
||||
|
||||
if (i + 1 < get_global_size(0))
|
||||
{
|
||||
TYPE1 a = vload3(0, in1 + 3 * i);
|
||||
TYPE2 b = vload3(0, in2 + 3 * i);
|
||||
RETTYPE res = )", invocation.c_str(), R"(;
|
||||
vstore3(res, 0, out + 3 * i);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Figure out how many elements are left over after
|
||||
// BUFFER_SIZE % (3 * sizeof(type)).
|
||||
// Assume power of two buffer size.
|
||||
size_t parity = i & 1;
|
||||
TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
|
||||
TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
|
||||
switch (parity)
|
||||
{
|
||||
case 0:
|
||||
a.y = in1[3 * i + 1];
|
||||
b.y = in2[3 * i + 1];
|
||||
// fall through
|
||||
case 1:
|
||||
a.x = in1[3 * i];
|
||||
b.x = in2[3 * i];
|
||||
break;
|
||||
}
|
||||
|
||||
RETTYPE res = )", invocation.c_str(), R"(;
|
||||
|
||||
switch (parity)
|
||||
{
|
||||
case 0:
|
||||
out[3 * i + 1] = res.y;
|
||||
// fall through
|
||||
case 1:
|
||||
out[3 * i] = res.x;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
)" };
|
||||
// clang-format on
|
||||
|
||||
if (!is_vec3)
|
||||
for (const auto &chunk : kernel_nonvec3) kernel << chunk;
|
||||
else
|
||||
for (const auto &chunk : kernel_vec3) kernel << chunk;
|
||||
|
||||
return kernel.str();
|
||||
}
|
||||
|
||||
std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
|
||||
ParameterType retType1, ParameterType retType2,
|
||||
ParameterType type1, ParameterType type2,
|
||||
int vector_size_index)
|
||||
{
|
||||
// To keep the kernel code readable, use macros for types and undef values.
|
||||
std::ostringstream kernel;
|
||||
EmitDefineType(kernel, "RETTYPE1", retType1, vector_size_index);
|
||||
EmitDefineType(kernel, "RETTYPE2", retType2, vector_size_index);
|
||||
EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
|
||||
EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
|
||||
EmitDefineUndef(kernel, "UNDEF1", type1);
|
||||
EmitDefineUndef(kernel, "UNDEF2", type2);
|
||||
EmitDefineUndef(kernel, "UNDEFR2", retType2);
|
||||
EmitEnableExtension(kernel, type1);
|
||||
|
||||
// clang-format off
|
||||
const char *kernel_nonvec3[] = { R"(
|
||||
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1* out1,
|
||||
__global RETTYPE2* out2,
|
||||
__global TYPE1* in1,
|
||||
__global TYPE2* in2)
|
||||
{
|
||||
size_t i = get_global_id(0);
|
||||
out1[i] = )", builtin, R"((in1[i], in2[i], out2 + i);
|
||||
}
|
||||
)" };
|
||||
|
||||
const char *kernel_vec3[] = { R"(
|
||||
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1_SCALAR* out1,
|
||||
__global RETTYPE2_SCALAR* out2,
|
||||
__global TYPE1_SCALAR* in1,
|
||||
__global TYPE2_SCALAR* in2)
|
||||
{
|
||||
size_t i = get_global_id(0);
|
||||
|
||||
if (i + 1 < get_global_size(0))
|
||||
{
|
||||
TYPE1 a = vload3(0, in1 + 3 * i);
|
||||
TYPE2 b = vload3(0, in2 + 3 * i);
|
||||
RETTYPE2 res2 = UNDEFR2;
|
||||
RETTYPE1 res1 = )", builtin, R"((a, b, &res2);
|
||||
vstore3(res1, 0, out1 + 3 * i);
|
||||
vstore3(res2, 0, out2 + 3 * i);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Figure out how many elements are left over after
|
||||
// BUFFER_SIZE % (3 * sizeof(type)).
|
||||
// Assume power of two buffer size.
|
||||
size_t parity = i & 1;
|
||||
TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
|
||||
TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
|
||||
switch (parity)
|
||||
{
|
||||
case 0:
|
||||
a.y = in1[3 * i + 1];
|
||||
b.y = in2[3 * i + 1];
|
||||
// fall through
|
||||
case 1:
|
||||
a.x = in1[3 * i];
|
||||
b.x = in2[3 * i];
|
||||
break;
|
||||
}
|
||||
|
||||
RETTYPE2 res2 = UNDEFR2;
|
||||
RETTYPE1 res1 = )", builtin, R"((a, b, &res2);
|
||||
|
||||
switch (parity)
|
||||
{
|
||||
case 0:
|
||||
out1[3 * i + 1] = res1.y;
|
||||
out2[3 * i + 1] = res2.y;
|
||||
// fall through
|
||||
case 1:
|
||||
out1[3 * i] = res1.x;
|
||||
out2[3 * i] = res2.x;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
)" };
|
||||
// clang-format on
|
||||
|
||||
if (sizeValues[vector_size_index] != 3)
|
||||
for (const auto &chunk : kernel_nonvec3) kernel << chunk;
|
||||
else
|
||||
for (const auto &chunk : kernel_vec3) kernel << chunk;
|
||||
|
||||
return kernel.str();
|
||||
}
|
||||
|
||||
std::string GetTernaryKernel(const std::string &kernel_name,
|
||||
const char *builtin, ParameterType retType,
|
||||
ParameterType type1, ParameterType type2,
|
||||
|
||||
@@ -37,12 +37,29 @@ enum class ParameterType
|
||||
{
|
||||
Float,
|
||||
Double,
|
||||
Int,
|
||||
UInt,
|
||||
Long,
|
||||
ULong,
|
||||
};
|
||||
|
||||
// Return kernel name suffixed with vector size.
|
||||
std::string GetKernelName(int vector_size_index);
|
||||
|
||||
// Generate kernel code for the given builtin function/operator.
|
||||
std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
|
||||
ParameterType retType, ParameterType type1,
|
||||
int vector_size_index);
|
||||
std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
|
||||
ParameterType retType1, ParameterType retType2,
|
||||
ParameterType type1, int vector_size_index);
|
||||
std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
|
||||
ParameterType retType, ParameterType type1,
|
||||
ParameterType type2, int vector_size_index);
|
||||
std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
|
||||
ParameterType retType1, ParameterType retType2,
|
||||
ParameterType type1, ParameterType type2,
|
||||
int vector_size_index);
|
||||
std::string GetTernaryKernel(const std::string &kernel_name,
|
||||
const char *builtin, ParameterType retType,
|
||||
ParameterType type1, ParameterType type2,
|
||||
|
||||
@@ -27,81 +27,12 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global int",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global int* out, __global double* in)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" double3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" int3 i0 = ",
|
||||
name,
|
||||
"( f0 );\n"
|
||||
" vstore3( i0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" double3 f0;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (double3)( in[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" int3 i0 = ",
|
||||
name,
|
||||
"( f0 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = i0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = i0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Int,
|
||||
ParameterType::Double, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
struct BuildKernelInfo2
|
||||
|
||||
@@ -27,79 +27,12 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global int",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global int* out, __global float* in)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" float3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" int3 i0 = ",
|
||||
name,
|
||||
"( f0 );\n"
|
||||
" vstore3( i0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" float3 f0;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (float3)( in[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" int3 i0 = ",
|
||||
name,
|
||||
"( f0 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = i0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = i0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Int,
|
||||
ParameterType::Float, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
struct BuildKernelInfo2
|
||||
|
||||
@@ -27,88 +27,13 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global long",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in1, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in2 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in1[i], in2[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global long* out, __global double* in, __global double* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" double3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" double3 f1 = vload3( 0, in2 + 3 * i );\n"
|
||||
" long3 l0 = ",
|
||||
name,
|
||||
"( f0, f1 );\n"
|
||||
" vstore3( l0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" double3 f0;\n"
|
||||
" double3 f1;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (double3)( in[3*i], NAN, NAN ); \n"
|
||||
" f1 = (double3)( in2[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" f1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" long3 l0 = ",
|
||||
name,
|
||||
"( f0, f1 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = l0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = l0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
|
||||
relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetBinaryKernel(kernel_name, name, ParameterType::Long,
|
||||
ParameterType::Double, ParameterType::Double,
|
||||
vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
||||
kernel_count, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
@@ -26,86 +26,13 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global int",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in1, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in2 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in1[i], in2[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global int* out, __global float* in, __global float* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" float3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" float3 f1 = vload3( 0, in2 + 3 * i );\n"
|
||||
" int3 i0 = ",
|
||||
name,
|
||||
"( f0, f1 );\n"
|
||||
" vstore3( i0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" float3 f0;\n"
|
||||
" float3 f1;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (float3)( in[3*i], NAN, NAN ); \n"
|
||||
" f1 = (float3)( in2[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" int3 i0 = ",
|
||||
name,
|
||||
"( f0, f1 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = i0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = i0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
|
||||
relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source =
|
||||
GetBinaryKernel(kernel_name, name, ParameterType::Int,
|
||||
ParameterType::Float, ParameterType::Float, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
||||
kernel_count, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
@@ -27,82 +27,12 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global long",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global long* out, __global double* in)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" double3 d0 = vload3( 0, in + 3 * i );\n"
|
||||
" long3 l0 = ",
|
||||
name,
|
||||
"( d0 );\n"
|
||||
" vstore3( l0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" double3 d0;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" d0 = (double3)( in[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" long3 l0 = ",
|
||||
name,
|
||||
"( d0 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = l0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = l0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
|
||||
relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Long,
|
||||
ParameterType::Double, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
||||
kernel_count, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
@@ -26,81 +26,12 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global int",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global int* out, __global float* in)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" float3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" int3 i0 = ",
|
||||
name,
|
||||
"( f0 );\n"
|
||||
" vstore3( i0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" int3 i0;\n"
|
||||
" float3 f0;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (float3)( in[3*i], 0xdead, 0xdead ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (float3)( in[3*i], in[3*i+1], 0xdead ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" i0 = ",
|
||||
name,
|
||||
"( f0 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = i0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = i0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
|
||||
relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Int,
|
||||
ParameterType::Float, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
||||
kernel_count, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
@@ -27,82 +27,12 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double* out, __global double* in)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" double3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0 );\n"
|
||||
" vstore3( f0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" double3 f0;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (double3)( in[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = f0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = f0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
|
||||
relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Double,
|
||||
ParameterType::Double, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
||||
kernel_count, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
@@ -26,80 +26,12 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
|
||||
cl_kernel *k, cl_program *p, bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float* out, __global float* in)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" float3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0 );\n"
|
||||
" vstore3( f0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" float3 f0;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (float3)( in[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = f0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = f0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
|
||||
relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Float,
|
||||
ParameterType::Float, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernels(sources.data(), sources.size(), kernel_name.c_str(),
|
||||
kernel_count, k, p, relaxedMode);
|
||||
}
|
||||
|
||||
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
|
||||
|
||||
@@ -27,88 +27,13 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* out2, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in[i], out2 + i );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double* out, __global double* out2, __global double* in)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" double3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" double3 iout = NAN;\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, &iout );\n"
|
||||
" vstore3( f0, 0, out + 3*i );\n"
|
||||
" vstore3( iout, 0, out2 + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" double3 iout = NAN;\n"
|
||||
" double3 f0;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (double3)( in[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, &iout );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = f0.y; \n"
|
||||
" out2[3*i+1] = iout.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = f0.x; \n"
|
||||
" out2[3*i] = iout.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Double,
|
||||
ParameterType::Double, ParameterType::Double,
|
||||
vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
struct BuildKernelInfo2
|
||||
|
||||
@@ -27,86 +27,13 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* out2, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in[i], out2 + i );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float* out, __global float* out2, __global float* in)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" float3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" float3 iout = NAN;\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, &iout );\n"
|
||||
" vstore3( f0, 0, out + 3*i );\n"
|
||||
" vstore3( iout, 0, out2 + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" float3 iout = NAN;\n"
|
||||
" float3 f0;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (float3)( in[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, &iout );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = f0.y; \n"
|
||||
" out2[3*i+1] = iout.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = f0.x; \n"
|
||||
" out2[3*i] = iout.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source =
|
||||
GetUnaryKernel(kernel_name, name, ParameterType::Float,
|
||||
ParameterType::Float, ParameterType::Float, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
struct BuildKernelInfo2
|
||||
|
||||
@@ -28,88 +28,13 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global int",
|
||||
sizeNames[vectorSize],
|
||||
"* out2, __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* in )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in[i], out2 + i );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double* out, __global int* out2, __global double* in)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" double3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" int3 iout = INT_MIN;\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, &iout );\n"
|
||||
" vstore3( f0, 0, out + 3*i );\n"
|
||||
" vstore3( iout, 0, out2 + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" int3 iout = INT_MIN;\n"
|
||||
" double3 f0;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (double3)( in[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, &iout );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = f0.y; \n"
|
||||
" out2[3*i+1] = iout.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = f0.x; \n"
|
||||
" out2[3*i] = iout.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source =
|
||||
GetUnaryKernel(kernel_name, name, ParameterType::Double,
|
||||
ParameterType::Int, ParameterType::Double, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
struct BuildKernelInfo2
|
||||
|
||||
@@ -28,86 +28,13 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global int",
|
||||
sizeNames[vectorSize],
|
||||
"* out2, __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* in )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in[i], out2 + i );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float* out, __global int* out2, __global float* in)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" float3 f0 = vload3( 0, in + 3 * i );\n"
|
||||
" int3 iout = INT_MIN;\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, &iout );\n"
|
||||
" vstore3( f0, 0, out + 3*i );\n"
|
||||
" vstore3( iout, 0, out2 + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" int3 iout = INT_MIN;\n"
|
||||
" float3 f0;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" f0 = (float3)( in[3*i], NAN, NAN ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( f0, &iout );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = f0.y; \n"
|
||||
" out2[3*i+1] = iout.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = f0.x; \n"
|
||||
" out2[3*i] = iout.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source =
|
||||
GetUnaryKernel(kernel_name, name, ParameterType::Float,
|
||||
ParameterType::Int, ParameterType::Float, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
struct BuildKernelInfo2
|
||||
|
||||
@@ -27,83 +27,12 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global ulong",
|
||||
sizeNames[vectorSize],
|
||||
"* in )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global double* out, __global ulong* in )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" ulong3 u0 = vload3( 0, in + 3 * i );\n"
|
||||
" double3 f0 = ",
|
||||
name,
|
||||
"( u0 );\n"
|
||||
" vstore3( f0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" ulong3 u0;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" u0 = (ulong3)( in[3*i], 0xdeaddeaddeaddeadUL, "
|
||||
"0xdeaddeaddeaddeadUL ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" u0 = (ulong3)( in[3*i], in[3*i+1], "
|
||||
"0xdeaddeaddeaddeadUL ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" double3 f0 = ",
|
||||
name,
|
||||
"( u0 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = f0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = f0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Double,
|
||||
ParameterType::ULong, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
struct BuildKernelInfo2
|
||||
|
||||
@@ -27,80 +27,12 @@ namespace {
|
||||
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
|
||||
bool relaxedMode)
|
||||
{
|
||||
const char *c[] = { "__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float",
|
||||
sizeNames[vectorSize],
|
||||
"* out, __global uint",
|
||||
sizeNames[vectorSize],
|
||||
"* in )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" out[i] = ",
|
||||
name,
|
||||
"( in[i] );\n"
|
||||
"}\n" };
|
||||
|
||||
const char *c3[] = {
|
||||
"__kernel void math_kernel",
|
||||
sizeNames[vectorSize],
|
||||
"( __global float* out, __global uint* in)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
" {\n"
|
||||
" uint3 u0 = vload3( 0, in + 3 * i );\n"
|
||||
" float3 f0 = ",
|
||||
name,
|
||||
"( u0 );\n"
|
||||
" vstore3( f0, 0, out + 3*i );\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" size_t parity = i & 1; // Figure out how many elements are "
|
||||
"left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
|
||||
"buffer size \n"
|
||||
" uint3 u0;\n"
|
||||
" float3 f0;\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 1:\n"
|
||||
" u0 = (uint3)( in[3*i], 0xdead, 0xdead ); \n"
|
||||
" break;\n"
|
||||
" case 0:\n"
|
||||
" u0 = (uint3)( in[3*i], in[3*i+1], 0xdead ); \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" f0 = ",
|
||||
name,
|
||||
"( u0 );\n"
|
||||
" switch( parity )\n"
|
||||
" {\n"
|
||||
" case 0:\n"
|
||||
" out[3*i+1] = f0.y; \n"
|
||||
" // fall through\n"
|
||||
" case 1:\n"
|
||||
" out[3*i] = f0.x; \n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c) / sizeof(c[0]);
|
||||
|
||||
if (sizeValues[vectorSize] == 3)
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3) / sizeof(c3[0]);
|
||||
}
|
||||
|
||||
char testName[32];
|
||||
snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
|
||||
sizeNames[vectorSize]);
|
||||
|
||||
return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
|
||||
auto kernel_name = GetKernelName(vectorSize);
|
||||
auto source = GetUnaryKernel(kernel_name, name, ParameterType::Float,
|
||||
ParameterType::UInt, vectorSize);
|
||||
std::array<const char *, 1> sources{ source.c_str() };
|
||||
return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
|
||||
relaxedMode);
|
||||
}
|
||||
|
||||
struct BuildKernelInfo2
|
||||
|
||||
Reference in New Issue
Block a user