mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
fixes #2387 Corrects the "correctly rounded" behavior for the math bruteforce tests. Specifically: * Only applies the `-cl-fp32-correctly-rounded-divide-sqrt` build option for the `divide_cr` and `sqrt_cr` tests. The other tests do not receive this build option. This means that there is a difference in the behavior of the `divide` and `divide_cr` tests and the `sqrt` and `sqrt_cr` tests, and the "correctly rounded" build option is not applied to the fp16 or fp64 tests. * Removes the build option to toggle testing the correctly rounded divide and square root tests since it no longer needed. Instead, the test names can be used to choose whether to test the correctly rounded functions or the non-correctly rounded functions. Additionally: * Relaxes the fp16 sqrt accuracy requirements to 1 ULP. This is needed to pass this test on some of our devices. This part is still under discussion, so I will keep this PR as a draft until it is settled.
611 lines
19 KiB
C++
611 lines
19 KiB
C++
//
|
|
// Copyright (c) 2022-2024 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
#include "common.h"
|
|
|
|
#include "utility.h" // for sizeNames and sizeValues.
|
|
|
|
#include <sstream>
|
|
#include <string>
|
|
|
|
namespace {
|
|
|
|
const char *GetTypeName(ParameterType type)
|
|
{
|
|
switch (type)
|
|
{
|
|
case ParameterType::Half: return "half";
|
|
case ParameterType::Float: return "float";
|
|
case ParameterType::Double: return "double";
|
|
case ParameterType::Short: return "short";
|
|
case ParameterType::UShort: return "ushort";
|
|
case ParameterType::Int: return "int";
|
|
case ParameterType::UInt: return "uint";
|
|
case ParameterType::Long: return "long";
|
|
case ParameterType::ULong: return "ulong";
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
const char *GetUndefValue(ParameterType type)
|
|
{
|
|
switch (type)
|
|
{
|
|
case ParameterType::Half:
|
|
case ParameterType::Float:
|
|
case ParameterType::Double: return "NAN";
|
|
|
|
case ParameterType::Short:
|
|
case ParameterType::UShort: return "0x5678";
|
|
|
|
case ParameterType::Int:
|
|
case ParameterType::UInt: return "0x12345678";
|
|
|
|
case ParameterType::Long:
|
|
case ParameterType::ULong: return "0x0ddf00dbadc0ffee";
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
void EmitDefineType(std::ostringstream &kernel, const char *name,
|
|
ParameterType type, int vector_size_index)
|
|
{
|
|
kernel << "#define " << name << " " << GetTypeName(type)
|
|
<< sizeNames[vector_size_index] << '\n';
|
|
kernel << "#define " << name << "_SCALAR " << GetTypeName(type) << '\n';
|
|
}
|
|
|
|
void EmitDefineUndef(std::ostringstream &kernel, const char *name,
|
|
ParameterType type)
|
|
{
|
|
kernel << "#define " << name << " " << GetUndefValue(type) << '\n';
|
|
}
|
|
|
|
void EmitEnableExtension(std::ostringstream &kernel,
|
|
const std::initializer_list<ParameterType> &types)
|
|
{
|
|
bool needsFp64 = false;
|
|
bool needsFp16 = false;
|
|
|
|
for (const auto &type : types)
|
|
{
|
|
switch (type)
|
|
{
|
|
case ParameterType::Double: needsFp64 = true; break;
|
|
case ParameterType::Half: needsFp16 = true; break;
|
|
case ParameterType::Float:
|
|
case ParameterType::Short:
|
|
case ParameterType::UShort:
|
|
case ParameterType::Int:
|
|
case ParameterType::UInt:
|
|
case ParameterType::Long:
|
|
case ParameterType::ULong:
|
|
// No extension required.
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (needsFp64) kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
|
|
if (needsFp16) kernel << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
|
|
}
|
|
|
|
std::string GetBuildOptions(const BuildKernelInfo &info)
|
|
{
|
|
std::ostringstream options;
|
|
|
|
if (gForceFTZ)
|
|
{
|
|
options << " -cl-denorms-are-zero";
|
|
}
|
|
|
|
if (info.relaxedMode)
|
|
{
|
|
options << " -cl-fast-relaxed-math";
|
|
}
|
|
|
|
if (info.correctlyRounded)
|
|
{
|
|
options << " -cl-fp32-correctly-rounded-divide-sqrt";
|
|
}
|
|
|
|
return options.str();
|
|
}
|
|
|
|
} // anonymous namespace
|
|
|
|
std::string GetKernelName(int vector_size_index)
|
|
{
|
|
return std::string("math_kernel") + sizeNames[vector_size_index];
|
|
}
|
|
|
|
std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
|
|
ParameterType retType, ParameterType type1,
|
|
int vector_size_index)
|
|
{
|
|
// To keep the kernel code readable, use macros for types and undef values.
|
|
std::ostringstream kernel;
|
|
EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
|
|
EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
|
|
EmitDefineUndef(kernel, "UNDEF1", type1);
|
|
EmitEnableExtension(kernel, { retType, type1 });
|
|
|
|
// clang-format off
|
|
const char *kernel_nonvec3[] = { R"(
|
|
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
|
|
__global TYPE1* in1)
|
|
{
|
|
size_t i = get_global_id(0);
|
|
out[i] = )", builtin, R"((in1[i]);
|
|
}
|
|
)" };
|
|
|
|
const char *kernel_vec3[] = { R"(
|
|
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
|
|
__global TYPE1_SCALAR* in1)
|
|
{
|
|
size_t i = get_global_id(0);
|
|
|
|
if (i + 1 < get_global_size(0))
|
|
{
|
|
TYPE1 a = vload3(0, in1 + 3 * i);
|
|
RETTYPE res = )", builtin, R"((a);
|
|
vstore3(res, 0, out + 3 * i);
|
|
}
|
|
else
|
|
{
|
|
// Figure out how many elements are left over after
|
|
// BUFFER_SIZE % (3 * sizeof(type)).
|
|
// Assume power of two buffer size.
|
|
size_t parity = i & 1;
|
|
TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
|
|
switch (parity)
|
|
{
|
|
case 0:
|
|
a.y = in1[3 * i + 1];
|
|
// fall through
|
|
case 1:
|
|
a.x = in1[3 * i];
|
|
break;
|
|
}
|
|
|
|
RETTYPE res = )", builtin, R"((a);
|
|
|
|
switch (parity)
|
|
{
|
|
case 0:
|
|
out[3 * i + 1] = res.y;
|
|
// fall through
|
|
case 1:
|
|
out[3 * i] = res.x;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
)" };
|
|
// clang-format on
|
|
|
|
if (sizeValues[vector_size_index] != 3)
|
|
for (const auto &chunk : kernel_nonvec3) kernel << chunk;
|
|
else
|
|
for (const auto &chunk : kernel_vec3) kernel << chunk;
|
|
|
|
return kernel.str();
|
|
}
|
|
|
|
std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
|
|
ParameterType retType1, ParameterType retType2,
|
|
ParameterType type1, int vector_size_index)
|
|
{
|
|
// To keep the kernel code readable, use macros for types and undef values.
|
|
std::ostringstream kernel;
|
|
EmitDefineType(kernel, "RETTYPE1", retType1, vector_size_index);
|
|
EmitDefineType(kernel, "RETTYPE2", retType2, vector_size_index);
|
|
EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
|
|
EmitDefineUndef(kernel, "UNDEF1", type1);
|
|
EmitDefineUndef(kernel, "UNDEFR2", retType2);
|
|
EmitEnableExtension(kernel, { retType1, retType2, type1 });
|
|
|
|
// clang-format off
|
|
const char *kernel_nonvec3[] = { R"(
|
|
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1* out1,
|
|
__global RETTYPE2* out2,
|
|
__global TYPE1* in1)
|
|
{
|
|
size_t i = get_global_id(0);
|
|
out1[i] = )", builtin, R"((in1[i], out2 + i);
|
|
}
|
|
)" };
|
|
|
|
const char *kernel_vec3[] = { R"(
|
|
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1_SCALAR* out1,
|
|
__global RETTYPE2_SCALAR* out2,
|
|
__global TYPE1_SCALAR* in1)
|
|
{
|
|
size_t i = get_global_id(0);
|
|
|
|
if (i + 1 < get_global_size(0))
|
|
{
|
|
TYPE1 a = vload3(0, in1 + 3 * i);
|
|
RETTYPE2 res2 = UNDEFR2;
|
|
RETTYPE1 res1 = )", builtin, R"((a, &res2);
|
|
vstore3(res1, 0, out1 + 3 * i);
|
|
vstore3(res2, 0, out2 + 3 * i);
|
|
}
|
|
else
|
|
{
|
|
// Figure out how many elements are left over after
|
|
// BUFFER_SIZE % (3 * sizeof(type)).
|
|
// Assume power of two buffer size.
|
|
size_t parity = i & 1;
|
|
TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
|
|
switch (parity)
|
|
{
|
|
case 0:
|
|
a.y = in1[3 * i + 1];
|
|
// fall through
|
|
case 1:
|
|
a.x = in1[3 * i];
|
|
break;
|
|
}
|
|
|
|
RETTYPE2 res2 = UNDEFR2;
|
|
RETTYPE1 res1 = )", builtin, R"((a, &res2);
|
|
|
|
switch (parity)
|
|
{
|
|
case 0:
|
|
out1[3 * i + 1] = res1.y;
|
|
out2[3 * i + 1] = res2.y;
|
|
// fall through
|
|
case 1:
|
|
out1[3 * i] = res1.x;
|
|
out2[3 * i] = res2.x;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
)" };
|
|
// clang-format on
|
|
|
|
if (sizeValues[vector_size_index] != 3)
|
|
for (const auto &chunk : kernel_nonvec3) kernel << chunk;
|
|
else
|
|
for (const auto &chunk : kernel_vec3) kernel << chunk;
|
|
|
|
return kernel.str();
|
|
}
|
|
|
|
std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
|
|
ParameterType retType, ParameterType type1,
|
|
ParameterType type2, int vector_size_index)
|
|
{
|
|
// To keep the kernel code readable, use macros for types and undef values.
|
|
std::ostringstream kernel;
|
|
EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
|
|
EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
|
|
EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
|
|
EmitDefineUndef(kernel, "UNDEF1", type1);
|
|
EmitDefineUndef(kernel, "UNDEF2", type2);
|
|
EmitEnableExtension(kernel, { retType, type1, type2 });
|
|
|
|
const bool is_vec3 = sizeValues[vector_size_index] == 3;
|
|
|
|
std::string invocation;
|
|
if (strlen(builtin) == 1)
|
|
{
|
|
// Assume a single-character builtin is an operator (e.g., +, *, ...).
|
|
invocation = is_vec3 ? "a" : "in1[i] ";
|
|
invocation += builtin;
|
|
invocation += is_vec3 ? "b" : " in2[i]";
|
|
}
|
|
else
|
|
{
|
|
// Otherwise call the builtin as a function with two arguments.
|
|
invocation = builtin;
|
|
invocation += is_vec3 ? "(a, b)" : "(in1[i], in2[i])";
|
|
}
|
|
|
|
// clang-format off
|
|
const char *kernel_nonvec3[] = { R"(
|
|
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
|
|
__global TYPE1* in1,
|
|
__global TYPE2* in2)
|
|
{
|
|
size_t i = get_global_id(0);
|
|
out[i] = )", invocation.c_str(), R"(;
|
|
}
|
|
)" };
|
|
|
|
const char *kernel_vec3[] = { R"(
|
|
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
|
|
__global TYPE1_SCALAR* in1,
|
|
__global TYPE2_SCALAR* in2)
|
|
{
|
|
size_t i = get_global_id(0);
|
|
|
|
if (i + 1 < get_global_size(0))
|
|
{
|
|
TYPE1 a = vload3(0, in1 + 3 * i);
|
|
TYPE2 b = vload3(0, in2 + 3 * i);
|
|
RETTYPE res = )", invocation.c_str(), R"(;
|
|
vstore3(res, 0, out + 3 * i);
|
|
}
|
|
else
|
|
{
|
|
// Figure out how many elements are left over after
|
|
// BUFFER_SIZE % (3 * sizeof(type)).
|
|
// Assume power of two buffer size.
|
|
size_t parity = i & 1;
|
|
TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
|
|
TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
|
|
switch (parity)
|
|
{
|
|
case 0:
|
|
a.y = in1[3 * i + 1];
|
|
b.y = in2[3 * i + 1];
|
|
// fall through
|
|
case 1:
|
|
a.x = in1[3 * i];
|
|
b.x = in2[3 * i];
|
|
break;
|
|
}
|
|
|
|
RETTYPE res = )", invocation.c_str(), R"(;
|
|
|
|
switch (parity)
|
|
{
|
|
case 0:
|
|
out[3 * i + 1] = res.y;
|
|
// fall through
|
|
case 1:
|
|
out[3 * i] = res.x;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
)" };
|
|
// clang-format on
|
|
|
|
if (!is_vec3)
|
|
for (const auto &chunk : kernel_nonvec3) kernel << chunk;
|
|
else
|
|
for (const auto &chunk : kernel_vec3) kernel << chunk;
|
|
|
|
return kernel.str();
|
|
}
|
|
|
|
std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
|
|
ParameterType retType1, ParameterType retType2,
|
|
ParameterType type1, ParameterType type2,
|
|
int vector_size_index)
|
|
{
|
|
// To keep the kernel code readable, use macros for types and undef values.
|
|
std::ostringstream kernel;
|
|
EmitDefineType(kernel, "RETTYPE1", retType1, vector_size_index);
|
|
EmitDefineType(kernel, "RETTYPE2", retType2, vector_size_index);
|
|
EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
|
|
EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
|
|
EmitDefineUndef(kernel, "UNDEF1", type1);
|
|
EmitDefineUndef(kernel, "UNDEF2", type2);
|
|
EmitDefineUndef(kernel, "UNDEFR2", retType2);
|
|
EmitEnableExtension(kernel, { retType1, retType2, type1, type2 });
|
|
|
|
// clang-format off
|
|
const char *kernel_nonvec3[] = { R"(
|
|
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1* out1,
|
|
__global RETTYPE2* out2,
|
|
__global TYPE1* in1,
|
|
__global TYPE2* in2)
|
|
{
|
|
size_t i = get_global_id(0);
|
|
out1[i] = )", builtin, R"((in1[i], in2[i], out2 + i);
|
|
}
|
|
)" };
|
|
|
|
const char *kernel_vec3[] = { R"(
|
|
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1_SCALAR* out1,
|
|
__global RETTYPE2_SCALAR* out2,
|
|
__global TYPE1_SCALAR* in1,
|
|
__global TYPE2_SCALAR* in2)
|
|
{
|
|
size_t i = get_global_id(0);
|
|
|
|
if (i + 1 < get_global_size(0))
|
|
{
|
|
TYPE1 a = vload3(0, in1 + 3 * i);
|
|
TYPE2 b = vload3(0, in2 + 3 * i);
|
|
RETTYPE2 res2 = UNDEFR2;
|
|
RETTYPE1 res1 = )", builtin, R"((a, b, &res2);
|
|
vstore3(res1, 0, out1 + 3 * i);
|
|
vstore3(res2, 0, out2 + 3 * i);
|
|
}
|
|
else
|
|
{
|
|
// Figure out how many elements are left over after
|
|
// BUFFER_SIZE % (3 * sizeof(type)).
|
|
// Assume power of two buffer size.
|
|
size_t parity = i & 1;
|
|
TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
|
|
TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
|
|
switch (parity)
|
|
{
|
|
case 0:
|
|
a.y = in1[3 * i + 1];
|
|
b.y = in2[3 * i + 1];
|
|
// fall through
|
|
case 1:
|
|
a.x = in1[3 * i];
|
|
b.x = in2[3 * i];
|
|
break;
|
|
}
|
|
|
|
RETTYPE2 res2 = UNDEFR2;
|
|
RETTYPE1 res1 = )", builtin, R"((a, b, &res2);
|
|
|
|
switch (parity)
|
|
{
|
|
case 0:
|
|
out1[3 * i + 1] = res1.y;
|
|
out2[3 * i + 1] = res2.y;
|
|
// fall through
|
|
case 1:
|
|
out1[3 * i] = res1.x;
|
|
out2[3 * i] = res2.x;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
)" };
|
|
// clang-format on
|
|
|
|
if (sizeValues[vector_size_index] != 3)
|
|
for (const auto &chunk : kernel_nonvec3) kernel << chunk;
|
|
else
|
|
for (const auto &chunk : kernel_vec3) kernel << chunk;
|
|
|
|
return kernel.str();
|
|
}
|
|
|
|
std::string GetTernaryKernel(const std::string &kernel_name,
|
|
const char *builtin, ParameterType retType,
|
|
ParameterType type1, ParameterType type2,
|
|
ParameterType type3, int vector_size_index)
|
|
{
|
|
// To keep the kernel code readable, use macros for types and undef values.
|
|
std::ostringstream kernel;
|
|
EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
|
|
EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
|
|
EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
|
|
EmitDefineType(kernel, "TYPE3", type3, vector_size_index);
|
|
EmitDefineUndef(kernel, "UNDEF1", type1);
|
|
EmitDefineUndef(kernel, "UNDEF2", type2);
|
|
EmitDefineUndef(kernel, "UNDEF3", type3);
|
|
EmitEnableExtension(kernel, { retType, type1, type2, type3 });
|
|
|
|
// clang-format off
|
|
const char *kernel_nonvec3[] = { R"(
|
|
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
|
|
__global TYPE1* in1,
|
|
__global TYPE2* in2,
|
|
__global TYPE3* in3)
|
|
{
|
|
size_t i = get_global_id(0);
|
|
out[i] = )", builtin, R"((in1[i], in2[i], in3[i]);
|
|
}
|
|
)" };
|
|
|
|
const char *kernel_vec3[] = { R"(
|
|
__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
|
|
__global TYPE1_SCALAR* in1,
|
|
__global TYPE2_SCALAR* in2,
|
|
__global TYPE3_SCALAR* in3)
|
|
{
|
|
size_t i = get_global_id(0);
|
|
|
|
if (i + 1 < get_global_size(0))
|
|
{
|
|
TYPE1 a = vload3(0, in1 + 3 * i);
|
|
TYPE2 b = vload3(0, in2 + 3 * i);
|
|
TYPE3 c = vload3(0, in3 + 3 * i);
|
|
RETTYPE res = )", builtin, R"((a, b, c);
|
|
vstore3(res, 0, out + 3 * i);
|
|
}
|
|
else
|
|
{
|
|
// Figure out how many elements are left over after
|
|
// BUFFER_SIZE % (3 * sizeof(type)).
|
|
// Assume power of two buffer size.
|
|
size_t parity = i & 1;
|
|
TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
|
|
TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
|
|
TYPE3 c = (TYPE3)(UNDEF3, UNDEF3, UNDEF3);
|
|
switch (parity)
|
|
{
|
|
case 0:
|
|
a.y = in1[3 * i + 1];
|
|
b.y = in2[3 * i + 1];
|
|
c.y = in3[3 * i + 1];
|
|
// fall through
|
|
case 1:
|
|
a.x = in1[3 * i];
|
|
b.x = in2[3 * i];
|
|
c.x = in3[3 * i];
|
|
break;
|
|
}
|
|
|
|
RETTYPE res = )", builtin, R"((a, b, c);
|
|
|
|
switch (parity)
|
|
{
|
|
case 0:
|
|
out[3 * i + 1] = res.y;
|
|
// fall through
|
|
case 1:
|
|
out[3 * i] = res.x;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
)" };
|
|
// clang-format on
|
|
|
|
if (sizeValues[vector_size_index] != 3)
|
|
for (const auto &chunk : kernel_nonvec3) kernel << chunk;
|
|
else
|
|
for (const auto &chunk : kernel_vec3) kernel << chunk;
|
|
|
|
return kernel.str();
|
|
}
|
|
|
|
cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id,
|
|
SourceGenerator generator)
|
|
{
|
|
// Generate the kernel code.
|
|
cl_uint vector_size_index = gMinVectorSizeIndex + job_id;
|
|
auto kernel_name = GetKernelName(vector_size_index);
|
|
auto source = generator(kernel_name, info.nameInCode, vector_size_index);
|
|
std::array<const char *, 1> sources{ source.c_str() };
|
|
|
|
// Create the program.
|
|
clProgramWrapper &program = info.programs[vector_size_index];
|
|
auto options = GetBuildOptions(info);
|
|
int error =
|
|
create_single_kernel_helper(gContext, &program, nullptr, sources.size(),
|
|
sources.data(), nullptr, options.c_str());
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
|
|
return error;
|
|
}
|
|
|
|
// Create a kernel for each thread. cl_kernels aren't thread safe, so make
|
|
// one for every thread
|
|
auto &kernels = info.kernels[vector_size_index];
|
|
assert(kernels.empty() && "Dirty BuildKernelInfo");
|
|
kernels.resize(info.threadCount);
|
|
for (auto &kernel : kernels)
|
|
{
|
|
kernel = clCreateKernel(program, kernel_name.c_str(), &error);
|
|
if (!kernel || error != CL_SUCCESS)
|
|
{
|
|
vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
|
|
return error;
|
|
}
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|