mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-25 08:19:02 +00:00
Format subgroups tests according to clang-format (#745)
* Format subgroups tests to clang-format * Format issue - fix do/while issue
This commit is contained in:
committed by
GitHub
parent
a6c3d921ae
commit
ec32bd9b5e
@@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@@ -25,89 +25,212 @@
|
||||
|
||||
// Some template helpers
|
||||
template <typename Ty> struct TypeName;
|
||||
template <> struct TypeName<cl_half> { static const char * val() { return "half"; } };
|
||||
template <> struct TypeName<cl_uint> { static const char * val() { return "uint"; } };
|
||||
template <> struct TypeName<cl_int> { static const char * val() { return "int"; } };
|
||||
template <> struct TypeName<cl_ulong> { static const char * val() { return "ulong"; } };
|
||||
template <> struct TypeName<cl_long> { static const char * val() { return "long"; } };
|
||||
template <> struct TypeName<float> { static const char * val() { return "float"; } };
|
||||
template <> struct TypeName<double> { static const char * val() { return "double"; } };
|
||||
template <> struct TypeName<cl_half>
|
||||
{
|
||||
static const char *val() { return "half"; }
|
||||
};
|
||||
template <> struct TypeName<cl_uint>
|
||||
{
|
||||
static const char *val() { return "uint"; }
|
||||
};
|
||||
template <> struct TypeName<cl_int>
|
||||
{
|
||||
static const char *val() { return "int"; }
|
||||
};
|
||||
template <> struct TypeName<cl_ulong>
|
||||
{
|
||||
static const char *val() { return "ulong"; }
|
||||
};
|
||||
template <> struct TypeName<cl_long>
|
||||
{
|
||||
static const char *val() { return "long"; }
|
||||
};
|
||||
template <> struct TypeName<float>
|
||||
{
|
||||
static const char *val() { return "float"; }
|
||||
};
|
||||
template <> struct TypeName<double>
|
||||
{
|
||||
static const char *val() { return "double"; }
|
||||
};
|
||||
|
||||
template <typename Ty> struct TypeDef;
|
||||
template <> struct TypeDef<cl_half> { static const char * val() { return "typedef half Type;\n"; } };
|
||||
template <> struct TypeDef<cl_uint> { static const char * val() { return "typedef uint Type;\n"; } };
|
||||
template <> struct TypeDef<cl_int> { static const char * val() { return "typedef int Type;\n"; } };
|
||||
template <> struct TypeDef<cl_ulong> { static const char * val() { return "typedef ulong Type;\n"; } };
|
||||
template <> struct TypeDef<cl_long> { static const char * val() { return "typedef long Type;\n"; } };
|
||||
template <> struct TypeDef<float> { static const char * val() { return "typedef float Type;\n"; } };
|
||||
template <> struct TypeDef<double> { static const char * val() { return "typedef double Type;\n"; } };
|
||||
template <> struct TypeDef<cl_half>
|
||||
{
|
||||
static const char *val() { return "typedef half Type;\n"; }
|
||||
};
|
||||
template <> struct TypeDef<cl_uint>
|
||||
{
|
||||
static const char *val() { return "typedef uint Type;\n"; }
|
||||
};
|
||||
template <> struct TypeDef<cl_int>
|
||||
{
|
||||
static const char *val() { return "typedef int Type;\n"; }
|
||||
};
|
||||
template <> struct TypeDef<cl_ulong>
|
||||
{
|
||||
static const char *val() { return "typedef ulong Type;\n"; }
|
||||
};
|
||||
template <> struct TypeDef<cl_long>
|
||||
{
|
||||
static const char *val() { return "typedef long Type;\n"; }
|
||||
};
|
||||
template <> struct TypeDef<float>
|
||||
{
|
||||
static const char *val() { return "typedef float Type;\n"; }
|
||||
};
|
||||
template <> struct TypeDef<double>
|
||||
{
|
||||
static const char *val() { return "typedef double Type;\n"; }
|
||||
};
|
||||
|
||||
template <typename Ty, int Which> struct TypeIdentity;
|
||||
// template <> struct TypeIdentity<cl_half,0> { static cl_half val() { return (cl_half)0.0; } };
|
||||
// template <> struct TypeIdentity<cl_half,0> { static cl_half val() { return -(cl_half)65536.0; } };
|
||||
// template <> struct TypeIdentity<cl_half,0> { static cl_half val() { return (cl_half)65536.0; } };
|
||||
// template <> struct TypeIdentity<cl_half,0> { static cl_half val() { return
|
||||
// (cl_half)0.0; } }; template <> struct TypeIdentity<cl_half,0> { static
|
||||
// cl_half val() { return -(cl_half)65536.0; } }; template <> struct
|
||||
// TypeIdentity<cl_half,0> { static cl_half val() { return (cl_half)65536.0; }
|
||||
// };
|
||||
|
||||
template <> struct TypeIdentity<cl_uint,0> { static cl_uint val() { return (cl_uint)0; } };
|
||||
template <> struct TypeIdentity<cl_uint,1> { static cl_uint val() { return (cl_uint)0; } };
|
||||
template <> struct TypeIdentity<cl_uint,2> { static cl_uint val() { return (cl_uint)0xffffffff; } };
|
||||
template <> struct TypeIdentity<cl_uint, 0>
|
||||
{
|
||||
static cl_uint val() { return (cl_uint)0; }
|
||||
};
|
||||
template <> struct TypeIdentity<cl_uint, 1>
|
||||
{
|
||||
static cl_uint val() { return (cl_uint)0; }
|
||||
};
|
||||
template <> struct TypeIdentity<cl_uint, 2>
|
||||
{
|
||||
static cl_uint val() { return (cl_uint)0xffffffff; }
|
||||
};
|
||||
|
||||
template <> struct TypeIdentity<cl_int,0> { static cl_int val() { return (cl_int)0 ; } };
|
||||
template <> struct TypeIdentity<cl_int,1> { static cl_int val() { return (cl_int)0x80000000; } };
|
||||
template <> struct TypeIdentity<cl_int,2> { static cl_int val() { return (cl_int)0x7fffffff; } };
|
||||
template <> struct TypeIdentity<cl_int, 0>
|
||||
{
|
||||
static cl_int val() { return (cl_int)0; }
|
||||
};
|
||||
template <> struct TypeIdentity<cl_int, 1>
|
||||
{
|
||||
static cl_int val() { return (cl_int)0x80000000; }
|
||||
};
|
||||
template <> struct TypeIdentity<cl_int, 2>
|
||||
{
|
||||
static cl_int val() { return (cl_int)0x7fffffff; }
|
||||
};
|
||||
|
||||
template <> struct TypeIdentity<cl_ulong,0> { static cl_ulong val() { return (cl_ulong)0 ; } };
|
||||
template <> struct TypeIdentity<cl_ulong,1> { static cl_ulong val() { return (cl_ulong)0 ; } };
|
||||
template <> struct TypeIdentity<cl_ulong,2> { static cl_ulong val() { return (cl_ulong)0xffffffffffffffffULL ; } };
|
||||
template <> struct TypeIdentity<cl_ulong, 0>
|
||||
{
|
||||
static cl_ulong val() { return (cl_ulong)0; }
|
||||
};
|
||||
template <> struct TypeIdentity<cl_ulong, 1>
|
||||
{
|
||||
static cl_ulong val() { return (cl_ulong)0; }
|
||||
};
|
||||
template <> struct TypeIdentity<cl_ulong, 2>
|
||||
{
|
||||
static cl_ulong val() { return (cl_ulong)0xffffffffffffffffULL; }
|
||||
};
|
||||
|
||||
template <> struct TypeIdentity<cl_long,0> { static cl_long val() { return (cl_long)0; } };
|
||||
template <> struct TypeIdentity<cl_long,1> { static cl_long val() { return (cl_long)0x8000000000000000ULL; } };
|
||||
template <> struct TypeIdentity<cl_long,2> { static cl_long val() { return (cl_long)0x7fffffffffffffffULL; } };
|
||||
template <> struct TypeIdentity<cl_long, 0>
|
||||
{
|
||||
static cl_long val() { return (cl_long)0; }
|
||||
};
|
||||
template <> struct TypeIdentity<cl_long, 1>
|
||||
{
|
||||
static cl_long val() { return (cl_long)0x8000000000000000ULL; }
|
||||
};
|
||||
template <> struct TypeIdentity<cl_long, 2>
|
||||
{
|
||||
static cl_long val() { return (cl_long)0x7fffffffffffffffULL; }
|
||||
};
|
||||
|
||||
|
||||
template <> struct TypeIdentity<float,0> { static float val() { return 0.F; } };
|
||||
template <> struct TypeIdentity<float,1> { static float val() { return -std::numeric_limits<float>::infinity(); } };
|
||||
template <> struct TypeIdentity<float,2> { static float val() { return std::numeric_limits<float>::infinity(); } };
|
||||
template <> struct TypeIdentity<float, 0>
|
||||
{
|
||||
static float val() { return 0.F; }
|
||||
};
|
||||
template <> struct TypeIdentity<float, 1>
|
||||
{
|
||||
static float val() { return -std::numeric_limits<float>::infinity(); }
|
||||
};
|
||||
template <> struct TypeIdentity<float, 2>
|
||||
{
|
||||
static float val() { return std::numeric_limits<float>::infinity(); }
|
||||
};
|
||||
|
||||
template <> struct TypeIdentity<double,0> { static double val() { return 0.L; } };
|
||||
template <> struct TypeIdentity<double, 0>
|
||||
{
|
||||
static double val() { return 0.L; }
|
||||
};
|
||||
|
||||
template <> struct TypeIdentity<double,1> { static double val() { return -std::numeric_limits<double>::infinity(); } };
|
||||
template <> struct TypeIdentity<double,2> { static double val() { return std::numeric_limits<double>::infinity(); } };
|
||||
template <> struct TypeIdentity<double, 1>
|
||||
{
|
||||
static double val() { return -std::numeric_limits<double>::infinity(); }
|
||||
};
|
||||
template <> struct TypeIdentity<double, 2>
|
||||
{
|
||||
static double val() { return std::numeric_limits<double>::infinity(); }
|
||||
};
|
||||
|
||||
template <typename Ty> struct TypeCheck;
|
||||
template <> struct TypeCheck<cl_uint> { static bool val(cl_device_id) { return true; } };
|
||||
template <> struct TypeCheck<cl_int> { static bool val(cl_device_id) { return true; } };
|
||||
template <> struct TypeCheck<cl_uint>
|
||||
{
|
||||
static bool val(cl_device_id) { return true; }
|
||||
};
|
||||
template <> struct TypeCheck<cl_int>
|
||||
{
|
||||
static bool val(cl_device_id) { return true; }
|
||||
};
|
||||
|
||||
static bool
|
||||
int64_ok(cl_device_id device)
|
||||
static bool int64_ok(cl_device_id device)
|
||||
{
|
||||
char profile[128];
|
||||
int error;
|
||||
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
|
||||
if (error) {
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile),
|
||||
(void *)&profile, NULL);
|
||||
if (error)
|
||||
{
|
||||
log_info("clGetDeviceInfo failed with CL_DEVICE_PROFILE\n");
|
||||
return false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (strcmp(profile, "EMBEDDED_PROFILE") == 0)
|
||||
return is_extension_available(device, "cles_khr_int64");
|
||||
return is_extension_available(device, "cles_khr_int64");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <> struct TypeCheck<cl_ulong> { static bool val(cl_device_id device) { return int64_ok(device); } };
|
||||
template <> struct TypeCheck<cl_long> { static bool val(cl_device_id device) { return int64_ok(device); } };
|
||||
template <> struct TypeCheck<cl_float> { static bool val(cl_device_id) { return true; } };
|
||||
template <> struct TypeCheck<cl_half> {
|
||||
static bool val(cl_device_id device) { return is_extension_available(device, "cl_khr_fp16"); }
|
||||
template <> struct TypeCheck<cl_ulong>
|
||||
{
|
||||
static bool val(cl_device_id device) { return int64_ok(device); }
|
||||
};
|
||||
template <> struct TypeCheck<double> {
|
||||
static bool val(cl_device_id device) {
|
||||
template <> struct TypeCheck<cl_long>
|
||||
{
|
||||
static bool val(cl_device_id device) { return int64_ok(device); }
|
||||
};
|
||||
template <> struct TypeCheck<cl_float>
|
||||
{
|
||||
static bool val(cl_device_id) { return true; }
|
||||
};
|
||||
template <> struct TypeCheck<cl_half>
|
||||
{
|
||||
static bool val(cl_device_id device)
|
||||
{
|
||||
return is_extension_available(device, "cl_khr_fp16");
|
||||
}
|
||||
};
|
||||
template <> struct TypeCheck<double>
|
||||
{
|
||||
static bool val(cl_device_id device)
|
||||
{
|
||||
int error;
|
||||
cl_device_fp_config c;
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c), (void *)&c, NULL);
|
||||
if (error) {
|
||||
log_info("clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n");
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c),
|
||||
(void *)&c, NULL);
|
||||
if (error)
|
||||
{
|
||||
log_info(
|
||||
"clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n");
|
||||
return false;
|
||||
}
|
||||
return c != 0;
|
||||
@@ -116,10 +239,10 @@ template <> struct TypeCheck<double> {
|
||||
|
||||
|
||||
// Run a test kernel to compute the result of a built-in on an input
|
||||
static int
|
||||
run_kernel(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t global, size_t local,
|
||||
void *idata, size_t isize, void *mdata, size_t msize,
|
||||
void *odata, size_t osize, size_t tsize=0)
|
||||
static int run_kernel(cl_context context, cl_command_queue queue,
|
||||
cl_kernel kernel, size_t global, size_t local,
|
||||
void *idata, size_t isize, void *mdata, size_t msize,
|
||||
void *odata, size_t osize, size_t tsize = 0)
|
||||
{
|
||||
clMemWrapper in;
|
||||
clMemWrapper xy;
|
||||
@@ -136,8 +259,10 @@ run_kernel(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t
|
||||
out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
if (tsize) {
|
||||
tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, tsize, NULL, &error);
|
||||
if (tsize)
|
||||
{
|
||||
tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS,
|
||||
tsize, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
}
|
||||
|
||||
@@ -150,21 +275,26 @@ run_kernel(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t
|
||||
error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
if (tsize) {
|
||||
if (tsize)
|
||||
{
|
||||
error = clSetKernelArg(kernel, 3, sizeof(tmp), (void *)&tmp);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
}
|
||||
|
||||
error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, NULL, NULL);
|
||||
error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, NULL,
|
||||
NULL);
|
||||
test_error(error, "clEnqueueWriteBuffer failed");
|
||||
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0,
|
||||
NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL, NULL);
|
||||
error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL,
|
||||
NULL);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, NULL, NULL);
|
||||
error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, NULL,
|
||||
NULL);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
@@ -174,10 +304,13 @@ run_kernel(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t
|
||||
}
|
||||
|
||||
// Driver for testing a single built in function
|
||||
template <typename Ty, typename Fns, size_t GSIZE, size_t LSIZE, size_t TSIZE=0>
|
||||
struct test {
|
||||
static int
|
||||
run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, const char *kname, const char *src, int dynscl=0)
|
||||
template <typename Ty, typename Fns, size_t GSIZE, size_t LSIZE,
|
||||
size_t TSIZE = 0>
|
||||
struct test
|
||||
{
|
||||
static int run(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements, const char *kname,
|
||||
const char *src, int dynscl = 0)
|
||||
{
|
||||
size_t tmp;
|
||||
int error;
|
||||
@@ -189,24 +322,25 @@ struct test {
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
cl_platform_id platform;
|
||||
cl_int sgmap[2*GSIZE];
|
||||
cl_int sgmap[2 * GSIZE];
|
||||
Ty mapin[LSIZE];
|
||||
Ty mapout[LSIZE];
|
||||
|
||||
// Make sure a test of type Ty is supported by the device
|
||||
if (!TypeCheck<Ty>::val(device))
|
||||
return 0;
|
||||
// Make sure a test of type Ty is supported by the device
|
||||
if (!TypeCheck<Ty>::val(device)) return 0;
|
||||
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL);
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
|
||||
(void *)&platform, NULL);
|
||||
test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM");
|
||||
|
||||
kstrings[0] = "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
|
||||
"#define XY(M,I) M[I].x = get_sub_group_local_id(); M[I].y = get_sub_group_id();\n";
|
||||
"#define XY(M,I) M[I].x = get_sub_group_local_id(); "
|
||||
"M[I].y = get_sub_group_id();\n";
|
||||
kstrings[1] = TypeDef<Ty>::val();
|
||||
kstrings[2] = src;
|
||||
error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 3, kstrings, kname, "-cl-std=CL2.0");
|
||||
if (error != 0)
|
||||
return error;
|
||||
error = create_single_kernel_helper_with_build_options(
|
||||
context, &program, &kernel, 3, kstrings, kname, "-cl-std=CL2.0");
|
||||
if (error != 0) return error;
|
||||
|
||||
// Determine some local dimensions to use for the test.
|
||||
global = GSIZE;
|
||||
@@ -215,31 +349,42 @@ struct test {
|
||||
|
||||
// Limit it a bit so we have muliple work groups
|
||||
// Ideally this will still be large enough to give us multiple subgroups
|
||||
if (local > LSIZE)
|
||||
local = LSIZE;
|
||||
if (local > LSIZE) local = LSIZE;
|
||||
|
||||
// Get the sub group info
|
||||
// Get the sub group info
|
||||
clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr;
|
||||
clGetKernelSubGroupInfoKHR_ptr = (clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(platform,
|
||||
"clGetKernelSubGroupInfoKHR");
|
||||
if (clGetKernelSubGroupInfoKHR_ptr == NULL) {
|
||||
log_error("ERROR: clGetKernelSubGroupInfoKHR function not available");
|
||||
clGetKernelSubGroupInfoKHR_ptr = (clGetKernelSubGroupInfoKHR_fn)
|
||||
clGetExtensionFunctionAddressForPlatform(
|
||||
platform, "clGetKernelSubGroupInfoKHR");
|
||||
if (clGetKernelSubGroupInfoKHR_ptr == NULL)
|
||||
{
|
||||
log_error(
|
||||
"ERROR: clGetKernelSubGroupInfoKHR function not available");
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
|
||||
sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
|
||||
test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR");
|
||||
error = clGetKernelSubGroupInfoKHR_ptr(
|
||||
kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
|
||||
sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
|
||||
test_error(error,
|
||||
"clGetKernelSubGroupInfoKHR failed for "
|
||||
"CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR");
|
||||
subgroup_size = (int)tmp;
|
||||
|
||||
error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
|
||||
sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
|
||||
test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR");
|
||||
error = clGetKernelSubGroupInfoKHR_ptr(
|
||||
kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
|
||||
sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
|
||||
test_error(error,
|
||||
"clGetKernelSubGroupInfoKHR failed for "
|
||||
"CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR");
|
||||
num_subgroups = (int)tmp;
|
||||
|
||||
// Make sure the number of sub groups is what we expect
|
||||
if (num_subgroups != (local + subgroup_size - 1)/ subgroup_size) {
|
||||
log_error("ERROR: unexpected number of subgroups (%d) returned by clGetKernelSubGroupInfoKHR\n", num_subgroups);
|
||||
// Make sure the number of sub groups is what we expect
|
||||
if (num_subgroups != (local + subgroup_size - 1) / subgroup_size)
|
||||
{
|
||||
log_error("ERROR: unexpected number of subgroups (%d) returned by "
|
||||
"clGetKernelSubGroupInfoKHR\n",
|
||||
num_subgroups);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -248,38 +393,38 @@ struct test {
|
||||
size_t input_array_size = GSIZE;
|
||||
size_t output_array_size = GSIZE;
|
||||
|
||||
if (dynscl != 0) {
|
||||
input_array_size = (int)global / (int)local * num_subgroups * dynscl;
|
||||
output_array_size = (int)global / (int)local * dynscl;
|
||||
if (dynscl != 0)
|
||||
{
|
||||
input_array_size =
|
||||
(int)global / (int)local * num_subgroups * dynscl;
|
||||
output_array_size = (int)global / (int)local * dynscl;
|
||||
}
|
||||
|
||||
idata.resize(input_array_size);
|
||||
odata.resize(output_array_size);
|
||||
|
||||
// Run the kernel once on zeroes to get the map
|
||||
memset(&idata[0], 0, input_array_size * sizeof(Ty));
|
||||
error = run_kernel(context, queue, kernel, global, local,
|
||||
&idata[0], input_array_size * sizeof(Ty),
|
||||
sgmap, global*sizeof(cl_int)*2,
|
||||
&odata[0], output_array_size * sizeof(Ty),
|
||||
TSIZE*sizeof(Ty));
|
||||
if (error)
|
||||
return error;
|
||||
// Run the kernel once on zeroes to get the map
|
||||
memset(&idata[0], 0, input_array_size * sizeof(Ty));
|
||||
error = run_kernel(context, queue, kernel, global, local, &idata[0],
|
||||
input_array_size * sizeof(Ty), sgmap,
|
||||
global * sizeof(cl_int) * 2, &odata[0],
|
||||
output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
|
||||
if (error) return error;
|
||||
|
||||
// Generate the desired input for the kernel
|
||||
Fns::gen(&idata[0], mapin, sgmap, subgroup_size, (int)local, (int)global / (int)local);
|
||||
// Generate the desired input for the kernel
|
||||
Fns::gen(&idata[0], mapin, sgmap, subgroup_size, (int)local,
|
||||
(int)global / (int)local);
|
||||
|
||||
error = run_kernel(context, queue, kernel, global, local,
|
||||
&idata[0], input_array_size * sizeof(Ty),
|
||||
sgmap, global*sizeof(cl_int)*2,
|
||||
&odata[0], output_array_size * sizeof(Ty),
|
||||
TSIZE*sizeof(Ty));
|
||||
if (error)
|
||||
return error;
|
||||
error = run_kernel(context, queue, kernel, global, local, &idata[0],
|
||||
input_array_size * sizeof(Ty), sgmap,
|
||||
global * sizeof(cl_int) * 2, &odata[0],
|
||||
output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
|
||||
if (error) return error;
|
||||
|
||||
|
||||
// Check the result
|
||||
return Fns::chk(&idata[0], &odata[0], mapin, mapout, sgmap, subgroup_size, (int)local, (int)global / (int)local);
|
||||
// Check the result
|
||||
return Fns::chk(&idata[0], &odata[0], mapin, mapout, sgmap,
|
||||
subgroup_size, (int)local, (int)global / (int)local);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user