mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-21 23:09:01 +00:00
Initial open source release of OpenCL 2.2 CTS.
This commit is contained in:
12
test_conformance/clcpp/subgroups/CMakeLists.txt
Normal file
12
test_conformance/clcpp/subgroups/CMakeLists.txt
Normal file
@@ -0,0 +1,12 @@
|
||||
# Name of this test module; it is also the prefix of the sources variable below.
set(MODULE_NAME CPP_SUBGROUPS)

# Sources of the module: the test entry point plus the harness files it is
# compiled together with.
set(${MODULE_NAME}_SOURCES
    main.cpp
    ../../../test_common/harness/errorHelpers.c
    ../../../test_common/harness/testHarness.c
    ../../../test_common/harness/kernelHelpers.c
    ../../../test_common/harness/msvc9.c
    ../../../test_common/harness/parseParameters.cpp
)

# Presumably the build rules shared by the conformance test modules (defined outside this file).
include(../../CMakeCommon.txt)
|
||||
97
test_conformance/clcpp/subgroups/common.hpp
Normal file
97
test_conformance/clcpp/subgroups/common.hpp
Normal file
@@ -0,0 +1,97 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SG_COMMON_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SG_COMMON_HPP
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
|
||||
// Operation applied by the reduce/scan sub-group tests.
// The underlying type is int and the enumerators keep their implicit values.
enum class work_group_op : int
{
    add, // sum of the values
    min, // smallest value
    max  // largest value
};
|
||||
|
||||
std::string to_string(work_group_op op)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case work_group_op::add:
|
||||
return "add";
|
||||
case work_group_op::min:
|
||||
return "min";
|
||||
case work_group_op::max:
|
||||
return "max";
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
std::vector<CL_INT_TYPE> generate_input(size_t count, size_t wg_size)
|
||||
{
|
||||
std::vector<CL_INT_TYPE> input(count, CL_INT_TYPE(1));
|
||||
switch (op)
|
||||
{
|
||||
case work_group_op::add:
|
||||
return input;
|
||||
case work_group_op::min:
|
||||
{
|
||||
size_t j = wg_size;
|
||||
for(size_t i = 0; i < count; i++)
|
||||
{
|
||||
input[i] = static_cast<CL_INT_TYPE>(j);
|
||||
j--;
|
||||
if(j == 0)
|
||||
{
|
||||
j = wg_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case work_group_op::max:
|
||||
{
|
||||
size_t j = 0;
|
||||
for(size_t i = 0; i < count; i++)
|
||||
{
|
||||
input[i] = static_cast<CL_INT_TYPE>(j);
|
||||
j++;
|
||||
if(j == wg_size)
|
||||
{
|
||||
j = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return input;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
std::vector<CL_INT_TYPE> generate_output(size_t count, size_t wg_size)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case work_group_op::add:
|
||||
return std::vector<CL_INT_TYPE>(count, CL_INT_TYPE(0));
|
||||
case work_group_op::min:
|
||||
return std::vector<CL_INT_TYPE>(count, (std::numeric_limits<CL_INT_TYPE>::max)());
|
||||
case work_group_op::max:
|
||||
return std::vector<CL_INT_TYPE>(count, (std::numeric_limits<CL_INT_TYPE>::min)());
|
||||
}
|
||||
return std::vector<CL_INT_TYPE>(count, CL_INT_TYPE(0));
|
||||
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SG_COMMON_HPP
|
||||
34
test_conformance/clcpp/subgroups/main.cpp
Normal file
34
test_conformance/clcpp/subgroups/main.cpp
Normal file
@@ -0,0 +1,34 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../common.hpp"
|
||||
|
||||
#include "test_sg_all.hpp"
|
||||
#include "test_sg_any.hpp"
|
||||
#include "test_sg_broadcast.hpp"
|
||||
#include "test_sg_reduce.hpp"
|
||||
#include "test_sg_scan_inclusive.hpp"
|
||||
#include "test_sg_scan_exclusive.hpp"
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
// Get list to all test functions
|
||||
std::vector<basefn> testfn_list = autotest::test_suite::get_test_functions();
|
||||
// Get names of all test functions
|
||||
std::vector<std::string> testfn_names = autotest::test_suite::get_test_names();
|
||||
// Create a vector of pointers to the names test functions
|
||||
std::vector<const char *> testfn_names_c_str = autotest::get_strings_ptrs(testfn_names);
|
||||
return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0);
|
||||
}
|
||||
219
test_conformance/clcpp/subgroups/test_sg_all.hpp
Normal file
219
test_conformance/clcpp/subgroups/test_sg_all.hpp
Normal file
@@ -0,0 +1,219 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ALL_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ALL_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
// Common for tests of sub-group functions
|
||||
#include "common.hpp"
|
||||
|
||||
// Returns the OpenCL C++ source of the test_sg_all kernel. Each work-item
// writes 1 to output[tid] when sub_group_all(input[tid] < input[tid+1]) is
// true and 0 otherwise.
std::string generate_sg_all_kernel_code()
{
    std::string source;
    source += "#include <opencl_memory>\n";
    source += "#include <opencl_work_item>\n";
    source += "#include <opencl_work_group>\n";
    source += "using namespace cl;\n";
    source += "__kernel void test_sg_all(global_ptr<uint[]> input, global_ptr<uint[]> output)\n";
    source += "{\n";
    source += " ulong tid = get_global_id(0);\n";
    source += " bool result = sub_group_all(input[tid] < input[tid+1]);\n";
    source += " if(!result) {\n output[tid] = 0;\n return;\n }\n";
    source += " output[tid] = 1;\n";
    source += "}\n";
    return source;
}
|
||||
|
||||
int verify_sg_all(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < count; i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size)
|
||||
{
|
||||
// sub-group all
|
||||
bool all = true;
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if(!(in[i+j+k] < in[i+j+k+1]))
|
||||
{
|
||||
all = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Convert bool to uint
|
||||
cl_uint all_uint = all ? 1 : 0;
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (all_uint != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_all %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(all_uint),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
std::vector<cl_uint> generate_input_sg_all(size_t count, size_t wg_size)
|
||||
{
|
||||
std::vector<cl_uint> input(count, cl_uint(0));
|
||||
size_t j = wg_size;
|
||||
for(size_t i = 0; i < count; i++)
|
||||
{
|
||||
input[i] = static_cast<cl_uint>(i);
|
||||
// In one place in ~half of work-groups (input[tid] < input[tid+1]) will
|
||||
// generate false, it means that for sub_group_all(input[tid] < input[tid+1])
|
||||
// should return false for all sub-groups in that work-groups
|
||||
if((j == wg_size/2) && (i > count/2))
|
||||
{
|
||||
input[i] = input[i - 1];
|
||||
}
|
||||
j--;
|
||||
if(j == 0)
|
||||
{
|
||||
j = wg_size;
|
||||
}
|
||||
}
|
||||
return input;
|
||||
}
|
||||
|
||||
std::vector<cl_uint> generate_output_sg_all(size_t count, size_t wg_size)
|
||||
{
|
||||
(void) wg_size;
|
||||
return std::vector<cl_uint>(count, cl_uint(1));
|
||||
}
|
||||
|
||||
// Runs the sub_group_all test on `device`.
//
// Builds the test_sg_all kernel, queries the kernel's work-group size and the
// maximum sub-group size for that work-group size, rounds `count` up to a
// whole number of work-groups, runs the kernel and checks the result with
// verify_sg_all().
//
// Returns CL_SUCCESS on success; otherwise the failing OpenCL status, or -1
// when the sub-group query or the verification fails.
int sub_group_all(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
{
    // Owns the OpenCL objects created below and releases them on every exit
    // path. NOTE(review): the RETURN_ON_* macros are defined outside this file
    // and are expected to return early, which would otherwise leak the objects.
    struct cl_objects
    {
        cl_mem buffers[2];
        cl_program program;
        cl_kernel kernel;

        cl_objects() : program(NULL), kernel(NULL)
        {
            buffers[0] = NULL;
            buffers[1] = NULL;
        }

        ~cl_objects()
        {
            if (buffers[0] != NULL) clReleaseMemObject(buffers[0]);
            if (buffers[1] != NULL) clReleaseMemObject(buffers[1]);
            if (kernel != NULL) clReleaseKernel(kernel);
            if (program != NULL) clReleaseProgram(program);
        }
    } objects;

    cl_program program = NULL;
    cl_kernel kernel = NULL;
    size_t wg_size = 0;
    size_t sg_max_size = 0;
    size_t work_size[1];
    int err;

    std::string code_str = generate_sg_all_kernel_code();
// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_all");
    RETURN_ON_ERROR(err)
    // Hand the objects to the guard only after a successful build.
    objects.program = program;
    objects.kernel = kernel;
    return err;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
    return CL_SUCCESS;
#else
    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_all");
    RETURN_ON_ERROR(err)
    // Hand the objects to the guard only after a successful build.
    objects.program = program;
    objects.kernel = kernel;
#endif

    // Work-group size the kernel can be launched with; used as the local size.
    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")

    // Maximum sub-group size for a 1D launch with local size wg_size.
    size_t param_value_size = 0;
    err = clGetKernelSubGroupInfo(
        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
        sizeof(size_t), static_cast<void*>(&wg_size),
        sizeof(size_t), static_cast<void*>(&sg_max_size),
        &param_value_size
    );
    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")

    // Verify size of returned param
    if (param_value_size != sizeof(size_t))
    {
        RETURN_ON_ERROR_MSG(-1,
            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
            static_cast<unsigned long>(sizeof(size_t)),
            static_cast<unsigned long>(param_value_size)
        )
    }

    // Zero sizes would divide by zero below and stall the verification loops.
    if (wg_size == 0 || sg_max_size == 0)
    {
        RETURN_ON_ERROR_MSG(-1, "Work-group size and max sub-group size must be non-zero")
    }

    // Calculate global work size: count rounded up to whole work-groups.
    const size_t wg_number = count / wg_size + ((count % wg_size != 0) ? 1 : 0);
    const size_t flat_work_size = wg_number * wg_size;
    work_size[0] = flat_work_size;

    // One extra input element because the kernel reads input[tid + 1].
    std::vector<cl_uint> input = generate_input_sg_all(flat_work_size + 1, wg_size);
    std::vector<cl_uint> output = generate_output_sg_all(flat_work_size, wg_size);

    objects.buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint) * input.size(), NULL, &err);
    RETURN_ON_CL_ERROR(err, "clCreateBuffer");

    objects.buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint) * output.size(), NULL, &err);
    RETURN_ON_CL_ERROR(err, "clCreateBuffer");

    err = clEnqueueWriteBuffer(
        queue, objects.buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
        static_cast<void *>(input.data()), 0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");

    // Check each argument separately so the reported status is a real error code.
    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &objects.buffers[0]);
    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &objects.buffers[1]);
    RETURN_ON_CL_ERROR(err, "clSetKernelArg");

    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");

    err = clEnqueueReadBuffer(
        queue, objects.buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
        static_cast<void *>(output.data()), 0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");

    if (verify_sg_all(input, output, flat_work_size, wg_size, sg_max_size) != CL_SUCCESS)
    {
        RETURN_ON_ERROR_MSG(-1, "sub_group_all failed");
    }
    log_info("sub_group_all passed\n");

    // Buffers, kernel and program are released by `objects`.
    return err;
}
|
||||
|
||||
// Test case wrapper: runs sub_group_all() with n_elems work-items, passes the
// status through CHECK_ERROR and returns it.
AUTO_TEST_CASE(test_sub_group_all)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    const int err = sub_group_all(device, context, queue, n_elems);
    CHECK_ERROR(err)
    return err;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ALL_HPP
|
||||
219
test_conformance/clcpp/subgroups/test_sg_any.hpp
Normal file
219
test_conformance/clcpp/subgroups/test_sg_any.hpp
Normal file
@@ -0,0 +1,219 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ANY_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ANY_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
// Common for tests of sub-group functions
|
||||
#include "common.hpp"
|
||||
|
||||
// Returns the OpenCL C++ source of the test_sg_any kernel. Each work-item
// writes 1 to output[tid] when sub_group_any(input[tid] == input[tid+1]) is
// true and 0 otherwise.
std::string generate_sg_any_kernel_code()
{
    std::string source;
    source += "#include <opencl_memory>\n";
    source += "#include <opencl_work_item>\n";
    source += "#include <opencl_work_group>\n";
    source += "using namespace cl;\n";
    source += "__kernel void test_sg_any(global_ptr<uint[]> input, global_ptr<uint[]> output)\n";
    source += "{\n";
    source += " ulong tid = get_global_id(0);\n";
    source += " bool result = sub_group_any(input[tid] == input[tid+1]);\n";
    source += " if(!result) {\n output[tid] = 0;\n return;\n }\n";
    source += " output[tid] = 1;\n";
    source += "}\n";
    return source;
}
|
||||
|
||||
int verify_sg_any(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < count; i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size)
|
||||
{
|
||||
// sub-group any
|
||||
bool any = false;
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if(in[i+j+k] == in[i+j+k+1])
|
||||
{
|
||||
any = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Convert bool to uint
|
||||
cl_uint any_uint = any ? 1 : 0;
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (any_uint != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_any %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(any_uint),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
std::vector<cl_uint> generate_input_sg_any(size_t count, size_t wg_size)
|
||||
{
|
||||
std::vector<cl_uint> input(count, cl_uint(0));
|
||||
size_t j = wg_size;
|
||||
for(size_t i = 0; i < count; i++)
|
||||
{
|
||||
input[i] = static_cast<cl_uint>(i);
|
||||
// In one place in ~half of work-groups (input[tid] == input[tid+1]) will
|
||||
// generate true, it means that for sub_group_all(input[tid] == input[tid+1])
|
||||
// should return false for one sub-group in that work-groups
|
||||
if((j == wg_size/2) && (i > count/2))
|
||||
{
|
||||
input[i] = input[i - 1];
|
||||
}
|
||||
j--;
|
||||
if(j == 0)
|
||||
{
|
||||
j = wg_size;
|
||||
}
|
||||
}
|
||||
return input;
|
||||
}
|
||||
|
||||
std::vector<cl_uint> generate_output_sg_any(size_t count, size_t wg_size)
|
||||
{
|
||||
(void) wg_size;
|
||||
return std::vector<cl_uint>(count, cl_uint(1));
|
||||
}
|
||||
|
||||
// Runs the sub_group_any test on `device`.
//
// Builds the test_sg_any kernel, queries the kernel's work-group size and the
// maximum sub-group size for that work-group size, rounds `count` up to a
// whole number of work-groups, runs the kernel and checks the result with
// verify_sg_any().
//
// Returns CL_SUCCESS on success; otherwise the failing OpenCL status, or -1
// when the sub-group query or the verification fails.
int sub_group_any(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
{
    // Owns the OpenCL objects created below and releases them on every exit
    // path. NOTE(review): the RETURN_ON_* macros are defined outside this file
    // and are expected to return early, which would otherwise leak the objects.
    struct cl_objects
    {
        cl_mem buffers[2];
        cl_program program;
        cl_kernel kernel;

        cl_objects() : program(NULL), kernel(NULL)
        {
            buffers[0] = NULL;
            buffers[1] = NULL;
        }

        ~cl_objects()
        {
            if (buffers[0] != NULL) clReleaseMemObject(buffers[0]);
            if (buffers[1] != NULL) clReleaseMemObject(buffers[1]);
            if (kernel != NULL) clReleaseKernel(kernel);
            if (program != NULL) clReleaseProgram(program);
        }
    } objects;

    cl_program program = NULL;
    cl_kernel kernel = NULL;
    size_t wg_size = 0;
    size_t sg_max_size = 0;
    size_t work_size[1];
    int err;

    std::string code_str = generate_sg_any_kernel_code();
// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_any");
    RETURN_ON_ERROR(err)
    // Hand the objects to the guard only after a successful build.
    objects.program = program;
    objects.kernel = kernel;
    return err;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
    return CL_SUCCESS;
#else
    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_any");
    RETURN_ON_ERROR(err)
    // Hand the objects to the guard only after a successful build.
    objects.program = program;
    objects.kernel = kernel;
#endif

    // Work-group size the kernel can be launched with; used as the local size.
    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")

    // Maximum sub-group size for a 1D launch with local size wg_size.
    size_t param_value_size = 0;
    err = clGetKernelSubGroupInfo(
        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
        sizeof(size_t), static_cast<void*>(&wg_size),
        sizeof(size_t), static_cast<void*>(&sg_max_size),
        &param_value_size
    );
    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")

    // Verify size of returned param
    if (param_value_size != sizeof(size_t))
    {
        RETURN_ON_ERROR_MSG(-1,
            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
            static_cast<unsigned long>(sizeof(size_t)),
            static_cast<unsigned long>(param_value_size)
        )
    }

    // Zero sizes would divide by zero below and stall the verification loops.
    if (wg_size == 0 || sg_max_size == 0)
    {
        RETURN_ON_ERROR_MSG(-1, "Work-group size and max sub-group size must be non-zero")
    }

    // Calculate global work size: count rounded up to whole work-groups.
    const size_t wg_number = count / wg_size + ((count % wg_size != 0) ? 1 : 0);
    const size_t flat_work_size = wg_number * wg_size;
    work_size[0] = flat_work_size;

    // One extra input element because the kernel reads input[tid + 1].
    std::vector<cl_uint> input = generate_input_sg_any(flat_work_size + 1, wg_size);
    std::vector<cl_uint> output = generate_output_sg_any(flat_work_size, wg_size);

    objects.buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint) * input.size(), NULL, &err);
    RETURN_ON_CL_ERROR(err, "clCreateBuffer");

    objects.buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint) * output.size(), NULL, &err);
    RETURN_ON_CL_ERROR(err, "clCreateBuffer");

    err = clEnqueueWriteBuffer(
        queue, objects.buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
        static_cast<void *>(input.data()), 0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");

    // Check each argument separately so the reported status is a real error code.
    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &objects.buffers[0]);
    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &objects.buffers[1]);
    RETURN_ON_CL_ERROR(err, "clSetKernelArg");

    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");

    err = clEnqueueReadBuffer(
        queue, objects.buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
        static_cast<void *>(output.data()), 0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");

    if (verify_sg_any(input, output, flat_work_size, wg_size, sg_max_size) != CL_SUCCESS)
    {
        RETURN_ON_ERROR_MSG(-1, "sub_group_any failed");
    }
    log_info("sub_group_any passed\n");

    // Buffers, kernel and program are released by `objects`.
    return err;
}
|
||||
|
||||
// Test case wrapper: runs sub_group_any() with n_elems work-items, passes the
// status through CHECK_ERROR and returns it.
AUTO_TEST_CASE(test_sub_group_any)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    const int err = sub_group_any(device, context, queue, n_elems);
    CHECK_ERROR(err)
    return err;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ANY_HPP
|
||||
204
test_conformance/clcpp/subgroups/test_sg_broadcast.hpp
Normal file
204
test_conformance/clcpp/subgroups/test_sg_broadcast.hpp
Normal file
@@ -0,0 +1,204 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
// Common for tests of sub-group functions
|
||||
#include "common.hpp"
|
||||
|
||||
// Returns the OpenCL C++ source of the test_sg_broadcast kernel. Each
// work-item stores sub_group_broadcast(input[tid], 0) in output[tid].
std::string generate_sg_broadcast_kernel_code()
{
    std::string source;
    source += "#include <opencl_memory>\n";
    source += "#include <opencl_work_item>\n";
    source += "#include <opencl_work_group>\n";
    source += "using namespace cl;\n";
    source += "__kernel void test_sg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n";
    source += "{\n";
    source += " ulong tid = get_global_id(0);\n";
    source += " uint result = sub_group_broadcast(input[tid], 0);\n";
    source += " output[tid] = result;\n";
    source += "}\n";
    return source;
}
|
||||
|
||||
int
|
||||
verify_sg_broadcast(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < count; i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size)
|
||||
{
|
||||
// sub-group broadcast
|
||||
cl_uint broadcast_result = in[i+j];
|
||||
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (broadcast_result != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_any %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(broadcast_result),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
std::vector<cl_uint> generate_input_sg_broadcast(size_t count, size_t wg_size)
|
||||
{
|
||||
std::vector<cl_uint> input(count, cl_uint(0));
|
||||
size_t j = wg_size;
|
||||
for(size_t i = 0; i < count; i++)
|
||||
{
|
||||
input[i] = static_cast<cl_uint>(j);
|
||||
j--;
|
||||
if(j == 0)
|
||||
{
|
||||
j = wg_size;
|
||||
}
|
||||
}
|
||||
return input;
|
||||
}
|
||||
|
||||
std::vector<cl_uint> generate_output_sg_broadcast(size_t count, size_t wg_size)
|
||||
{
|
||||
(void) wg_size;
|
||||
return std::vector<cl_uint>(count, cl_uint(1));
|
||||
}
|
||||
|
||||
// Runs the sub_group_broadcast test on `device`.
//
// Builds the test_sg_broadcast kernel, queries the kernel's work-group size
// and the maximum sub-group size for that work-group size, rounds `count` up
// to a whole number of work-groups, runs the kernel and checks the result with
// verify_sg_broadcast().
//
// Returns CL_SUCCESS on success; otherwise the failing OpenCL status, or the
// non-zero status of the sub-group query check or of the verification.
int sub_group_broadcast(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
{
    // Owns the OpenCL objects created below and releases them on every exit
    // path. NOTE(review): the RETURN_ON_* macros are defined outside this file
    // and are expected to return early, which would otherwise leak the objects.
    struct cl_objects
    {
        cl_mem buffers[2];
        cl_program program;
        cl_kernel kernel;

        cl_objects() : program(NULL), kernel(NULL)
        {
            buffers[0] = NULL;
            buffers[1] = NULL;
        }

        ~cl_objects()
        {
            if (buffers[0] != NULL) clReleaseMemObject(buffers[0]);
            if (buffers[1] != NULL) clReleaseMemObject(buffers[1]);
            if (kernel != NULL) clReleaseKernel(kernel);
            if (program != NULL) clReleaseProgram(program);
        }
    } objects;

    cl_program program = NULL;
    cl_kernel kernel = NULL;
    size_t wg_size = 0;
    size_t sg_max_size = 0;
    size_t work_size[] = { 1 };
    int err;

    // Get kernel source code
    std::string code_str = generate_sg_broadcast_kernel_code();

// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_broadcast");
    RETURN_ON_ERROR(err)
    // Hand the objects to the guard only after a successful build.
    objects.program = program;
    objects.kernel = kernel;
    return err;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
    return CL_SUCCESS;
#else
    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_broadcast");
    RETURN_ON_ERROR(err)
    // Hand the objects to the guard only after a successful build.
    objects.program = program;
    objects.kernel = kernel;
#endif

    // Get max flat workgroup size; used as the local size of the launch.
    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")

    // Maximum sub-group size for a 1D launch with local size wg_size.
    size_t param_value_size = 0;
    err = clGetKernelSubGroupInfo(
        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
        sizeof(size_t), static_cast<void*>(&wg_size),
        sizeof(size_t), static_cast<void*>(&sg_max_size),
        &param_value_size
    );
    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")

    // Verify size of returned param
    if (param_value_size != sizeof(size_t))
    {
        RETURN_ON_ERROR_MSG(-1,
            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
            static_cast<unsigned long>(sizeof(size_t)),
            static_cast<unsigned long>(param_value_size)
        )
    }

    // Zero sizes would divide by zero below and stall the verification loops.
    if (wg_size == 0 || sg_max_size == 0)
    {
        RETURN_ON_ERROR_MSG(-1, "Work-group size and max sub-group size must be non-zero")
    }

    // Calculate global work size: count rounded up to whole work-groups.
    const size_t wg_number = count / wg_size + ((count % wg_size != 0) ? 1 : 0);
    const size_t flat_work_size = wg_number * wg_size;
    work_size[0] = flat_work_size;

    std::vector<cl_uint> input = generate_input_sg_broadcast(flat_work_size, wg_size);
    std::vector<cl_uint> output = generate_output_sg_broadcast(flat_work_size, wg_size);

    objects.buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint) * input.size(), NULL, &err);
    RETURN_ON_CL_ERROR(err, "clCreateBuffer");

    objects.buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint) * output.size(), NULL, &err);
    RETURN_ON_CL_ERROR(err, "clCreateBuffer");

    err = clEnqueueWriteBuffer(
        queue, objects.buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
        static_cast<void *>(input.data()), 0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");

    // Check each argument separately so the reported status is a real error code.
    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &objects.buffers[0]);
    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &objects.buffers[1]);
    RETURN_ON_CL_ERROR(err, "clSetKernelArg");

    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");

    err = clEnqueueReadBuffer(
        queue, objects.buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
        static_cast<void *>(output.data()), 0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");

    int result = verify_sg_broadcast(input, output, work_size[0], wg_size, sg_max_size);
    RETURN_ON_ERROR_MSG(result, "sub_group_broadcast failed")
    log_info("sub_group_broadcast passed\n");

    // Buffers, kernel and program are released by `objects`.
    return err;
}
|
||||
|
||||
// Test case wrapper: runs sub_group_broadcast() with n_elems work-items,
// passes the status through CHECK_ERROR and returns it.
AUTO_TEST_CASE(test_sub_group_broadcast)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    const int err = sub_group_broadcast(device, context, queue, n_elems);
    CHECK_ERROR(err)
    return err;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP
|
||||
345
test_conformance/clcpp/subgroups/test_sg_reduce.hpp
Normal file
345
test_conformance/clcpp/subgroups/test_sg_reduce.hpp
Normal file
@@ -0,0 +1,345 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_REDUCE_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_REDUCE_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
// Common for tests of sub-group functions
|
||||
#include "common.hpp"
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
std::string generate_sg_reduce_kernel_code()
|
||||
{
|
||||
return "#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_work_group>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void test_sg_reduce(global_ptr<" + type_name<CL_INT_TYPE>() + "[]> input, "
|
||||
"global_ptr<" + type_name<CL_INT_TYPE>() + "[]> output)\n"
|
||||
"{\n"
|
||||
" ulong tid = get_global_id(0);\n"
|
||||
" " + type_name<CL_INT_TYPE>() + " result = sub_group_reduce<work_group_op::" + to_string(op) + ">(input[tid]);\n"
|
||||
" output[tid] = result;\n"
|
||||
"}\n";
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_reduce_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE sum = 0;
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
sum += in[i + j + k];
|
||||
}
|
||||
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (sum != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_reduce_add %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(sum),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_reduce_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE min = (std::numeric_limits<CL_INT_TYPE>::max)();
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
min = std::min<CL_INT_TYPE>(min, in[i + j + k]);
|
||||
}
|
||||
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (min != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_reduce_min %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(min),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_reduce_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE max = (std::numeric_limits<CL_INT_TYPE>::min)();
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
max = std::max<CL_INT_TYPE>(max, in[i + j + k]);
|
||||
}
|
||||
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (max != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_reduce_max %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(max),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
int verify_sg_reduce(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case work_group_op::add:
|
||||
return verify_sg_reduce_add(in, out, wg_size, sg_size);
|
||||
case work_group_op::min:
|
||||
return verify_sg_reduce_min(in, out, wg_size, sg_size);
|
||||
case work_group_op::max:
|
||||
return verify_sg_reduce_max(in, out, wg_size, sg_size);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
int sub_group_reduce(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
|
||||
{
|
||||
// don't run test for unsupported types
|
||||
if(!type_supported<CL_INT_TYPE>(device))
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_mem buffers[2];
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t wg_size;
|
||||
size_t sg_max_size;
|
||||
size_t work_size[1];
|
||||
int err;
|
||||
|
||||
std::string code_str = generate_sg_reduce_kernel_code<CL_INT_TYPE, op>();
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_reduce");
|
||||
RETURN_ON_ERROR(err)
|
||||
return err;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
|
||||
return CL_SUCCESS;
|
||||
#else
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_reduce");
|
||||
RETURN_ON_ERROR(err)
|
||||
#endif
|
||||
|
||||
err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
|
||||
|
||||
size_t param_value_size = 0;
|
||||
err = clGetKernelSubGroupInfo(
|
||||
kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
|
||||
sizeof(size_t), static_cast<void*>(&wg_size),
|
||||
sizeof(size_t), static_cast<void*>(&sg_max_size),
|
||||
¶m_value_size
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
|
||||
|
||||
// Verify size of returned param
|
||||
if(param_value_size != sizeof(size_t))
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
|
||||
sizeof(size_t),
|
||||
param_value_size
|
||||
)
|
||||
}
|
||||
|
||||
// Calculate global work size
|
||||
size_t flat_work_size;
|
||||
size_t wg_number = static_cast<size_t>(
|
||||
std::ceil(static_cast<double>(count) / wg_size)
|
||||
);
|
||||
flat_work_size = wg_number * wg_size;
|
||||
work_size[0] = flat_work_size;
|
||||
|
||||
std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
|
||||
std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
|
||||
|
||||
buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer");
|
||||
|
||||
buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer");
|
||||
|
||||
err = clEnqueueWriteBuffer(
|
||||
queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
|
||||
static_cast<void *>(input.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
|
||||
RETURN_ON_CL_ERROR(err, "clSetKernelArg");
|
||||
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
|
||||
|
||||
err = clEnqueueReadBuffer(
|
||||
queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
|
||||
static_cast<void *>(output.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
|
||||
|
||||
if (verify_sg_reduce<CL_INT_TYPE, op>(input, output, wg_size, sg_max_size) != CL_SUCCESS)
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1, "sub_group_reduce_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
|
||||
}
|
||||
log_info("sub_group_reduce_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
|
||||
|
||||
clReleaseMemObject(buffers[0]);
|
||||
clReleaseMemObject(buffers[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
return err;
|
||||
}
|
||||
|
||||
// Test case: sub_group_reduce with work_group_op::add for cl_int, cl_uint,
// cl_long and cl_ulong. All four variants are run; the individual results are
// OR-ed together and the case returns -1 if any of them was not CL_SUCCESS.
AUTO_TEST_CASE(test_sub_group_reduce_add)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int local_error = CL_SUCCESS;
    int error = CL_SUCCESS;

    // 32-bit signed
    local_error = sub_group_reduce<cl_int, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    // 32-bit unsigned
    local_error = sub_group_reduce<cl_uint, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    // 64-bit signed
    local_error = sub_group_reduce<cl_long, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    // 64-bit unsigned
    local_error = sub_group_reduce<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    return (error == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
// Test case: sub_group_reduce with work_group_op::min for cl_int, cl_uint,
// cl_long and cl_ulong. All four variants are run; the individual results are
// OR-ed together and the case returns -1 if any of them was not CL_SUCCESS.
AUTO_TEST_CASE(test_sub_group_reduce_min)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int local_error = CL_SUCCESS;
    int error = CL_SUCCESS;

    // 32-bit signed
    local_error = sub_group_reduce<cl_int, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    // 32-bit unsigned
    local_error = sub_group_reduce<cl_uint, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    // 64-bit signed
    local_error = sub_group_reduce<cl_long, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    // 64-bit unsigned
    local_error = sub_group_reduce<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    return (error == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
// Test case: sub_group_reduce with work_group_op::max for cl_int, cl_uint,
// cl_long and cl_ulong. All four variants are run; the individual results are
// OR-ed together and the case returns -1 if any of them was not CL_SUCCESS.
AUTO_TEST_CASE(test_sub_group_reduce_max)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int local_error = CL_SUCCESS;
    int error = CL_SUCCESS;

    // 32-bit signed
    local_error = sub_group_reduce<cl_int, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    // 32-bit unsigned
    local_error = sub_group_reduce<cl_uint, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    // 64-bit signed
    local_error = sub_group_reduce<cl_long, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    // 64-bit unsigned
    local_error = sub_group_reduce<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    return (error == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_REDUCE_HPP
|
||||
325
test_conformance/clcpp/subgroups/test_sg_scan_exclusive.hpp
Normal file
325
test_conformance/clcpp/subgroups/test_sg_scan_exclusive.hpp
Normal file
@@ -0,0 +1,325 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_EXCLUSIVE_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_EXCLUSIVE_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
// Common for tests of sub-group functions
|
||||
#include "common.hpp"
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
std::string generate_sg_scan_exclusive_kernel_code()
|
||||
{
|
||||
return "#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_work_group>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void test_sg_scan_exclusive(global_ptr<" + type_name<CL_INT_TYPE>() + "[]> input, "
|
||||
"global_ptr<" + type_name<CL_INT_TYPE>() + "[]> output)\n"
|
||||
"{\n"
|
||||
" ulong tid = get_global_id(0);\n"
|
||||
" " + type_name<CL_INT_TYPE>() + " result = sub_group_scan_exclusive<work_group_op::" + to_string(op) + ">(input[tid]);\n"
|
||||
" output[tid] = result;\n"
|
||||
"}\n";
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_scan_exclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE sum = 0;
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (sum != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_scan_exclusive_add %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(sum),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
sum += in[i + j + k];
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_scan_exclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE min = (std::numeric_limits<CL_INT_TYPE>::max)();
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (min != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_scan_exclusive_min %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(min),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
min = std::min<CL_INT_TYPE>(min, in[i + j + k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_scan_exclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE max = (std::numeric_limits<CL_INT_TYPE>::min)();
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
if (max != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_scan_exclusive_max %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(max),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
max = std::max<CL_INT_TYPE>(max, in[i + j + k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
int verify_sg_scan_exclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case work_group_op::add:
|
||||
return verify_sg_scan_exclusive_add(in, out, wg_size, sg_size);
|
||||
case work_group_op::min:
|
||||
return verify_sg_scan_exclusive_min(in, out, wg_size, sg_size);
|
||||
case work_group_op::max:
|
||||
return verify_sg_scan_exclusive_max(in, out, wg_size, sg_size);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
int sub_group_scan_exclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
|
||||
{
|
||||
// don't run test for unsupported types
|
||||
if(!type_supported<CL_INT_TYPE>(device))
|
||||
{
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_mem buffers[2];
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t wg_size;
|
||||
size_t sg_max_size;
|
||||
size_t work_size[1];
|
||||
int err;
|
||||
|
||||
std::string code_str = generate_sg_scan_exclusive_kernel_code<CL_INT_TYPE, op>();
|
||||
// -----------------------------------------------------------------------------------
|
||||
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
|
||||
// -----------------------------------------------------------------------------------
|
||||
// Only OpenCL C++ to SPIR-V compilation
|
||||
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_exclusive");
|
||||
RETURN_ON_ERROR(err)
|
||||
return err;
|
||||
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
|
||||
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
|
||||
log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
|
||||
return CL_SUCCESS;
|
||||
#else
|
||||
err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_exclusive");
|
||||
RETURN_ON_ERROR(err)
|
||||
#endif
|
||||
|
||||
err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
|
||||
|
||||
size_t param_value_size = 0;
|
||||
err = clGetKernelSubGroupInfo(
|
||||
kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
|
||||
sizeof(size_t), static_cast<void*>(&wg_size),
|
||||
sizeof(size_t), static_cast<void*>(&sg_max_size),
|
||||
¶m_value_size
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
|
||||
|
||||
// Verify size of returned param
|
||||
if(param_value_size != sizeof(size_t))
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1,
|
||||
"Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
|
||||
sizeof(size_t),
|
||||
param_value_size
|
||||
)
|
||||
}
|
||||
|
||||
// Calculate global work size
|
||||
size_t flat_work_size;
|
||||
size_t wg_number = static_cast<size_t>(
|
||||
std::ceil(static_cast<double>(count) / wg_size)
|
||||
);
|
||||
flat_work_size = wg_number * wg_size;
|
||||
work_size[0] = flat_work_size;
|
||||
|
||||
std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
|
||||
std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
|
||||
|
||||
buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer");
|
||||
|
||||
buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
|
||||
RETURN_ON_CL_ERROR(err, "clCreateBuffer");
|
||||
|
||||
err = clEnqueueWriteBuffer(
|
||||
queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
|
||||
static_cast<void *>(input.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
|
||||
RETURN_ON_CL_ERROR(err, "clSetKernelArg");
|
||||
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
|
||||
|
||||
err = clEnqueueReadBuffer(
|
||||
queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
|
||||
static_cast<void *>(output.data()), 0, NULL, NULL
|
||||
);
|
||||
RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
|
||||
|
||||
if (verify_sg_scan_exclusive<CL_INT_TYPE, op>(input, output, wg_size, sg_max_size) != CL_SUCCESS)
|
||||
{
|
||||
RETURN_ON_ERROR_MSG(-1, "sub_group_scan_exclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
|
||||
}
|
||||
log_info("sub_group_scan_exclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
|
||||
|
||||
clReleaseMemObject(buffers[0]);
|
||||
clReleaseMemObject(buffers[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
return err;
|
||||
}
|
||||
|
||||
// Test case: sub_group_scan_exclusive with work_group_op::add for cl_int,
// cl_uint, cl_long and cl_ulong. All four variants are run; the individual
// results are OR-ed together and the case returns -1 if any of them was not
// CL_SUCCESS.
AUTO_TEST_CASE(test_sub_group_scan_exclusive_add)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int local_error = CL_SUCCESS;
    int error = CL_SUCCESS;

    // 32-bit signed
    local_error = sub_group_scan_exclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    // 32-bit unsigned
    local_error = sub_group_scan_exclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    // 64-bit signed
    local_error = sub_group_scan_exclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    // 64-bit unsigned
    local_error = sub_group_scan_exclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    return (error == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
// Test case: sub_group_scan_exclusive with work_group_op::min for cl_int,
// cl_uint, cl_long and cl_ulong. All four variants are run; the individual
// results are OR-ed together and the case returns -1 if any of them was not
// CL_SUCCESS.
AUTO_TEST_CASE(test_sub_group_scan_exclusive_min)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int local_error = CL_SUCCESS;

    // Each result must be folded into `error`; without that the final check
    // below can never see a failure.
    local_error = sub_group_scan_exclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    local_error = sub_group_scan_exclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    local_error = sub_group_scan_exclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    local_error = sub_group_scan_exclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    if(error != CL_SUCCESS)
        return -1;
    return CL_SUCCESS;
}
|
||||
|
||||
// Test case: sub_group_scan_exclusive with work_group_op::max for cl_int,
// cl_uint, cl_long and cl_ulong. All four variants are run; the individual
// results are OR-ed together and the case returns -1 if any of them was not
// CL_SUCCESS.
AUTO_TEST_CASE(test_sub_group_scan_exclusive_max)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int local_error = CL_SUCCESS;
    int error = CL_SUCCESS;

    // 32-bit signed
    local_error = sub_group_scan_exclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    // 32-bit unsigned
    local_error = sub_group_scan_exclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    // 64-bit signed
    local_error = sub_group_scan_exclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    // 64-bit unsigned
    local_error = sub_group_scan_exclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(local_error)
    error |= local_error;

    return (error == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_EXCLUSIVE_HPP
|
||||
332
test_conformance/clcpp/subgroups/test_sg_scan_inclusive.hpp
Normal file
332
test_conformance/clcpp/subgroups/test_sg_scan_inclusive.hpp
Normal file
@@ -0,0 +1,332 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP
|
||||
#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
// Common for all OpenCL C++ tests
|
||||
#include "../common.hpp"
|
||||
// Common for tests of sub-group functions
|
||||
#include "common.hpp"
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
std::string generate_sg_scan_inclusive_kernel_code()
|
||||
{
|
||||
return "#include <opencl_memory>\n"
|
||||
"#include <opencl_work_item>\n"
|
||||
"#include <opencl_work_group>\n"
|
||||
"using namespace cl;\n"
|
||||
"__kernel void test_sg_scan_inclusive(global_ptr<" + type_name<CL_INT_TYPE>() + "[]> input, "
|
||||
"global_ptr<" + type_name<CL_INT_TYPE>() + "[]> output)\n"
|
||||
"{\n"
|
||||
" ulong tid = get_global_id(0);\n"
|
||||
" " + type_name<CL_INT_TYPE>() + " result = sub_group_scan_inclusive<work_group_op::" + to_string(op) + ">(input[tid]);\n"
|
||||
" output[tid] = result;\n"
|
||||
"}\n";
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_scan_inclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE sum = 0;
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
sum += in[i + j + k];
|
||||
if (sum != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_scan_exclusive_add %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(sum),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_scan_inclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE min = (std::numeric_limits<CL_INT_TYPE>::max)();
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
min = std::min<CL_INT_TYPE>(min, in[i + j + k]);
|
||||
if (min != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_scan_exclusive_min %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(min),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE>
|
||||
int verify_sg_scan_inclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
size_t i, j, k;
|
||||
for (i = 0; i < in.size(); i += wg_size)
|
||||
{
|
||||
for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
|
||||
{
|
||||
CL_INT_TYPE max = (std::numeric_limits<CL_INT_TYPE>::min)();
|
||||
// Check if all work-items in sub-group stored correct value
|
||||
for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
|
||||
{
|
||||
max = std::max<CL_INT_TYPE>(max, in[i + j + k]);
|
||||
if (max != out[i + j + k])
|
||||
{
|
||||
log_info(
|
||||
"sub_group_scan_exclusive_max %s: Error at %lu: expected = %lu, got = %lu\n",
|
||||
type_name<cl_uint>().c_str(),
|
||||
i + j,
|
||||
static_cast<size_t>(max),
|
||||
static_cast<size_t>(out[i + j + k]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
int verify_sg_scan_inclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
|
||||
{
|
||||
switch (op)
|
||||
{
|
||||
case work_group_op::add:
|
||||
return verify_sg_scan_inclusive_add(in, out, wg_size, sg_size);
|
||||
case work_group_op::min:
|
||||
return verify_sg_scan_inclusive_min(in, out, wg_size, sg_size);
|
||||
case work_group_op::max:
|
||||
return verify_sg_scan_inclusive_max(in, out, wg_size, sg_size);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
template <class CL_INT_TYPE, work_group_op op>
|
||||
int sub_group_scan_inclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
{
    // Builds the "test_sg_scan_inclusive" kernel for the CL_INT_TYPE / op pair this
    // function is instantiated with, runs it over at least `count` work-items (rounded
    // up to a whole number of work-groups) and checks the device output on the host
    // with verify_sg_scan_inclusive.
    //
    // Returns CL_SUCCESS when the type is not supported by the device or when the
    // results verify; otherwise the error is reported through the RETURN_ON_* macros.

    // don't run test for unsupported types
    if(!type_supported<CL_INT_TYPE>(device))
    {
        return CL_SUCCESS;
    }

    // Handles start out as NULL so that the cleanup guard below can tell which
    // buffers were actually created.
    cl_mem buffers[2] = { NULL, NULL };
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    size_t wg_size = 0;
    size_t sg_max_size = 0;
    size_t work_size[1];
    int err;

    std::string code_str = generate_sg_scan_inclusive_kernel_code<CL_INT_TYPE, op>();
// -----------------------------------------------------------------------------------
// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
// -----------------------------------------------------------------------------------
// Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_inclusive");
    RETURN_ON_ERROR(err)
    return err;
// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
    return CL_SUCCESS;
#else
    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_inclusive");
    RETURN_ON_ERROR(err)
#endif

    // The RETURN_ON_* macros leave the function early, so releasing the OpenCL
    // objects only at the very end (as was done before) leaked them on every
    // failure path. This guard is armed only once the kernel and program exist and
    // releases everything, in the original order, on any exit from here on.
    struct scoped_release
    {
        cl_mem * mems;
        cl_kernel kernel;
        cl_program program;

        scoped_release(cl_mem * m, cl_kernel k, cl_program p)
            : mems(m), kernel(k), program(p)
        {
        }

        ~scoped_release()
        {
            for(size_t i = 0; i < 2; i++)
            {
                if(mems[i] != NULL)
                {
                    clReleaseMemObject(mems[i]);
                }
            }
            clReleaseKernel(kernel);
            clReleaseProgram(program);
        }
    };
    scoped_release cleanup(buffers, kernel, program);

    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")

    // A zero work-group size would make the rounding below divide by zero.
    if(wg_size == 0)
    {
        RETURN_ON_ERROR_MSG(-1, "Returned kernel work-group size is zero!\n")
    }

    // Ask for the maximum sub-group size when the kernel is launched with a
    // one-dimensional local size of wg_size.
    size_t param_value_size = 0;
    err = clGetKernelSubGroupInfo(
        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
        sizeof(size_t), static_cast<void*>(&wg_size),
        sizeof(size_t), static_cast<void*>(&sg_max_size),
        &param_value_size
    );
    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")

    // Verify size of returned param
    if(param_value_size != sizeof(size_t))
    {
        // The casts make the arguments match the "%lu" conversions on platforms
        // where size_t is not unsigned long.
        RETURN_ON_ERROR_MSG(-1,
            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
            static_cast<unsigned long>(sizeof(size_t)),
            static_cast<unsigned long>(param_value_size)
        )
    }

    // Calculate global work size: `count` rounded up to a multiple of wg_size,
    // computed in integer arithmetic so it is exact for every size_t value.
    size_t wg_number = count / wg_size + ((count % wg_size != 0) ? 1 : 0);
    size_t flat_work_size = wg_number * wg_size;
    work_size[0] = flat_work_size;

    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);

    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
    RETURN_ON_CL_ERROR(err, "clCreateBuffer");

    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
    RETURN_ON_CL_ERROR(err, "clCreateBuffer");

    // Blocking write: the host input is fully uploaded before the kernel is enqueued.
    err = clEnqueueWriteBuffer(
        queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
        static_cast<void *>(input.data()), 0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");

    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
    RETURN_ON_CL_ERROR(err, "clSetKernelArg");

    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");

    // Blocking read: `output` holds the device results once this call returns.
    err = clEnqueueReadBuffer(
        queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
        static_cast<void *>(output.data()), 0, NULL, NULL
    );
    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");

    if (verify_sg_scan_inclusive<CL_INT_TYPE, op>(input, output, wg_size, sg_max_size) != CL_SUCCESS)
    {
        RETURN_ON_ERROR_MSG(-1, "sub_group_scan_inclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
    }
    log_info("sub_group_scan_inclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());

    // Buffers, kernel and program are released by `cleanup` when it goes out of scope.
    return err;
}
|
||||
|
||||
// Test entry point: runs the inclusive sub-group scan with the `add` operation for
// cl_int, cl_uint, cl_long and cl_ulong. Every variant is executed even if an
// earlier one failed; the test returns -1 if any variant reported an error.
AUTO_TEST_CASE(test_sub_group_scan_inclusive_add)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS; // OR of all per-type results
    int status = CL_SUCCESS;   // result of the most recent variant

    // cl_int
    status = sub_group_scan_inclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // cl_uint
    status = sub_group_scan_inclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // cl_long
    status = sub_group_scan_inclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // cl_ulong
    status = sub_group_scan_inclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // Collapse any accumulated error bits into a single generic failure code.
    return (combined == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
// Test entry point: runs the inclusive sub-group scan with the `min` operation for
// cl_int, cl_uint, cl_long and cl_ulong. Every variant is executed even if an
// earlier one failed; the test returns -1 if any variant reported an error.
AUTO_TEST_CASE(test_sub_group_scan_inclusive_min)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS; // OR of all per-type results
    int status = CL_SUCCESS;   // result of the most recent variant

    // cl_int
    status = sub_group_scan_inclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // cl_uint
    status = sub_group_scan_inclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // cl_long
    status = sub_group_scan_inclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // cl_ulong
    status = sub_group_scan_inclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // Collapse any accumulated error bits into a single generic failure code.
    return (combined == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
// Test entry point: runs the inclusive sub-group scan with the `max` operation for
// cl_int, cl_uint, cl_long and cl_ulong. Every variant is executed even if an
// earlier one failed; the test returns -1 if any variant reported an error.
AUTO_TEST_CASE(test_sub_group_scan_inclusive_max)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int combined = CL_SUCCESS; // OR of all per-type results
    int status = CL_SUCCESS;   // result of the most recent variant

    // cl_int
    status = sub_group_scan_inclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // cl_uint
    status = sub_group_scan_inclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // cl_long
    status = sub_group_scan_inclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // cl_ulong
    status = sub_group_scan_inclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
    CHECK_ERROR(status)
    combined |= status;

    // Collapse any accumulated error bits into a single generic failure code.
    return (combined == CL_SUCCESS) ? CL_SUCCESS : -1;
}
|
||||
|
||||
#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP
|
||||
Reference in New Issue
Block a user