mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Deduplicate test_barrier (#1542)
Merge test_barrier and test_wg_barrier. Reformat the kernel source code using clang-format. Signed-off-by: John Kesapides <john.kesapides@arm.com>
This commit is contained in:
@@ -52,14 +52,12 @@ set(${MODULE_NAME}_SOURCES
|
||||
test_kernel_call_kernel_function.cpp
|
||||
test_local_kernel_scope.cpp
|
||||
test_progvar.cpp
|
||||
test_wg_barrier.cpp
|
||||
test_global_linear_id.cpp
|
||||
test_local_linear_id.cpp
|
||||
test_enqueued_local_size.cpp
|
||||
test_simple_image_pitch.cpp
|
||||
test_get_linear_ids.cpp
|
||||
test_rw_image_access_qualifier.cpp
|
||||
test_wg_barrier.cpp
|
||||
test_enqueued_local_size.cpp
|
||||
test_global_linear_id.cpp
|
||||
test_local_linear_id.cpp
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@@ -21,143 +21,136 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <vector>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
const char *barrier_kernel_code =
|
||||
"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n"
|
||||
"{\n"
|
||||
" int tid = get_local_id(0);\n"
|
||||
" int lsize = get_local_size(0);\n"
|
||||
" int i;\n"
|
||||
"\n"
|
||||
" tmp_sum[tid] = 0;\n"
|
||||
" for (i=tid; i<n; i+=lsize)\n"
|
||||
" tmp_sum[tid] += a[i];\n"
|
||||
" \n"
|
||||
" // updated to work for any workgroup size \n"
|
||||
" for (i=hadd(lsize,1); lsize>1; i = hadd(i,1))\n"
|
||||
" {\n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
" if (tid + i < lsize)\n"
|
||||
" tmp_sum[tid] += tmp_sum[tid + i];\n"
|
||||
" lsize = i; \n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n"
|
||||
" if (tid == 0)\n"
|
||||
" *sum = tmp_sum[0];\n"
|
||||
"}\n";
|
||||
|
||||
|
||||
static int
|
||||
verify_sum(int *inptr, int *outptr, int n)
|
||||
namespace {
|
||||
const char *barrier_kernel_code = R"(
|
||||
__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum,
|
||||
__global int *sum)
|
||||
{
|
||||
int r = 0;
|
||||
int i;
|
||||
int tid = get_local_id(0);
|
||||
int lsize = get_local_size(0);
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r += inptr[i];
|
||||
}
|
||||
tmp_sum[tid] = 0;
|
||||
for (i = tid; i < n; i += lsize) tmp_sum[tid] += a[i];
|
||||
|
||||
if (r != outptr[0])
|
||||
// updated to work for any workgroup size
|
||||
for (i = hadd(lsize, 1); lsize > 1; i = hadd(i, 1))
|
||||
{
|
||||
log_error("BARRIER test failed\n");
|
||||
return -1;
|
||||
BARRIER(CLK_GLOBAL_MEM_FENCE);
|
||||
if (tid + i < lsize) tmp_sum[tid] += tmp_sum[tid + i];
|
||||
lsize = i;
|
||||
}
|
||||
|
||||
log_info("BARRIER test passed\n");
|
||||
return 0;
|
||||
// no barrier is required here because last person to write to tmp_sum[0]
|
||||
// was tid 0
|
||||
if (tid == 0) *sum = tmp_sum[0];
|
||||
}
|
||||
)";
|
||||
|
||||
|
||||
void generate_random_inputs(std::vector<cl_int> &v)
|
||||
{
|
||||
RandomSeed seed(gRandomSeed);
|
||||
|
||||
auto random_generator = [&seed]() {
|
||||
return static_cast<cl_int>(
|
||||
get_random_float(-0x01000000, 0x01000000, seed));
|
||||
};
|
||||
|
||||
std::generate(v.begin(), v.end(), random_generator);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
int test_barrier_common(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements,
|
||||
std::string barrier_str)
|
||||
{
|
||||
cl_mem streams[3];
|
||||
cl_int *input_ptr = NULL, *output_ptr = NULL;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t global_threads[3];
|
||||
size_t local_threads[3];
|
||||
int err;
|
||||
int i;
|
||||
size_t max_local_workgroup_size[3];
|
||||
size_t max_threadgroup_size = 0;
|
||||
MTdata d;
|
||||
clMemWrapper streams[3];
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_kernel_code, "compute_sum" );
|
||||
cl_int output;
|
||||
int err;
|
||||
|
||||
size_t max_threadgroup_size = 0;
|
||||
std::string build_options = std::string("-DBARRIER=") + barrier_str;
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1,
|
||||
&barrier_kernel_code, "compute_sum",
|
||||
build_options.c_str());
|
||||
test_error(err, "Failed to build kernel/program.");
|
||||
|
||||
err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
|
||||
sizeof(max_threadgroup_size), &max_threadgroup_size, NULL);
|
||||
test_error(err, "clGetKernelWorkgroupInfo failed.");
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
|
||||
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
|
||||
|
||||
// Pick the minimum of the device and the kernel
|
||||
if (max_threadgroup_size > max_local_workgroup_size[0])
|
||||
max_threadgroup_size = max_local_workgroup_size[0];
|
||||
err = get_max_allowed_1d_work_group_size_on_device(device, kernel,
|
||||
&max_threadgroup_size);
|
||||
test_error(err, "get_max_allowed_1d_work_group_size_on_device failed.");
|
||||
|
||||
// work group size must divide evenly into the global size
|
||||
while( num_elements % max_threadgroup_size )
|
||||
max_threadgroup_size--;
|
||||
while (num_elements % max_threadgroup_size) max_threadgroup_size--;
|
||||
|
||||
input_ptr = (int*)malloc(sizeof(int) * num_elements);
|
||||
output_ptr = (int*)malloc(sizeof(int));
|
||||
std::vector<cl_int> input(num_elements);
|
||||
|
||||
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_int) * num_elements, NULL, &err);
|
||||
sizeof(cl_int) * num_elements, nullptr, &err);
|
||||
test_error(err, "clCreateBuffer failed.");
|
||||
streams[1] =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err);
|
||||
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int),
|
||||
nullptr, &err);
|
||||
test_error(err, "clCreateBuffer failed.");
|
||||
streams[2] =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_int) * max_threadgroup_size, NULL, &err);
|
||||
sizeof(cl_int) * max_threadgroup_size, nullptr, &err);
|
||||
test_error(err, "clCreateBuffer failed.");
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
generate_random_inputs(input);
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0,
|
||||
sizeof(cl_int) * num_elements, input.data(), 0,
|
||||
nullptr, nullptr);
|
||||
test_error(err, "clEnqueueWriteBuffer failed.");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
|
||||
err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
|
||||
err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(num_elements), &num_elements);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof(streams[2]), &streams[2]);
|
||||
err |= clSetKernelArg(kernel, 3, sizeof(streams[1]), &streams[1]);
|
||||
test_error(err, "clSetKernelArg failed.");
|
||||
|
||||
global_threads[0] = max_threadgroup_size;
|
||||
local_threads[0] = max_threadgroup_size;
|
||||
size_t global_threads[] = { max_threadgroup_size };
|
||||
size_t local_threads[] = { max_threadgroup_size };
|
||||
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, global_threads,
|
||||
local_threads, 0, nullptr, nullptr);
|
||||
test_error(err, "clEnqueueNDRangeKernel failed.");
|
||||
|
||||
err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int), (void *)output_ptr, 0, NULL, NULL );
|
||||
err = clEnqueueReadBuffer(queue, streams[1], true, 0, sizeof(cl_int),
|
||||
&output, 0, nullptr, nullptr);
|
||||
test_error(err, "clEnqueueReadBuffer failed.");
|
||||
|
||||
err = verify_sum(input_ptr, output_ptr, num_elements);
|
||||
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
if (std::accumulate(input.begin(), input.end(), 0) != output)
|
||||
{
|
||||
log_error("%s test failed\n", barrier_str.c_str());
|
||||
err = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info("%s test passed\n", barrier_str.c_str());
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
// Test entry point for the `barrier` built-in: forwards all arguments to
// test_barrier_common together with the name of the built-in to exercise.
int test_barrier(cl_device_id device, cl_context context,
                 cl_command_queue queue, int num_elements)
{
    const char *const barrier_builtin = "barrier";

    return test_barrier_common(device, context, queue, num_elements,
                               barrier_builtin);
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Test entry point for the `work_group_barrier` built-in: forwards all
// arguments to test_barrier_common together with the name of the built-in to
// exercise.
int test_wg_barrier(cl_device_id device, cl_context context,
                    cl_command_queue queue, int num_elements)
{
    const char *const barrier_builtin = "work_group_barrier";

    return test_barrier_common(device, context, queue, num_elements,
                               barrier_builtin);
}
|
||||
|
||||
@@ -1,159 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "harness/compat.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>

#include <vector>

#include "procs.h"
|
||||
|
||||
// OpenCL C source for the `compute_sum` kernel used by test_wg_barrier.
//
// Each work-item first accumulates into tmp_sum[tid] the elements of `a`
// at indices tid, tid + lsize, tid + 2 * lsize, ... below n. The partial sums
// are then combined in a loop that calls
// work_group_barrier(CLK_GLOBAL_MEM_FENCE) before every step, adds
// tmp_sum[tid + i] into tmp_sum[tid] when tid + i < lsize, and shrinks lsize
// to i. Finally the work-item with local id 0 stores tmp_sum[0] in *sum.
const char *wg_barrier_kernel_code =
|
||||
"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n"
|
||||
"{\n"
|
||||
" int tid = get_local_id(0);\n"
|
||||
" int lsize = get_local_size(0);\n"
|
||||
" int i;\n"
|
||||
"\n"
|
||||
" tmp_sum[tid] = 0;\n"
|
||||
" for (i=tid; i<n; i+=lsize)\n"
|
||||
" tmp_sum[tid] += a[i];\n"
|
||||
" \n"
|
||||
" // updated to work for any workgroup size \n"
|
||||
" for (i=hadd(lsize,1); lsize>1; i = hadd(i,1))\n"
|
||||
" {\n"
|
||||
" work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
" if (tid + i < lsize)\n"
|
||||
" tmp_sum[tid] += tmp_sum[tid + i];\n"
|
||||
" lsize = i; \n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n"
|
||||
" if (tid == 0)\n"
|
||||
" *sum = tmp_sum[0];\n"
|
||||
"}\n";
|
||||
|
||||
|
||||
static int
|
||||
verify_sum(int *inptr, int *tmpptr, int *outptr, int n)
|
||||
{
|
||||
int i;
|
||||
int reference = 0;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
reference += inptr[i];
|
||||
}
|
||||
|
||||
if (reference != outptr[0])
|
||||
{
|
||||
log_error("work_group_barrier test failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
log_info("work_group_barrier test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
test_wg_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[3];
|
||||
cl_int *input_ptr = NULL, *output_ptr = NULL, *tmp_ptr =NULL;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t global_threads[3];
|
||||
size_t local_threads[3];
|
||||
int err;
|
||||
int i;
|
||||
size_t max_local_workgroup_size[3];
|
||||
size_t max_threadgroup_size = 0;
|
||||
MTdata d;
|
||||
|
||||
err = create_single_kernel_helper_with_build_options(
|
||||
context, &program, &kernel, 1, &wg_barrier_kernel_code, "compute_sum",
|
||||
nullptr);
|
||||
test_error(err, "Failed to build kernel/program.");
|
||||
|
||||
err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
|
||||
sizeof(max_threadgroup_size), &max_threadgroup_size, NULL);
|
||||
test_error(err, "clGetKernelWorkgroupInfo failed.");
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
|
||||
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
|
||||
|
||||
// Pick the minimum of the device and the kernel
|
||||
if (max_threadgroup_size > max_local_workgroup_size[0])
|
||||
max_threadgroup_size = max_local_workgroup_size[0];
|
||||
|
||||
// work group size must divide evenly into the global size
|
||||
while( num_elements % max_threadgroup_size )
|
||||
max_threadgroup_size--;
|
||||
|
||||
input_ptr = (int*)malloc(sizeof(int) * num_elements);
|
||||
output_ptr = (int*)malloc(sizeof(int));
|
||||
|
||||
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_int) * num_elements, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed.");
|
||||
streams[1] =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed.");
|
||||
streams[2] =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||
sizeof(cl_int) * max_threadgroup_size, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed.");
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed.");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
|
||||
err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
|
||||
test_error(err, "clSetKernelArg failed.");
|
||||
|
||||
global_threads[0] = max_threadgroup_size;
|
||||
local_threads[0] = max_threadgroup_size;
|
||||
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueNDRangeKernel failed.");
|
||||
|
||||
err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int), (void *)output_ptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed.");
|
||||
|
||||
err = verify_sum(input_ptr, tmp_ptr, output_ptr, num_elements);
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
Reference in New Issue
Block a user