mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Thread Dimensions: Improve execution time. (#694)
Replaced a kernel used for memseting a large buffer with clEnqueueFillBuffer. Fixes #692. Addresses #684.
This commit is contained in:
@@ -139,12 +139,6 @@ static const char *thread_dimension_kernel_code_atomic_long =
|
|||||||
" if (error)\n"
|
" if (error)\n"
|
||||||
" atom_or(&dst[t_address-start_address], error);\n"
|
" atom_or(&dst[t_address-start_address], error);\n"
|
||||||
"\n"
|
"\n"
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"__kernel void clear_memory(__global uint *dst)\n\n"
|
|
||||||
"{\n"
|
|
||||||
" dst[get_global_id(0)] = 0;\n"
|
|
||||||
"\n"
|
|
||||||
"}\n";
|
"}\n";
|
||||||
|
|
||||||
static const char *thread_dimension_kernel_code_not_atomic_long =
|
static const char *thread_dimension_kernel_code_not_atomic_long =
|
||||||
@@ -168,12 +162,6 @@ static const char *thread_dimension_kernel_code_not_atomic_long =
|
|||||||
" if (error)\n"
|
" if (error)\n"
|
||||||
" dst[t_address-start_address]|=error;\n"
|
" dst[t_address-start_address]|=error;\n"
|
||||||
"\n"
|
"\n"
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"__kernel void clear_memory(__global uint *dst)\n\n"
|
|
||||||
"{\n"
|
|
||||||
" dst[get_global_id(0)] = 0;\n"
|
|
||||||
"\n"
|
|
||||||
"}\n";
|
"}\n";
|
||||||
|
|
||||||
static const char *thread_dimension_kernel_code_atomic_not_long =
|
static const char *thread_dimension_kernel_code_atomic_not_long =
|
||||||
@@ -199,12 +187,6 @@ static const char *thread_dimension_kernel_code_atomic_not_long =
|
|||||||
" if (error)\n"
|
" if (error)\n"
|
||||||
" atom_or(&dst[t_address-start_address], error);\n"
|
" atom_or(&dst[t_address-start_address], error);\n"
|
||||||
"\n"
|
"\n"
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"__kernel void clear_memory(__global uint *dst)\n\n"
|
|
||||||
"{\n"
|
|
||||||
" dst[get_global_id(0)] = 0;\n"
|
|
||||||
"\n"
|
|
||||||
"}\n";
|
"}\n";
|
||||||
|
|
||||||
static const char *thread_dimension_kernel_code_not_atomic_not_long =
|
static const char *thread_dimension_kernel_code_not_atomic_not_long =
|
||||||
@@ -228,20 +210,8 @@ static const char *thread_dimension_kernel_code_not_atomic_not_long =
|
|||||||
" if (error)\n"
|
" if (error)\n"
|
||||||
" dst[t_address-start_address]|=error;\n"
|
" dst[t_address-start_address]|=error;\n"
|
||||||
"\n"
|
"\n"
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"__kernel void clear_memory(__global uint *dst)\n\n"
|
|
||||||
"{\n"
|
|
||||||
" dst[get_global_id(0)] = 0;\n"
|
|
||||||
"\n"
|
|
||||||
"}\n";
|
"}\n";
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static size_t max_workgroup_size_for_clear_kernel;
|
|
||||||
cl_kernel clear_memory_kernel = 0;
|
|
||||||
|
|
||||||
|
|
||||||
char dim_str[128];
|
char dim_str[128];
|
||||||
char *
|
char *
|
||||||
print_dimensions(size_t x, size_t y, size_t z, cl_uint dim) {
|
print_dimensions(size_t x, size_t y, size_t z, cl_uint dim) {
|
||||||
@@ -307,31 +277,19 @@ int run_test(cl_context context, cl_command_queue queue, cl_kernel kernel, cl_me
|
|||||||
while (end_valid_memory_address <= last_memory_address)
|
while (end_valid_memory_address <= last_memory_address)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
// Clear the memory
|
const int fill_pattern = 0x0;
|
||||||
// // Manually -- much slower on the GPU
|
err = clEnqueueFillBuffer(queue,
|
||||||
// memset((void*)data, 0, memory_size);
|
array,
|
||||||
// err = clWriteArray(context, array, 0, 0, memory_size, data, NULL);
|
(void*)&fill_pattern,
|
||||||
// if (err != CL_SUCCESS) {
|
sizeof(fill_pattern),
|
||||||
// log_error("Failed to write to data array: %d\n", err);
|
0,
|
||||||
// free(data);
|
memory_size,
|
||||||
// return -4;
|
0,
|
||||||
// }
|
NULL,
|
||||||
// In a kernel
|
NULL);
|
||||||
err = clSetKernelArg(clear_memory_kernel, 0, sizeof(array), &array);
|
|
||||||
if (err != CL_SUCCESS) {
|
if (err != CL_SUCCESS) {
|
||||||
print_error( err, "Failed to set args for clear_memory_kernel to clear the memory between runs");
|
print_error( err, "Failed to set fill buffer.");
|
||||||
return -4;
|
return -3;
|
||||||
}
|
|
||||||
size_t global[3] = {1,0,0};
|
|
||||||
global[0] = (cl_uint)(memory_size/sizeof(cl_uint));
|
|
||||||
size_t local[3] = {1,0,0};
|
|
||||||
local[0] = max_workgroup_size_for_clear_kernel;
|
|
||||||
while( global[0] % local[0] ) //make sure that global[0] is evenly divided by local[0]. Will stop at 1 in worst case.
|
|
||||||
local[0]--;
|
|
||||||
err = clEnqueueNDRangeKernel(queue, clear_memory_kernel, 1, NULL, global, local, 0, NULL, NULL);
|
|
||||||
if (err != CL_SUCCESS) {
|
|
||||||
print_error( err, "Failed to execute clear_memory_kernel to clear the memory between runs");
|
|
||||||
return -4;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_ulong start_valid_index = start_valid_memory_address/sizeof(cl_uint);
|
cl_ulong start_valid_index = start_valid_memory_address/sizeof(cl_uint);
|
||||||
@@ -513,13 +471,6 @@ test_thread_dimensions(cl_device_id device, cl_context context, cl_command_queue
|
|||||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
|
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
|
||||||
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
|
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
|
||||||
|
|
||||||
clear_memory_kernel = clCreateKernel(program, "clear_memory", &err);
|
|
||||||
if (err)
|
|
||||||
{
|
|
||||||
log_error("clCreateKernel failed: %d\n", err);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the maximum sizes supported by this device
|
// Get the maximum sizes supported by this device
|
||||||
size_t max_workgroup_size = 0;
|
size_t max_workgroup_size = 0;
|
||||||
cl_ulong max_allocation = 0;
|
cl_ulong max_allocation = 0;
|
||||||
@@ -529,9 +480,6 @@ test_thread_dimensions(cl_device_id device, cl_context context, cl_command_queue
|
|||||||
err = get_maximums(kernel, context,
|
err = get_maximums(kernel, context,
|
||||||
&max_workgroup_size, &max_allocation, &max_physical);
|
&max_workgroup_size, &max_allocation, &max_physical);
|
||||||
|
|
||||||
err = get_maximums(clear_memory_kernel, context,
|
|
||||||
&max_workgroup_size_for_clear_kernel, &max_allocation, &max_physical);
|
|
||||||
|
|
||||||
// Make sure we don't try to allocate more than half the physical memory present.
|
// Make sure we don't try to allocate more than half the physical memory present.
|
||||||
if (max_allocation > (max_physical/2)) {
|
if (max_allocation > (max_physical/2)) {
|
||||||
log_info("Limiting max allocation to half of the maximum physical memory (%gMB of %gMB physical).\n",
|
log_info("Limiting max allocation to half of the maximum physical memory (%gMB of %gMB physical).\n",
|
||||||
@@ -820,7 +768,6 @@ test_thread_dimensions(cl_device_id device, cl_context context, cl_command_queue
|
|||||||
errors++;
|
errors++;
|
||||||
clReleaseMemObject(array);
|
clReleaseMemObject(array);
|
||||||
clReleaseKernel(kernel);
|
clReleaseKernel(kernel);
|
||||||
clReleaseKernel(clear_memory_kernel);
|
|
||||||
clReleaseProgram(program);
|
clReleaseProgram(program);
|
||||||
free_mtdata(d);
|
free_mtdata(d);
|
||||||
return -1;
|
return -1;
|
||||||
@@ -862,7 +809,6 @@ test_thread_dimensions(cl_device_id device, cl_context context, cl_command_queue
|
|||||||
free_mtdata(d);
|
free_mtdata(d);
|
||||||
clReleaseMemObject(array);
|
clReleaseMemObject(array);
|
||||||
clReleaseKernel(kernel);
|
clReleaseKernel(kernel);
|
||||||
clReleaseKernel(clear_memory_kernel);
|
|
||||||
clReleaseProgram(program);
|
clReleaseProgram(program);
|
||||||
if (errors)
|
if (errors)
|
||||||
log_error("%d total errors.\n", errors);
|
log_error("%d total errors.\n", errors);
|
||||||
|
|||||||
Reference in New Issue
Block a user