mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Initial open source release of OpenCL 2.2 CTS.
This commit is contained in:
20
test_conformance/thread_dimensions/CMakeLists.txt
Normal file
20
test_conformance/thread_dimensions/CMakeLists.txt
Normal file
@@ -0,0 +1,20 @@
|
||||
# Build description for the thread_dimensions conformance test.
set(MODULE_NAME THREAD_DIMENSIONS)

# The two test sources, followed by the shared harness sources built with them.
set(${MODULE_NAME}_SOURCES
    main.c
    test_thread_dimensions.c
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/threadTesting.c
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/typeWrappers.cpp
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/conversions.c
    ../../test_common/harness/msvc9.c
    ../../test_common/harness/parseParameters.cpp
)

# NOTE(review): no source files are named ahead of COMPILE_FLAGS in this call,
# so it is unclear which files (if any) receive -msse2 -- confirm the intent.
set_source_files_properties(COMPILE_FLAGS -msse2)

# Shared rules used by the conformance test modules.
include(../CMakeCommon.txt)
|
||||
16
test_conformance/thread_dimensions/Jamfile
Normal file
16
test_conformance/thread_dimensions/Jamfile
Normal file
@@ -0,0 +1,16 @@
|
||||
# Project-wide requirements: with msvc, pass /TP to the compiler.
project
    : requirements
#     <toolset>gcc:<cflags>-xc++
      <toolset>msvc:<cflags>"/TP"
    ;

# The test executable and the sources it is built from.
exe test_thread_dimensions
    : main.c
      test_thread_dimensions.c
    ;

# Install the executable under $(DIST), in a per-variant directory.
install dist
    : test_thread_dimensions
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/thread_dimensions
      <variant>release:<location>$(DIST)/release/tests/test_conformance/thread_dimensions
    ;
|
||||
44
test_conformance/thread_dimensions/Makefile
Normal file
44
test_conformance/thread_dimensions/Makefile
Normal file
@@ -0,0 +1,44 @@
|
||||
# Makefile for the thread_dimensions conformance test (links against the
# OpenCL/OpenGL/GLUT/AppKit frameworks).

# Optional ATF support: define BUILD_WITH_ATF to link the framework and
# compile with -DUSE_ATF.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Test sources plus the shared harness sources.
SRCS = main.c \
       test_thread_dimensions.c \
       ../../test_common/harness/errorHelpers.c \
       ../../test_common/harness/threadTesting.c \
       ../../test_common/harness/testHarness.c \
       ../../test_common/harness/kernelHelpers.c \
       ../../test_common/harness/mt19937.c \
       ../../test_common/harness/conversions.c \
       ../../test_common/harness/typeWrappers.cpp

DEFINES =

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
FRAMEWORK = $(SOURCES)
HEADERS =
TARGET = test_thread_dimensions
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Os -Wshorten-64-to-32
# The C sources are built with the C++ driver as well.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# One object per source: map both .c and .cpp to .o.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =

# 'all' and 'clean' name actions, not files; declare them phony so a stray
# file with either name cannot make the target look up to date.
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Any target without a rule ends up here.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
64
test_conformance/thread_dimensions/main.c
Normal file
64
test_conformance/thread_dimensions/main.c
Normal file
@@ -0,0 +1,64 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "procs.h"
|
||||
|
||||
basefn basefn_list[] = {
|
||||
test_quick_thread_dimensions_1d_explicit_local,
|
||||
test_quick_thread_dimensions_2d_explicit_local,
|
||||
test_quick_thread_dimensions_3d_explicit_local,
|
||||
test_quick_thread_dimensions_1d_implicit_local,
|
||||
test_quick_thread_dimensions_2d_implicit_local,
|
||||
test_quick_thread_dimensions_3d_implicit_local,
|
||||
test_full_thread_dimensions_1d_explicit_local,
|
||||
test_full_thread_dimensions_2d_explicit_local,
|
||||
test_full_thread_dimensions_3d_explicit_local,
|
||||
test_full_thread_dimensions_1d_implicit_local,
|
||||
test_full_thread_dimensions_2d_implicit_local,
|
||||
test_full_thread_dimensions_3d_implicit_local,
|
||||
};
|
||||
|
||||
const char *commonfn_names[] = {
|
||||
"quick_1d_explicit_local",
|
||||
"quick_2d_explicit_local",
|
||||
"quick_3d_explicit_local",
|
||||
"quick_1d_implicit_local",
|
||||
"quick_2d_implicit_local",
|
||||
"quick_3d_implicit_local",
|
||||
"full_1d_explicit_local",
|
||||
"full_2d_explicit_local",
|
||||
"full_3d_explicit_local",
|
||||
"full_1d_implicit_local",
|
||||
"full_2d_implicit_local",
|
||||
"full_3d_implicit_local",
|
||||
};
|
||||
|
||||
ct_assert((sizeof(commonfn_names) / sizeof(commonfn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
int num_commonfns = sizeof(commonfn_names) / sizeof(char *);
|
||||
|
||||
int
|
||||
main(int argc, const char *argv[])
|
||||
{
|
||||
return runTestHarness( argc, argv, num_commonfns, basefn_list, commonfn_names, false, false, 0 );
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
37
test_conformance/thread_dimensions/procs.h
Normal file
37
test_conformance/thread_dimensions/procs.h
Normal file
@@ -0,0 +1,37 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
/* Include guard: lets this header be included more than once per translation unit. */
#ifndef TEST_CONFORMANCE_THREAD_DIMENSIONS_PROCS_H
#define TEST_CONFORMANCE_THREAD_DIMENSIONS_PROCS_H

#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/mt19937.h"

extern const int kVectorSizeCount;

/*
 * Test entry points listed in main.c's basefn_list.  Each takes the device,
 * context and command queue to test with, plus an element count.
 */

/* "quick" variants */
extern int test_quick_thread_dimensions_1d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_thread_dimensions_2d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_thread_dimensions_3d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_thread_dimensions_1d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_thread_dimensions_2d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_thread_dimensions_3d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

/* "full" variants */
extern int test_full_thread_dimensions_1d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_full_thread_dimensions_2d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_full_thread_dimensions_3d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_full_thread_dimensions_1d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_full_thread_dimensions_2d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_full_thread_dimensions_3d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

#endif /* TEST_CONFORMANCE_THREAD_DIMENSIONS_PROCS_H */
|
||||
|
||||
991
test_conformance/thread_dimensions/test_thread_dimensions.c
Normal file
991
test_conformance/thread_dimensions/test_thread_dimensions.c
Normal file
@@ -0,0 +1,991 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/* NOTE(review): ITERATIONS is not referenced in the visible part of this file. */
#define ITERATIONS 4

/* Set to non-zero to enable the extra per-launch log_info() output in run_test(). */
#define DEBUG 0

/*
 * If the environment variable DO_NOT_LIMIT_THREAD_SIZE is not set, the test
 * will limit the maximum total global dimensions tested to this value.
 */
#define MAX_TOTAL_GLOBAL_THREADS_FOR_TEST (1 << 24)

/* Size-limiting switch; starts out as 0. */
int limit_size = 0;
|
||||
|
||||
static int
|
||||
get_maximums(cl_kernel kernel, cl_context context,
|
||||
size_t *max_workgroup_size_result,
|
||||
size_t *max_width_result,
|
||||
size_t *max_height_result,
|
||||
cl_ulong *max_allcoation_result,
|
||||
cl_ulong *max_physical_result) {
|
||||
int err = 0;
|
||||
cl_uint i;
|
||||
cl_device_id *devices;
|
||||
|
||||
// Get all the devices in the device group
|
||||
size_t num_devices_returned;
|
||||
err = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &num_devices_returned);
|
||||
if(err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clGetContextInfo() failed (%d).\n", err);
|
||||
return -10;
|
||||
}
|
||||
devices = (cl_device_id *)malloc(num_devices_returned);
|
||||
err = clGetContextInfo(context, CL_CONTEXT_DEVICES, num_devices_returned, devices, NULL);
|
||||
if(err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clGetContextInfo() failed (%d).\n", err);
|
||||
return -10;
|
||||
}
|
||||
num_devices_returned /= sizeof(cl_device_id);
|
||||
if (num_devices_returned > 1) log_info("%d devices in device group.\n", (int)num_devices_returned);
|
||||
if (num_devices_returned < 1) {
|
||||
log_error("0 devices found for this kernel.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Iterate over them and find the maximum local workgroup size and image size
|
||||
size_t max_workgroup_size = 0;
|
||||
size_t current_workgroup_size = 0;
|
||||
size_t max_width = 0;
|
||||
size_t current_width = 0;
|
||||
size_t max_height = 0;
|
||||
size_t current_height = 0;
|
||||
cl_ulong max_allocation = 0;
|
||||
cl_ulong current_allocation = 0;
|
||||
cl_ulong max_physical = 0;
|
||||
cl_ulong current_physical = 0;
|
||||
|
||||
for (i=0; i<num_devices_returned; i++) {
|
||||
// Max workgroup size for this kernel on this device
|
||||
err = clGetKernelWorkGroupInfo(kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof(current_workgroup_size), ¤t_workgroup_size, NULL);
|
||||
if(err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clGetKernelWorkGroupInfo() failed (%d) for device %d.\n", err, i);
|
||||
return -10;
|
||||
}
|
||||
if (max_workgroup_size == 0)
|
||||
max_workgroup_size = current_workgroup_size;
|
||||
else if (current_workgroup_size < max_workgroup_size)
|
||||
max_workgroup_size = current_workgroup_size;
|
||||
|
||||
// Max image width for this device
|
||||
err = clGetDeviceInfo(devices[i], CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(current_width), ¤t_width, NULL);
|
||||
if(err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clGetDeviceConfigInfo(CL_DEVICE_IMAGE2D_MAX_WIDTH) failed (%d) for device %d.\n", err, i);
|
||||
return -10;
|
||||
}
|
||||
if (max_width == 0)
|
||||
max_width = current_width;
|
||||
else if (current_width < max_width)
|
||||
max_width = current_width;
|
||||
|
||||
// Max image height for this device
|
||||
err = clGetDeviceInfo(devices[i], CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(current_height), ¤t_height, NULL);
|
||||
if(err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clGetDeviceConfigInfo(CL_DEVICE_IMAGE2D_MAX_HEIGHT) failed (%d) for device %d.\n", err, i);
|
||||
return -10;
|
||||
}
|
||||
if (max_height == 0)
|
||||
max_height = current_height;
|
||||
else if (current_height < max_height)
|
||||
max_height = current_height;
|
||||
|
||||
// Get the maximum allocation size
|
||||
err = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(current_allocation), ¤t_allocation, NULL);
|
||||
if(err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clGetDeviceConfigInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE) failed (%d) for device %d.\n", err, i);
|
||||
return -10;
|
||||
}
|
||||
if (max_allocation == 0)
|
||||
max_allocation = current_allocation;
|
||||
else if (current_allocation < max_allocation)
|
||||
max_allocation = current_allocation;
|
||||
|
||||
// Get the maximum physical size
|
||||
err = clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(current_physical), ¤t_physical, NULL);
|
||||
if(err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clGetDeviceConfigInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed (%d) for device %d.\n", err, i);
|
||||
return -10;
|
||||
}
|
||||
if (max_physical == 0)
|
||||
max_physical = current_physical;
|
||||
else if (current_physical < max_allocation)
|
||||
max_physical = current_physical;
|
||||
}
|
||||
free(devices);
|
||||
|
||||
log_info("Device maximums: max local workgroup size:%d, max width: %d, max height: %d, max allocation size: %g MB, max physical memory %gMB\n",
|
||||
(int)max_workgroup_size, (int)max_width, (int)max_height, (double)(max_allocation/1024.0/1024.0), (double)(max_physical/1024.0/1024.0));
|
||||
*max_workgroup_size_result = max_workgroup_size;
|
||||
*max_width_result = max_width;
|
||||
*max_height_result = max_height;
|
||||
*max_allcoation_result = max_allocation;
|
||||
*max_physical_result = max_physical;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * OpenCL C program: atomic variant with ulong start/end indices.
 *
 * test_thread_dimension_atomic: each work-item computes the linear index
 *   t_address = gid(2)*final_y_size*final_x_size + gid(1)*final_x_size + gid(0)
 * and atomically adds 1 to dst[t_address - start_address] when
 * start_address <= t_address < end_address.  If a global id is >= the final
 * size of its dimension, an error code (64 / 128 / 256 for dimension 0 / 1 / 2)
 * is atomically ORed into that same element.
 *
 * clear_memory: writes 0 to dst[get_global_id(0)].
 */
static const char *thread_dimension_kernel_code_atomic_long =
    "\n"
    "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
    "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
    "__kernel void test_thread_dimension_atomic(__global uint *dst, \n"
    " uint final_x_size, uint final_y_size, uint final_z_size,\n"
    " ulong start_address, ulong end_address)\n"
    "{\n"
    " uint error = 0;\n"
    " if (get_global_id(0) >= final_x_size)\n"   " error = 64;\n"
    " if (get_global_id(1) >= final_y_size)\n"   " error = 128;\n"
    " if (get_global_id(2) >= final_z_size)\n"   " error = 256;\n"
    "\n"
    " unsigned long t_address = (unsigned long)get_global_id(2)*(unsigned long)final_y_size*(unsigned long)final_x_size + \n"
    " (unsigned long)get_global_id(1)*(unsigned long)final_x_size + (unsigned long)get_global_id(0);\n"
    " if ((t_address >= start_address) && (t_address < end_address))\n"   " atom_add(&dst[t_address-start_address], 1u);\n"
    " if (error)\n"   " atom_or(&dst[t_address-start_address], error);\n"
    "\n"
    "}\n"
    "\n"
    "__kernel void clear_memory(__global uint *dst)\n\n"
    "{\n"
    " dst[get_global_id(0)] = 0;\n"
    "\n"
    "}\n";
|
||||
|
||||
/*
 * OpenCL C program: non-atomic variant with ulong start/end indices.
 *
 * test_thread_dimension_not_atomic: same indexing as the atomic program
 *   t_address = gid(2)*final_y_size*final_x_size + gid(1)*final_x_size + gid(0)
 * but dst[t_address - start_address] is updated with a plain ++ when
 * start_address <= t_address < end_address, and the error code
 * (64 / 128 / 256) is merged in with a plain |=.
 *
 * clear_memory: writes 0 to dst[get_global_id(0)].
 */
static const char *thread_dimension_kernel_code_not_atomic_long =
    "\n"
    "__kernel void test_thread_dimension_not_atomic(__global uint *dst, \n"
    " uint final_x_size, uint final_y_size, uint final_z_size,\n"
    " ulong start_address, ulong end_address)\n"
    "{\n"
    " uint error = 0;\n"
    " if (get_global_id(0) >= final_x_size)\n"   " error = 64;\n"
    " if (get_global_id(1) >= final_y_size)\n"   " error = 128;\n"
    " if (get_global_id(2) >= final_z_size)\n"   " error = 256;\n"
    "\n"
    " unsigned long t_address = (unsigned long)get_global_id(2)*(unsigned long)final_y_size*(unsigned long)final_x_size + \n"
    " (unsigned long)get_global_id(1)*(unsigned long)final_x_size + (unsigned long)get_global_id(0);\n"
    " if ((t_address >= start_address) && (t_address < end_address))\n"   " dst[t_address-start_address]++;\n"
    " if (error)\n"   " dst[t_address-start_address]|=error;\n"
    "\n"
    "}\n"
    "\n"
    "__kernel void clear_memory(__global uint *dst)\n\n"
    "{\n"
    " dst[get_global_id(0)] = 0;\n"
    "\n"
    "}\n";
|
||||
|
||||
/*
 * OpenCL C program: atomic variant with uint start/end indices (the program
 * uses no 64-bit types).
 *
 * test_thread_dimension_atomic: each work-item computes, in unsigned int,
 *   t_address = gid(2)*final_y_size*final_x_size + gid(1)*final_x_size + gid(0)
 * and atomically adds 1 to dst[t_address - start_address] when
 * start_address <= t_address < end_address.  If a global id is >= the final
 * size of its dimension, an error code (64 / 128 / 256 for dimension 0 / 1 / 2)
 * is atomically ORed into that same element.
 *
 * clear_memory: writes 0 to dst[get_global_id(0)].
 */
static const char *thread_dimension_kernel_code_atomic_not_long =
    "\n"
    "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
    "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"
    "__kernel void test_thread_dimension_atomic(__global uint *dst, \n"
    " uint final_x_size, uint final_y_size, uint final_z_size,\n"
    " uint start_address, uint end_address)\n"
    "{\n"
    " uint error = 0;\n"
    " if (get_global_id(0) >= final_x_size)\n"   " error = 64;\n"
    " if (get_global_id(1) >= final_y_size)\n"   " error = 128;\n"
    " if (get_global_id(2) >= final_z_size)\n"   " error = 256;\n"
    "\n"
    " unsigned int t_address = (unsigned int)get_global_id(2)*(unsigned int)final_y_size*(unsigned int)final_x_size + \n"
    " (unsigned int)get_global_id(1)*(unsigned int)final_x_size + (unsigned int)get_global_id(0);\n"
    " if ((t_address >= start_address) && (t_address < end_address))\n"   " atom_add(&dst[t_address-start_address], 1u);\n"
    " if (error)\n"   " atom_or(&dst[t_address-start_address], error);\n"
    "\n"
    "}\n"
    "\n"
    "__kernel void clear_memory(__global uint *dst)\n\n"
    "{\n"
    " dst[get_global_id(0)] = 0;\n"
    "\n"
    "}\n";
|
||||
|
||||
/*
 * OpenCL C program: non-atomic variant with uint start/end indices (the
 * program uses no 64-bit types).
 *
 * test_thread_dimension_not_atomic: each work-item computes, in unsigned int,
 *   t_address = gid(2)*final_y_size*final_x_size + gid(1)*final_x_size + gid(0)
 * and applies a plain ++ to dst[t_address - start_address] when
 * start_address <= t_address < end_address; the error code (64 / 128 / 256)
 * is merged in with a plain |=.
 *
 * clear_memory: writes 0 to dst[get_global_id(0)].
 */
static const char *thread_dimension_kernel_code_not_atomic_not_long =
    "\n"
    "__kernel void test_thread_dimension_not_atomic(__global uint *dst, \n"
    " uint final_x_size, uint final_y_size, uint final_z_size,\n"
    " uint start_address, uint end_address)\n"
    "{\n"
    " uint error = 0;\n"
    " if (get_global_id(0) >= final_x_size)\n"   " error = 64;\n"
    " if (get_global_id(1) >= final_y_size)\n"   " error = 128;\n"
    " if (get_global_id(2) >= final_z_size)\n"   " error = 256;\n"
    "\n"
    " unsigned int t_address = (unsigned int)get_global_id(2)*(unsigned int)final_y_size*(unsigned int)final_x_size + \n"
    " (unsigned int)get_global_id(1)*(unsigned int)final_x_size + (unsigned int)get_global_id(0);\n"
    " if ((t_address >= start_address) && (t_address < end_address))\n"   " dst[t_address-start_address]++;\n"
    " if (error)\n"   " dst[t_address-start_address]|=error;\n"
    "\n"
    "}\n"
    "\n"
    "__kernel void clear_memory(__global uint *dst)\n\n"
    "{\n"
    " dst[get_global_id(0)] = 0;\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
|
||||
static size_t max_workgroup_size_for_clear_kernel;
|
||||
cl_kernel clear_memory_kernel = 0;
|
||||
|
||||
|
||||
char dim_str[128];
|
||||
char *
|
||||
print_dimensions(size_t x, size_t y, size_t z, cl_uint dim) {
|
||||
// Not thread safe...
|
||||
if (dim == 1) {
|
||||
snprintf(dim_str, 128, "[%d]", (int)x);
|
||||
} else if (dim == 2) {
|
||||
snprintf(dim_str, 128, "[%d x %d]", (int)x, (int)y);
|
||||
} else if (dim == 3) {
|
||||
snprintf(dim_str, 128, "[%d x %d x %d]", (int)x, (int)y, (int)z);
|
||||
} else {
|
||||
snprintf(dim_str, 128, "INVALID DIM: %d", dim);
|
||||
}
|
||||
return dim_str;
|
||||
}
|
||||
|
||||
char dim_str2[128];
|
||||
char *
|
||||
print_dimensions2(size_t x, size_t y, size_t z, cl_uint dim) {
|
||||
// Not thread safe...
|
||||
if (dim == 1) {
|
||||
snprintf(dim_str2, 128, "[%d]", (int)x);
|
||||
} else if (dim == 2) {
|
||||
snprintf(dim_str2, 128, "[%d x %d]", (int)x, (int)y);
|
||||
} else if (dim == 3) {
|
||||
snprintf(dim_str2, 128, "[%d x %d x %d]", (int)x, (int)y, (int)z);
|
||||
} else {
|
||||
snprintf(dim_str2, 128, "INVALID DIM: %d", dim);
|
||||
}
|
||||
return dim_str2;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
This tests thread dimensions by executing a kernel across a range of dimensions.
|
||||
Each kernel instance does an atomic write into a specific location in a buffer to
|
||||
ensure that the correct dimensions are run. To handle large dimensions, the kernel
|
||||
masks its execution region internally. This allows a small (128MB) buffer to be used
|
||||
for very large executions by running the kernel multiple times.
|
||||
*/
|
||||
int run_test(cl_context context, cl_command_queue queue, cl_kernel kernel, cl_mem array, cl_uint memory_size, cl_uint dimensions,
|
||||
cl_uint final_x_size, cl_uint final_y_size, cl_uint final_z_size,
|
||||
cl_uint local_x_size, cl_uint local_y_size, cl_uint local_z_size,
|
||||
int explict_local)
|
||||
{
|
||||
cl_uint errors = 0;
|
||||
size_t global_size[3], local_size[3];
|
||||
global_size[0] = final_x_size; local_size[0] = local_x_size;
|
||||
global_size[1] = final_y_size; local_size[1] = local_y_size;
|
||||
global_size[2] = final_z_size; local_size[2] = local_z_size;
|
||||
|
||||
cl_ulong start_valid_memory_address = 0;
|
||||
cl_ulong end_valid_memory_address = memory_size;
|
||||
cl_ulong last_memory_address = (cl_ulong)final_x_size*(cl_ulong)final_y_size*(cl_ulong)final_z_size*sizeof(cl_uint);
|
||||
if (end_valid_memory_address > last_memory_address)
|
||||
end_valid_memory_address = last_memory_address;
|
||||
|
||||
int number_of_iterations_required = (int)ceil((double)last_memory_address/(double)memory_size);
|
||||
log_info("\t\tTest requires %gMB (%d test iterations using an allocation of %gMB).\n",
|
||||
(double)last_memory_address/(1024.0*1024.0), number_of_iterations_required, (double)memory_size/(1024.0*1024.0));
|
||||
//log_info("Last memory address: %llu, memory_size: %llu\n", last_memory_address, memory_size);
|
||||
|
||||
while (end_valid_memory_address <= last_memory_address)
|
||||
{
|
||||
int err;
|
||||
// Clear the memory
|
||||
// // Manually -- much slower on the GPU
|
||||
// memset((void*)data, 0, memory_size);
|
||||
// err = clWriteArray(context, array, 0, 0, memory_size, data, NULL);
|
||||
// if (err != CL_SUCCESS) {
|
||||
// log_error("Failed to write to data array: %d\n", err);
|
||||
// free(data);
|
||||
// return -4;
|
||||
// }
|
||||
// In a kernel
|
||||
err = clSetKernelArg(clear_memory_kernel, 0, sizeof(array), &array);
|
||||
if (err != CL_SUCCESS) {
|
||||
print_error( err, "Failed to set args for clear_memory_kernel to clear the memory between runs");
|
||||
return -4;
|
||||
}
|
||||
size_t global[3] = {1,0,0};
|
||||
global[0] = (cl_uint)(memory_size/sizeof(cl_uint));
|
||||
size_t local[3] = {1,0,0};
|
||||
local[0] = max_workgroup_size_for_clear_kernel;
|
||||
while( global[0] % local[0] ) //make sure that global[0] is evenly divided by local[0]. Will stop at 1 in worst case.
|
||||
local[0]--;
|
||||
err = clEnqueueNDRangeKernel(queue, clear_memory_kernel, 1, NULL, global, local, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS) {
|
||||
print_error( err, "Failed to execute clear_memory_kernel to clear the memory between runs");
|
||||
return -4;
|
||||
}
|
||||
|
||||
cl_ulong start_valid_index = start_valid_memory_address/sizeof(cl_uint);
|
||||
cl_ulong end_valid_index = end_valid_memory_address/sizeof(cl_uint);
|
||||
|
||||
cl_uint start_valid_index_int = (cl_uint) start_valid_index;
|
||||
cl_uint end_valid_index_int = (cl_uint) end_valid_index;
|
||||
|
||||
// Set the arguments
|
||||
err = clSetKernelArg(kernel, 0, sizeof(array), &array);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(final_x_size), &final_x_size);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof(final_y_size), &final_y_size);
|
||||
err |= clSetKernelArg(kernel, 3, sizeof(final_z_size), &final_z_size);
|
||||
if (gHasLong)
|
||||
{
|
||||
err |= clSetKernelArg(kernel, 4, sizeof(start_valid_index), &start_valid_index);
|
||||
err |= clSetKernelArg(kernel, 5, sizeof(end_valid_index), &end_valid_index);
|
||||
}
|
||||
else
|
||||
{
|
||||
err |= clSetKernelArg(kernel, 4, sizeof(start_valid_index_int), &start_valid_index_int);
|
||||
err |= clSetKernelArg(kernel, 5, sizeof(end_valid_index_int), &end_valid_index_int);
|
||||
}
|
||||
|
||||
if (err != CL_SUCCESS) {
|
||||
print_error( err, "Failed to set arguments.");
|
||||
return -3;
|
||||
}
|
||||
|
||||
|
||||
// Execute the kernel
|
||||
if (explict_local == 0) {
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, dimensions, NULL, global_size, NULL, 0, NULL, NULL);
|
||||
if (DEBUG) log_info("\t\t\tExecuting kernel with global %s, NULL local, %d dim, start address %llu, end address %llu.\n",
|
||||
print_dimensions(global_size[0], global_size[1], global_size[2], dimensions),
|
||||
dimensions, start_valid_memory_address, end_valid_memory_address);
|
||||
} else {
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, dimensions, NULL, global_size, local_size, 0, NULL, NULL);
|
||||
if (DEBUG) log_info("\t\t\tExecuting kernel with global %s, local %s, %d dim, start address %llu, end address %llu.\n",
|
||||
print_dimensions(global_size[0], global_size[1], global_size[2], dimensions), print_dimensions2(local_size[0], local_size[1], local_size[2], dimensions),
|
||||
dimensions, start_valid_memory_address, end_valid_memory_address);
|
||||
}
|
||||
if (err == CL_OUT_OF_RESOURCES) {
|
||||
log_info("WARNING: kernel reported CL_OUT_OF_RESOURCES, indicating the global dimensions are too large. Skipping this size.\n");
|
||||
return 0;
|
||||
}
|
||||
if (err != CL_SUCCESS) {
|
||||
print_error( err, "Failed to execute kernel\n");
|
||||
return -3;
|
||||
}
|
||||
|
||||
void* mapped = clEnqueueMapBuffer(queue, array, CL_TRUE, CL_MAP_READ, 0, memory_size, 0, NULL, NULL, &err );
|
||||
if (err != CL_SUCCESS) {
|
||||
print_error( err, "Failed to map results\n");
|
||||
return -4;
|
||||
}
|
||||
cl_uint* data = (cl_uint*)mapped;
|
||||
|
||||
// Verify the data
|
||||
cl_uint i;
|
||||
cl_uint last_address = (cl_uint)(end_valid_memory_address - start_valid_memory_address)/(cl_uint)sizeof(cl_uint);
|
||||
for (i=0; i<last_address; i++) {
|
||||
if (i < last_address) {
|
||||
if (data[i] != 1) {
|
||||
errors++;
|
||||
// log_info("%d expected 1 got %d\n", i, data[i]);
|
||||
}
|
||||
} else {
|
||||
if (data[i] != 0) {
|
||||
errors++;
|
||||
log_info("%d expected 0 got %d\n", i, data[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
err = clEnqueueUnmapMemObject(queue, array, mapped, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS) {
|
||||
print_error( err, "Failed to unmap results\n");
|
||||
return -4;
|
||||
}
|
||||
|
||||
err = clFlush(queue);
|
||||
if (err != CL_SUCCESS) {
|
||||
print_error( err, "Failed to flush\n");
|
||||
return -4;
|
||||
}
|
||||
|
||||
// Increment the addresses
|
||||
if (end_valid_memory_address == last_memory_address)
|
||||
break;
|
||||
start_valid_memory_address += memory_size;
|
||||
end_valid_memory_address += memory_size;
|
||||
if (end_valid_memory_address > last_memory_address)
|
||||
end_valid_memory_address = last_memory_address;
|
||||
}
|
||||
|
||||
if (errors)
|
||||
log_error("%d errors.\n", errors);
|
||||
return errors;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/* Per-dimension lower and upper limits applied by set_min(); all start at 1. */
static cl_uint max_x_size = 1, min_x_size = 1;
static cl_uint max_y_size = 1, min_y_size = 1;
static cl_uint max_z_size = 1, min_z_size = 1;

/*
 * Clamps *x, *y and *z in place to the per-dimension limits above.  Despite
 * the name, both bounds are applied: all lower bounds first, then all upper
 * bounds.
 */
static void set_min(cl_uint *x, cl_uint *y, cl_uint *z) {
    cl_uint *const sizes[3] = { x, y, z };
    const cl_uint lower[3] = { min_x_size, min_y_size, min_z_size };
    const cl_uint upper[3] = { max_x_size, max_y_size, max_z_size };
    int axis;

    // Raise anything below its minimum...
    for (axis = 0; axis < 3; axis++) {
        if (*sizes[axis] < lower[axis])
            *sizes[axis] = lower[axis];
    }
    // ...then lower anything above its maximum.
    for (axis = 0; axis < 3; axis++) {
        if (*sizes[axis] > upper[axis])
            *sizes[axis] = upper[axis];
    }
}
|
||||
|
||||
|
||||
int
|
||||
test_thread_dimensions(cl_device_id device, cl_context context, cl_command_queue queue, cl_uint dimensions, cl_uint min_dim, cl_uint max_dim, cl_uint quick_test, cl_uint size_increase_per_iteration, int explicit_local) {
|
||||
cl_mem array;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
int err;
|
||||
cl_uint memory_size, max_memory_size;
|
||||
size_t max_local_workgroup_size[3];
|
||||
cl_uint device_max_dimensions;
|
||||
int use_atomics = 1;
|
||||
MTdata d;
|
||||
|
||||
if (getenv("CL_WIMPY_MODE") && !quick_test) {
|
||||
log_info("CL_WIMPY_MODE enabled, skipping test\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Unconditionally test larger sizes for CL 1.1
|
||||
log_info("Testing large global dimensions.\n");
|
||||
limit_size = 0;
|
||||
|
||||
/* Check if atomics are supported. */
|
||||
if (!is_extension_available(device, "cl_khr_global_int32_base_atomics")) {
|
||||
log_info("WARNING: Base atomics not supported (cl_khr_global_int32_base_atomics). Test will not be guaranteed to catch overlaping thread dimensions.\n");
|
||||
use_atomics = 0;
|
||||
}
|
||||
|
||||
if (quick_test)
|
||||
log_info("WARNING: Running quick test. This will only test the base dimensions (power of two) and base-1 with all local threads fixed in one dim.\n");
|
||||
|
||||
// Verify that we can test this many dimensions
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(device_max_dimensions), &device_max_dimensions, NULL);
|
||||
test_error(err, "clGetDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
|
||||
|
||||
if (dimensions > device_max_dimensions) {
|
||||
log_info("Can not test %d dimensions when device only supports %d.\n", dimensions, device_max_dimensions);
|
||||
return 0;
|
||||
}
|
||||
|
||||
log_info("Setting random seed to 0.\n");
|
||||
|
||||
if (gHasLong) {
|
||||
if (use_atomics) {
|
||||
err = create_single_kernel_helper( context, &program, &kernel, 1, &thread_dimension_kernel_code_atomic_long, "test_thread_dimension_atomic" );
|
||||
} else {
|
||||
err = create_single_kernel_helper( context, &program, &kernel, 1, &thread_dimension_kernel_code_not_atomic_long, "test_thread_dimension_not_atomic" );
|
||||
}
|
||||
} else {
|
||||
if (use_atomics) {
|
||||
err = create_single_kernel_helper( context, &program, &kernel, 1, &thread_dimension_kernel_code_atomic_not_long, "test_thread_dimension_atomic" );
|
||||
} else {
|
||||
err = create_single_kernel_helper( context, &program, &kernel, 1, &thread_dimension_kernel_code_not_atomic_not_long, "test_thread_dimension_not_atomic" );
|
||||
}
|
||||
}
|
||||
test_error( err, "Unable to create testing kernel" );
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
|
||||
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
|
||||
|
||||
clear_memory_kernel = clCreateKernel(program, "clear_memory", &err);
|
||||
if (err)
|
||||
{
|
||||
log_error("clCreateKernel failed: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Get the maximum sizes supported by this device
|
||||
size_t max_workgroup_size = 0;
|
||||
size_t max_width = 0;
|
||||
size_t max_height = 0;
|
||||
cl_ulong max_allocation = 0;
|
||||
cl_ulong max_physical = 0;
|
||||
int found_size = 0;
|
||||
|
||||
err = get_maximums(kernel, context,
|
||||
&max_workgroup_size, &max_width, &max_height, &max_allocation, &max_physical);
|
||||
|
||||
err = get_maximums(clear_memory_kernel, context,
|
||||
&max_workgroup_size_for_clear_kernel, &max_width, &max_height, &max_allocation, &max_physical);
|
||||
|
||||
// Make sure we don't try to allocate more than half the physical memory present.
|
||||
if (max_allocation > (max_physical/2)) {
|
||||
log_info("Limiting max allocation to half of the maximum physical memory (%gMB of %gMB physical).\n",
|
||||
(max_physical/2/(1024.0*1024.0)), (max_physical/(1024.0*1024.0)));
|
||||
max_allocation = max_physical/2;
|
||||
}
|
||||
|
||||
// Limit the maximum we'll allocate for this test to 512 to be reasonable.
|
||||
if (max_allocation > 1024*1024*512) {
|
||||
log_info("Limiting max allocation to 512MB from device maximum allocation of %gMB.\n", (max_allocation/1024.0/1024.0));
|
||||
max_allocation = 1024*1024*512;
|
||||
}
|
||||
|
||||
max_memory_size = (cl_uint)(max_allocation);
|
||||
if (max_memory_size > 512*1024*1024)
|
||||
max_memory_size = 512*1024*1024;
|
||||
memory_size = max_memory_size;
|
||||
|
||||
log_info("Memory allocation size to use is %gMB, max workgroup size is %d.\n", max_memory_size/(1024.0*1024.0), (int)max_workgroup_size);
|
||||
|
||||
while (!found_size && memory_size >= max_memory_size/8) {
|
||||
array = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), memory_size, NULL, &err);
|
||||
if (err == CL_MEM_OBJECT_ALLOCATION_FAILURE || err == CL_OUT_OF_HOST_MEMORY) {
|
||||
memory_size -= max_memory_size/16;
|
||||
continue;
|
||||
}
|
||||
if (err) {
|
||||
print_error( err, "clCreateBuffer failed");
|
||||
return -1;
|
||||
}
|
||||
found_size = 1;
|
||||
}
|
||||
|
||||
if (!found_size) {
|
||||
log_error("Failed to find a working size greater than 1/8th of the reported allocation size.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (memory_size < max_memory_size) {
|
||||
log_info("Note: failed to allocate %gMB, using %gMB instead.\n", max_memory_size/(1024.0*1024.0), memory_size/(1024.0*1024.0));
|
||||
}
|
||||
|
||||
int errors = 0;
|
||||
// Each dimension's size is multiplied by this amount on each iteration.
|
||||
// uint size_increase_per_iteration = 4;
|
||||
// 1 test at the specified size
|
||||
// 2 tests with each dimensions +/- 1
|
||||
// 2 tests with all dimensions +/- 1
|
||||
// 2 random tests
|
||||
cl_uint tests_per_size = 1 + 2*dimensions + 2 + 2;
|
||||
|
||||
// 1 test with 1 as the local threads in each dimensions
|
||||
// 1 test with all the local threads in each dimension
|
||||
// 2 random tests
|
||||
cl_uint local_tests_per_size = 1 + dimensions + 2;
|
||||
if (explicit_local == 0)
|
||||
local_tests_per_size = 1;
|
||||
|
||||
max_x_size=1, min_x_size=1, max_y_size=1, min_y_size=1, max_z_size=1, min_z_size=1;
|
||||
|
||||
if (dimensions > 3) {
|
||||
log_error("Invalid dimensions: %d\n", dimensions);
|
||||
return -1;
|
||||
}
|
||||
max_x_size = max_dim;
|
||||
min_x_size = min_dim;
|
||||
if (dimensions > 1) {
|
||||
max_y_size = max_dim;
|
||||
min_y_size = min_dim;
|
||||
}
|
||||
if (dimensions > 2) {
|
||||
max_z_size = max_dim;
|
||||
min_z_size = min_dim;
|
||||
}
|
||||
|
||||
log_info("Testing with dimensions up to %s.\n", print_dimensions(max_x_size, max_y_size, max_z_size, dimensions));
|
||||
cl_uint x_size, y_size, z_size;
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
z_size = min_z_size;
|
||||
while (z_size <= max_z_size) {
|
||||
y_size = min_y_size;
|
||||
while (y_size <= max_y_size) {
|
||||
x_size = min_x_size;
|
||||
while (x_size <= max_x_size) {
|
||||
|
||||
log_info("Base test size %s:\n", print_dimensions(x_size, y_size, z_size, dimensions));
|
||||
|
||||
cl_uint sub_test;
|
||||
cl_uint final_x_size, final_y_size, final_z_size;
|
||||
for (sub_test = 0; sub_test < tests_per_size; sub_test++) {
|
||||
final_x_size = x_size;
|
||||
final_y_size = y_size;
|
||||
final_z_size = z_size;
|
||||
|
||||
if (sub_test == 0) {
|
||||
if (DEBUG) log_info("\tTesting with base dimensions %s.\n", print_dimensions(final_x_size, final_y_size, final_z_size, dimensions));
|
||||
} else if (quick_test) {
|
||||
// If we are in quick mode just do 1 run with x-1, y-1, and z-1.
|
||||
if (sub_test > 1)
|
||||
break;
|
||||
final_x_size--;
|
||||
final_y_size--;
|
||||
final_z_size--;
|
||||
set_min(&final_x_size, &final_y_size, &final_z_size);
|
||||
if (DEBUG) log_info("\tTesting with all base dimensions - 1 %s.\n", print_dimensions(final_x_size, final_y_size, final_z_size, dimensions));
|
||||
} else if (sub_test <= dimensions*2) {
|
||||
int dim_to_change = (sub_test-1)%dimensions;
|
||||
//log_info ("dim_to_change: %d (sub_test:%d) dimensions %d\n", dim_to_change,sub_test, dimensions);
|
||||
int up_down = (sub_test > dimensions) ? 0 : 1;
|
||||
|
||||
if (dim_to_change == 0) {
|
||||
final_x_size += (up_down) ? -1 : +1;
|
||||
} else if (dim_to_change == 1) {
|
||||
final_y_size += (up_down) ? -1 : +1;
|
||||
} else if (dim_to_change == 2) {
|
||||
final_z_size += (up_down) ? -1 : +1;
|
||||
} else {
|
||||
log_error("Invalid dim_to_change: %d\n", dim_to_change);
|
||||
return -1;
|
||||
}
|
||||
set_min(&final_x_size, &final_y_size, &final_z_size);
|
||||
if (DEBUG) log_info("\tTesting with one base dimension +/- 1 %s.\n", print_dimensions(final_x_size, final_y_size, final_z_size, dimensions));
|
||||
} else if (sub_test == (dimensions*2+1)) {
|
||||
if (dimensions == 1)
|
||||
continue;
|
||||
final_x_size--;
|
||||
final_y_size--;
|
||||
final_z_size--;
|
||||
set_min(&final_x_size, &final_y_size, &final_z_size);
|
||||
if (DEBUG) log_info("\tTesting with all base dimensions - 1 %s.\n", print_dimensions(final_x_size, final_y_size, final_z_size, dimensions));
|
||||
} else if (sub_test == (dimensions*2+2)) {
|
||||
if (dimensions == 1)
|
||||
continue;
|
||||
final_x_size++;
|
||||
final_y_size++;
|
||||
final_z_size++;
|
||||
set_min(&final_x_size, &final_y_size, &final_z_size);
|
||||
if (DEBUG) log_info("\tTesting with all base dimensions + 1 %s.\n", print_dimensions(final_x_size, final_y_size, final_z_size, dimensions));
|
||||
} else {
|
||||
final_x_size = (int)get_random_float(0, (x_size/size_increase_per_iteration), d)+x_size/size_increase_per_iteration;
|
||||
final_y_size = (int)get_random_float(0, (y_size/size_increase_per_iteration), d)+y_size/size_increase_per_iteration;
|
||||
final_z_size = (int)get_random_float(0, (z_size/size_increase_per_iteration), d)+z_size/size_increase_per_iteration;
|
||||
set_min(&final_x_size, &final_y_size, &final_z_size);
|
||||
if (DEBUG) log_info("\tTesting with random dimensions %s.\n", print_dimensions(final_x_size, final_y_size, final_z_size, dimensions));
|
||||
}
|
||||
|
||||
if (limit_size && final_x_size*final_y_size*final_z_size >= MAX_TOTAL_GLOBAL_THREADS_FOR_TEST) {
|
||||
log_info("Skipping size %s as it exceeds max test threads of %d.\n", print_dimensions(final_x_size, final_y_size, final_z_size, dimensions), MAX_TOTAL_GLOBAL_THREADS_FOR_TEST);
|
||||
continue;
|
||||
}
|
||||
|
||||
cl_uint local_test;
|
||||
cl_uint local_x_size, local_y_size, local_z_size;
|
||||
cl_uint previous_local_x_size=0, previous_local_y_size=0, previous_local_z_size=0;
|
||||
for (local_test = 0; local_test < local_tests_per_size; local_test++) {
|
||||
|
||||
local_x_size = 1;
|
||||
local_y_size = 1;
|
||||
local_z_size = 1;
|
||||
|
||||
if (local_test == 0) {
|
||||
} else if (local_test <= dimensions) {
|
||||
int dim_to_change = (local_test-1)%dimensions;
|
||||
if (dim_to_change == 0) {
|
||||
local_x_size = (cl_uint)max_workgroup_size;
|
||||
} else if (dim_to_change == 1) {
|
||||
local_y_size = (cl_uint)max_workgroup_size;
|
||||
} else if (dim_to_change == 2) {
|
||||
local_z_size = (cl_uint)max_workgroup_size;
|
||||
} else {
|
||||
log_error("Invalid dim_to_change: %d\n", dim_to_change);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
local_x_size = (int)get_random_float(1, (int)max_workgroup_size, d);
|
||||
while ((local_x_size > 1) && (final_x_size%local_x_size != 0))
|
||||
local_x_size--;
|
||||
int remainder = (int)floor((double)max_workgroup_size/local_x_size);
|
||||
// Evenly prefer dimensions 2 and 1 first
|
||||
if (local_test % 2) {
|
||||
if (dimensions > 1) {
|
||||
local_y_size = (int)get_random_float(1, (int)remainder, d);
|
||||
while ((local_y_size > 1) && (final_y_size%local_y_size != 0))
|
||||
local_y_size--;
|
||||
remainder = (int)floor((double)remainder/local_y_size);
|
||||
}
|
||||
if (dimensions > 2) {
|
||||
local_z_size = (int)get_random_float(1, (int)remainder, d);
|
||||
while ((local_z_size > 1) && (final_z_size%local_z_size != 0))
|
||||
local_z_size--;
|
||||
}
|
||||
} else {
|
||||
if (dimensions > 2) {
|
||||
local_z_size = (int)get_random_float(1, (int)remainder, d);
|
||||
while ((local_z_size > 1) && (final_z_size%local_z_size != 0))
|
||||
local_z_size--;
|
||||
remainder = (int)floor((double)remainder/local_z_size);
|
||||
}
|
||||
if (dimensions > 1) {
|
||||
local_y_size = (int)get_random_float(1, (int)remainder, d);
|
||||
while ((local_y_size > 1) && (final_y_size%local_y_size != 0))
|
||||
local_y_size--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Put all the threads in one dimension to speed up the test in quick mode.
|
||||
if (quick_test) {
|
||||
local_y_size = 1;
|
||||
local_z_size = 1;
|
||||
local_x_size = 1;
|
||||
if (final_z_size > final_y_size && final_z_size > final_x_size)
|
||||
local_z_size = (cl_uint)max_workgroup_size;
|
||||
else if (final_y_size > final_x_size)
|
||||
local_y_size = (cl_uint)max_workgroup_size;
|
||||
else
|
||||
local_x_size = (cl_uint)max_workgroup_size;
|
||||
}
|
||||
|
||||
if (local_x_size > max_local_workgroup_size[0])
|
||||
local_x_size = (int)max_local_workgroup_size[0];
|
||||
if (dimensions > 1 && local_y_size > max_local_workgroup_size[1])
|
||||
local_y_size = (int)max_local_workgroup_size[1];
|
||||
if (dimensions > 2 && local_z_size > max_local_workgroup_size[2])
|
||||
local_z_size = (int)max_local_workgroup_size[2];
|
||||
|
||||
// Cleanup the local dimensions
|
||||
while ((local_x_size > 1) && (final_x_size%local_x_size != 0))
|
||||
local_x_size--;
|
||||
while ((local_y_size > 1) && (final_y_size%local_y_size != 0))
|
||||
local_y_size--;
|
||||
while ((local_z_size > 1) && (final_z_size%local_z_size != 0))
|
||||
local_z_size--;
|
||||
if ((previous_local_x_size == local_x_size) && (previous_local_y_size == local_y_size) && (previous_local_z_size == local_z_size))
|
||||
continue;
|
||||
|
||||
if (explicit_local == 0) {
|
||||
local_x_size = 0;
|
||||
local_y_size = 0;
|
||||
local_z_size = 0;
|
||||
}
|
||||
|
||||
if (DEBUG) log_info("\t\tTesting local size %s.\n", print_dimensions(local_x_size, local_y_size, local_z_size, dimensions));
|
||||
|
||||
if (explicit_local == 0) {
|
||||
log_info("\tTesting global %s local [NULL]...\n",
|
||||
print_dimensions(final_x_size, final_y_size, final_z_size, dimensions));
|
||||
} else {
|
||||
log_info("\tTesting global %s local %s...\n",
|
||||
print_dimensions(final_x_size, final_y_size, final_z_size, dimensions),
|
||||
print_dimensions2(local_x_size, local_y_size, local_z_size, dimensions));
|
||||
}
|
||||
|
||||
// Avoid running with very small local sizes on very large global sizes
|
||||
cl_uint total_local_size = local_x_size * local_y_size * local_z_size;
|
||||
long total_global_size = final_x_size * final_y_size * final_z_size;
|
||||
if (total_local_size < max_workgroup_size) {
|
||||
if (total_global_size > 16384*16384) {
|
||||
if (total_local_size < 64) {
|
||||
log_info("Skipping test as local_size is small and it will take a long time.\n");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
err = run_test(context, queue, kernel, array, memory_size, dimensions,
|
||||
final_x_size, final_y_size, final_z_size,
|
||||
local_x_size, local_y_size, local_z_size, explicit_local);
|
||||
|
||||
// If we failed to execute, then return so we don't crash.
|
||||
if (err < 0) {
|
||||
clReleaseMemObject(array);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Otherwise, if we had errors add them up.
|
||||
if (err) {
|
||||
log_error("Test global %s local %s failed.\n",
|
||||
print_dimensions(final_x_size, final_y_size, final_z_size, dimensions),
|
||||
print_dimensions2(local_x_size, local_y_size, local_z_size, dimensions));
|
||||
errors++;
|
||||
clReleaseMemObject(array);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseKernel(clear_memory_kernel);
|
||||
clReleaseProgram(program);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
previous_local_x_size = local_x_size;
|
||||
previous_local_y_size = local_y_size;
|
||||
previous_local_z_size = local_z_size;
|
||||
|
||||
// Only test one config in quick mode.
|
||||
if (quick_test)
|
||||
break;
|
||||
} // local_test size
|
||||
} // sub_test
|
||||
// Increment the x_size
|
||||
if (x_size == max_x_size)
|
||||
break;
|
||||
x_size *= size_increase_per_iteration;
|
||||
if (x_size > max_x_size)
|
||||
x_size = max_x_size;
|
||||
} // x_size
|
||||
// Increment the y_size
|
||||
if (y_size == max_y_size)
|
||||
break;
|
||||
y_size *= size_increase_per_iteration;
|
||||
if (y_size > max_y_size)
|
||||
y_size = max_y_size;
|
||||
} // y_size
|
||||
// Increment the z_size
|
||||
if (z_size == max_z_size)
|
||||
break;
|
||||
z_size *= size_increase_per_iteration;
|
||||
if (z_size > max_z_size)
|
||||
z_size = max_z_size;
|
||||
} // z_size
|
||||
|
||||
|
||||
free_mtdata(d);
|
||||
clReleaseMemObject(array);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseKernel(clear_memory_kernel);
|
||||
clReleaseProgram(program);
|
||||
if (errors)
|
||||
log_error("%d total errors.\n", errors);
|
||||
return errors;
|
||||
|
||||
}
|
||||
|
||||
/*
 * Mode selectors handed to test_thread_dimensions() by the wrappers below.
 * NOTE(review): these appear to feed its quick_test flag (non-zero selects
 * the reduced run) -- confirm against the function's signature.
 */
#define FULL  0
#define QUICK 1
|
||||
|
||||
int
|
||||
test_quick_thread_dimensions_1d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
||||
return test_thread_dimensions(deviceID, context, queue,
|
||||
1, 1, 65536*512, QUICK, 4, 1);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
test_quick_thread_dimensions_2d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
||||
return test_thread_dimensions(deviceID, context, queue,
|
||||
2, 1, 65536/4, QUICK, 16, 1);
|
||||
}
|
||||
|
||||
int
|
||||
test_quick_thread_dimensions_3d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
||||
return test_thread_dimensions(deviceID, context, queue,
|
||||
3, 1, 1024, QUICK, 32, 1);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
test_quick_thread_dimensions_1d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
||||
return test_thread_dimensions(deviceID, context, queue,
|
||||
1, 1, 65536*256, QUICK, 4, 0);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
test_quick_thread_dimensions_2d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
||||
return test_thread_dimensions(deviceID, context, queue,
|
||||
2, 1, 65536/4, QUICK, 16, 0);
|
||||
}
|
||||
|
||||
int
|
||||
test_quick_thread_dimensions_3d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
||||
return test_thread_dimensions(deviceID, context, queue,
|
||||
3, 1, 1024, QUICK, 32, 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
int
|
||||
test_full_thread_dimensions_1d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
||||
return test_thread_dimensions(deviceID, context, queue,
|
||||
1, 1, 65536*512, FULL, 4, 1);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
test_full_thread_dimensions_2d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
||||
return test_thread_dimensions(deviceID, context, queue,
|
||||
2, 1, 65536/4, FULL, 16, 1);
|
||||
}
|
||||
|
||||
int
|
||||
test_full_thread_dimensions_3d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
||||
return test_thread_dimensions(deviceID, context, queue,
|
||||
3, 1, 1024, FULL, 32, 1);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
test_full_thread_dimensions_1d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
||||
return test_thread_dimensions(deviceID, context, queue,
|
||||
1, 1, 65536*256, FULL, 4, 0);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
test_full_thread_dimensions_2d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
||||
return test_thread_dimensions(deviceID, context, queue,
|
||||
2, 1, 65536/4, FULL, 16, 0);
|
||||
}
|
||||
|
||||
int
|
||||
test_full_thread_dimensions_3d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
||||
return test_thread_dimensions(deviceID, context, queue,
|
||||
3, 1, 1024, FULL, 32, 0);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user