Initial open source release of OpenCL 2.0 CTS.

This commit is contained in:
Kedar Patil
2017-05-16 18:50:35 +05:30
parent 6911ba5116
commit 3a440d17c8
883 changed files with 318212 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
# Pull in the CMakeLists.txt of each test suite directory below this one.
add_subdirectory(api)
add_subdirectory(basic)

View File

@@ -0,0 +1,34 @@
# Name of this test module. It also forms the prefix of the
# <MODULE_NAME>_SOURCES variable that is filled in below.
set(MODULE_NAME COMPATIBILITY_API)
# Sources of the module: the API tests that live in this directory, followed by
# the shared harness sources from ../../test_common/harness.
set(${MODULE_NAME}_SOURCES
main.c
test_bool.c
test_retain.cpp
test_retain_program.c
test_queries.cpp
test_create_kernels.c
test_kernels.c
test_api_min_max.c
test_kernel_arg_changes.cpp
test_kernel_arg_multi_setup.cpp
test_binary.cpp
test_native_kernel.cpp
test_mem_objects.cpp
test_create_context_from_type.cpp
test_device_min_data_type_align_size_alignment.cpp
test_platform.cpp
test_mem_object_info.cpp
test_null_buffer_arg.c
test_kernel_arg_info.c
../../test_common/harness/errorHelpers.c
../../test_common/harness/threadTesting.c
../../test_common/harness/testHarness.c
../../test_common/harness/kernelHelpers.c
../../test_common/harness/typeWrappers.cpp
../../test_common/harness/conversions.c
../../test_common/harness/mt19937.c
../../test_common/harness/msvc9.c
../../test_common/harness/imageHelpers.cpp
)
# Hand over to the build logic shared by the test modules, three directories up.
# NOTE(review): presumably CMakeCommon.txt reads MODULE_NAME and
# ${MODULE_NAME}_SOURCES to define the actual target -- confirm in that file.
include(../../../CMakeCommon.txt)

View File

@@ -0,0 +1,27 @@
# Boost.Build description of the API conformance test executable.

# Project-wide requirements: compile every source as C++, including the .c
# files (gcc: -xc++, msvc: /TP).
project
    : requirements
      <toolset>gcc:<cflags>-xc++
      <toolset>msvc:<cflags>"/TP"
    ;

# The test executable. The list covers every test source in this directory so
# that each test function registered in main.c has a definition at link time.
# The following entries were previously missing here although the CMake and
# Makefile builds of the same directory compile them:
#   test_bool.c, test_device_min_data_type_align_size_alignment.cpp,
#   test_kernel_arg_info.c, test_mem_object_info.cpp, test_mem_objects.cpp,
#   test_null_buffer_arg.c, test_retain.cpp
# NOTE(review): the shared harness sources are not listed in this Jamfile;
# presumably they are supplied by a parent Jamfile -- confirm.
exe test_api
    : main.c
      test_api_min_max.c
      test_binary.cpp
      test_bool.c
      test_create_kernels.c
      test_create_context_from_type.cpp
      test_device_min_data_type_align_size_alignment.cpp
      test_kernel_arg_changes.cpp
      test_kernel_arg_info.c
      test_kernel_arg_multi_setup.cpp
      test_kernels.c
      test_mem_object_info.cpp
      test_mem_objects.cpp
      test_native_kernel.cpp
      test_null_buffer_arg.c
      test_queries.cpp
      test_retain.cpp
      test_retain_program.c
      test_platform.cpp
    ;

# Copy the built executable into the per-variant distribution directory.
install dist
    : test_api #test.lst
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/api
      <variant>release:<location>$(DIST)/release/tests/test_conformance/api
    ;

View File

@@ -0,0 +1,61 @@
# Makefile for the test_api executable. It links against the OpenCL, OpenGL,
# GLUT and AppKit frameworks (see LIBRARIES below).

# Optional ATF support: when BUILD_WITH_ATF is defined, link the ATF framework
# and compile with -DUSE_ATF.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif
# Test sources from this directory followed by the shared harness sources.
SRCS = main.c \
test_retain_program.c \
test_queries.cpp \
test_create_kernels.c \
test_kernels.c \
test_kernel_arg_info.c \
test_api_min_max.c \
test_kernel_arg_changes.cpp \
test_kernel_arg_multi_setup.cpp \
test_binary.cpp \
test_native_kernel.cpp \
test_create_context_from_type.cpp \
test_platform.cpp \
test_retain.cpp \
test_device_min_data_type_align_size_alignment.cpp \
test_mem_objects.cpp \
test_bool.c \
test_null_buffer_arg.c \
test_mem_object_info.cpp \
../../test_common/harness/errorHelpers.c \
../../test_common/harness/threadTesting.c \
../../test_common/harness/testHarness.c \
../../test_common/harness/imageHelpers.cpp \
../../test_common/harness/kernelHelpers.c \
../../test_common/harness/typeWrappers.cpp \
../../test_common/harness/mt19937.c \
../../test_common/harness/conversions.c
# Preprocessor symbols; each entry is turned into a -D flag in CFLAGS/CXXFLAGS.
DEFINES = DONT_TEST_GARBAGE_POINTERS
# Absolute paths of the sources, so the object files land next to each source.
SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
HEADERS =
TARGET = test_api
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
# CC is the C++ driver, so the .c sources are built with c++ as well.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
# Derive the object list in two passes: first map .c to .o, then .cpp to .o.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}
TARGETOBJECT =
# Default goal: build the test executable.
all: $(TARGET)
# Link step; the objects themselves are produced by make's built-in rules.
$(TARGET): $(OBJECTS)
$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
# Remove the executable and all object files.
clean:
rm -f $(TARGET) $(OBJECTS)
# Fallback for any goal that has no rule: print a message instead of failing silently.
.DEFAULT:
@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,214 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include "procs.h"
#include "../../test_common/harness/testHarness.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif
// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h
// (for example, generate_random_image_data()), the tests are required to declare
// the following variables:
//   gDeviceType   - initialised to CL_DEVICE_TYPE_DEFAULT
//   gTestRounding - initialised to false
cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
bool gTestRounding = false;
// Table of test entry points handed to runTestHarness() in main().
// It is passed together with basefn_names, and the entries of the two tables
// line up position by position, so any insertion, removal or reordering here
// must be mirrored in basefn_names. A compile-time check further down in this
// file verifies that both tables have the same number of entries.
basefn basefn_list[] = {
test_get_platform_info,
test_get_sampler_info,
test_get_command_queue_info,
test_get_context_info,
test_get_device_info,
test_enqueue_task,
test_binary_get,
test_program_binary_create,
test_kernel_required_group_size,
test_release_kernel_order,
test_release_during_execute,
test_load_single_kernel,
test_load_two_kernels,
test_load_two_kernels_in_one,
test_load_two_kernels_manually,
test_get_program_info_kernel_names,
test_get_kernel_arg_info,
test_create_kernels_in_program,
test_get_kernel_info,
test_execute_kernel_local_sizes,
test_set_kernel_arg_by_index,
test_set_kernel_arg_constant,
test_set_kernel_arg_struct_array,
test_kernel_global_constant,
test_min_max_thread_dimensions,
test_min_max_work_items_sizes,
test_min_max_work_group_size,
test_min_max_read_image_args,
test_min_max_write_image_args,
test_min_max_mem_alloc_size,
test_min_max_image_2d_width,
test_min_max_image_2d_height,
test_min_max_image_3d_width,
test_min_max_image_3d_height,
test_min_max_image_3d_depth,
test_min_max_image_array_size,
test_min_max_image_buffer_size,
test_min_max_parameter_size,
test_min_max_samplers,
test_min_max_constant_buffer_size,
test_min_max_constant_args,
test_min_max_compute_units,
test_min_max_address_bits,
test_min_max_single_fp_config,
test_min_max_double_fp_config,
test_min_max_local_mem_size,
test_min_max_kernel_preferred_work_group_size_multiple,
test_min_max_execution_capabilities,
test_min_max_queue_properties,
test_min_max_device_version,
test_min_max_language_version,
test_kernel_arg_changes,
test_kernel_arg_multi_setup_random,
test_native_kernel,
test_create_context_from_type,
test_platform_extensions,
test_get_platform_ids,
test_for_bool_type,
test_repeated_setup_cleanup,
test_retain_queue_single,
test_retain_queue_multiple,
test_retain_mem_object_single,
test_retain_mem_object_multiple,
test_min_data_type_align_size_alignment,
test_mem_object_destructor_callback,
test_null_buffer_arg,
test_get_buffer_info,
test_get_image2d_info,
test_get_image3d_info,
test_get_image1d_info,
test_get_image1d_array_info,
test_get_image2d_array_info,
};
// Names of the tests, handed to runTestHarness() in main() alongside
// basefn_list. Entry i names the function stored at basefn_list[i], so the two
// tables must be kept in the same order and at the same length.
// Most names are the function name without its "test_" prefix; two entries
// differ: "binary_create" (test_program_binary_create) and "bool_type"
// (test_for_bool_type).
const char *basefn_names[] = {
"get_platform_info",
"get_sampler_info",
"get_command_queue_info",
"get_context_info",
"get_device_info",
"enqueue_task",
"binary_get",
"binary_create",
"kernel_required_group_size",
"release_kernel_order",
"release_during_execute",
"load_single_kernel",
"load_two_kernels",
"load_two_kernels_in_one",
"load_two_kernels_manually",
"get_program_info_kernel_names",
"get_kernel_arg_info",
"create_kernels_in_program",
"get_kernel_info",
"execute_kernel_local_sizes",
"set_kernel_arg_by_index",
"set_kernel_arg_constant",
"set_kernel_arg_struct_array",
"kernel_global_constant",
"min_max_thread_dimensions",
"min_max_work_items_sizes",
"min_max_work_group_size",
"min_max_read_image_args",
"min_max_write_image_args",
"min_max_mem_alloc_size",
"min_max_image_2d_width",
"min_max_image_2d_height",
"min_max_image_3d_width",
"min_max_image_3d_height",
"min_max_image_3d_depth",
"min_max_image_array_size",
"min_max_image_buffer_size",
"min_max_parameter_size",
"min_max_samplers",
"min_max_constant_buffer_size",
"min_max_constant_args",
"min_max_compute_units",
"min_max_address_bits",
"min_max_single_fp_config",
"min_max_double_fp_config",
"min_max_local_mem_size",
"min_max_kernel_preferred_work_group_size_multiple",
"min_max_execution_capabilities",
"min_max_queue_properties",
"min_max_device_version",
"min_max_language_version",
"kernel_arg_changes",
"kernel_arg_multi_setup_random",
"native_kernel",
"create_context_from_type",
"platform_extensions",
"get_platform_ids",
"bool_type",
"repeated_setup_cleanup",
"retain_queue_single",
"retain_queue_multiple",
"retain_mem_object_single",
"retain_mem_object_multiple",
"min_data_type_align_size_alignment",
"mem_object_destructor_callback",
"null_buffer_arg",
"get_buffer_info",
"get_image2d_info",
"get_image3d_info",
"get_image1d_info",
"get_image1d_array_info",
"get_image2d_array_info",
};
// Compile-time guard: the function table and the name table must have the
// same number of entries.
ct_assert((sizeof(basefn_list) / sizeof(basefn_list[0])) == (sizeof(basefn_names) / sizeof(basefn_names[0])));
// Number of registered tests, derived from the name table.
int num_fns = sizeof(basefn_names) / sizeof(basefn_names[0]);
// Program entry point: forwards the command line and the two test tables to
// the shared test harness and uses its result as the process exit status.
// The trailing arguments (false, false, 0) are harness options whose meaning
// is defined by runTestHarness().
int main(int argc, const char *argv[])
{
    const int harnessStatus = runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
    return harnessStatus;
}

View File

@@ -0,0 +1,108 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/clImageHelper.h"
#include "../../test_common/harness/imageHelpers.h"
// Helper (not a test entry point): takes two floats and returns a float.
extern float calculate_ulperror(float a, float b);
// Test entry points. Every function below has the same signature: it receives
// a device, a context, a command queue and an element count, and returns int.
extern int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_load_two_kernels_manually(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_for_bool_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_platform_extensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_release_during_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
// NOTE(review): test_set_kernel_arg_struct does not appear in the test table
// in main.c -- confirm whether it is still defined or intentionally unregistered.
extern int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_work_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_arg_changes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_arg_multi_setup_random(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_null_buffer_arg( cl_device_id device_id, cl_context context, cl_command_queue queue, int num_elements );
extern int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image2d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image3d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image1d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );

View File

@@ -0,0 +1,31 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Common header for the test sources in this directory: pulls in the harness
// compatibility layer, a few C/system headers and the test prototypes (procs.h).
#ifndef _testBase_h
#define _testBase_h
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#endif // _testBase_h

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,226 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
// OpenCL C source shared by both tests in this file. It is a single string
// (adjacent literals are concatenated), so the array has exactly one element.
// The kernel "sample_test" writes (int)src[tid] + 1 into dst[tid] for each
// work-item.
static const char *sample_binary_kernel_source[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid] + 1;\n"
"\n"
"}\n" };
// Builds the sample program from source for the given device and retrieves
// its binary through clGetProgramInfo, checking the reported sizes on the way.
//
// deviceID     - device the program is built for
// context      - context the program is created in
// queue        - unused by this test
// num_elements - unused by this test
//
// Returns 0 on success and a non-zero value on failure. The contents of the
// binary are not verified.
int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    // Owns one malloc'd block and frees it when the function is left, so the
    // early returns below (explicit ones and those taken inside test_error,
    // which is expected to return on failure) no longer leak the host buffer.
    struct HostAlloc
    {
        void *ptr;
        explicit HostAlloc( size_t bytes ) : ptr( malloc( bytes ) ) {}
        ~HostAlloc() { free( ptr ); }
    };

    int error;
    clProgramWrapper program;
    size_t binarySize;

    program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
    test_error( error, "Unable to create program from source" );

    // Build so we have a binary to get
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build test program" );

    // Get the size of the resulting binary (only one device)
    error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
    test_error( error, "Unable to get binary size" );

    // Sanity check
    if( binarySize == 0 )
    {
        log_error( "ERROR: Binary size of program is zero\n" );
        return -1;
    }

    // Create a buffer to receive the actual binary
    HostAlloc binaryStorage( sizeof(unsigned char)*binarySize );
    if( binaryStorage.ptr == NULL )
    {
        log_error( "ERROR: Unable to allocate %d bytes for the program binary\n", (int)binarySize );
        return -1;
    }
    unsigned char *binary = (unsigned char*)binaryStorage.ptr;
    unsigned char *buffers[ 1 ] = { binary };

    // Do another sanity check here first: CL_PROGRAM_BINARIES is an array of
    // pointers, so its reported size must equal the size of our pointer array
    size_t size;
    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size );
    test_error( error, "Unable to get expected size of binaries array" );
    if( size != sizeof( buffers ) )
    {
        log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d)\n", (int)sizeof( buffers ), (int)size );
        return -1;
    }

    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
    test_error( error, "Unable to get program binary" );

    // No way to verify the binary is correct, so just be good with that
    return 0;
}
// Round-trips a program through its binary form and checks that the reloaded
// program behaves like the original:
//   1. build the sample program from source and fetch its binary,
//   2. create and build a second program from that binary (with a
//      binary_status array), then fetch the second program's binary,
//   3. create and build a third program from the second binary, this time
//      passing NULL for binary_status, and fetch its binary as well,
//   4. run "sample_test" from the source program and from the first binary
//      program on the same input and compare the outputs byte for byte.
//
// deviceID     - device the programs are built for
// context      - context all objects are created in
// queue        - command queue used to run the kernels and read the results
// num_elements - unused by this test (a fixed 1000 work-items are run)
//
// Returns 0 on success and a non-zero value on failure.
int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    /* To test this in a self-contained fashion, we have to create a program with
    source, then get the binary, then use that binary to reload the program, and then verify */

    // Owns one malloc'd block and frees it when the function is left. This
    // replaces the manual free() calls, which were skipped on every early
    // return (the explicit one on a result mismatch and those taken inside
    // test_error, which is expected to return on failure).
    struct HostAlloc
    {
        void *ptr;
        explicit HostAlloc( size_t bytes ) : ptr( malloc( bytes ) ) {}
        ~HostAlloc() { free( ptr ); }
    };

    int error;
    clProgramWrapper program, program_from_binary;
    size_t binarySize;

    program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
    test_error( error, "Unable to create program from source" );

    // Build so we have a binary to get
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build test program" );

    // Get the size of the resulting binary (only one device)
    error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
    test_error( error, "Unable to get binary size" );

    // Sanity check
    if( binarySize == 0 )
    {
        log_error( "ERROR: Binary size of program is zero\n" );
        return -1;
    }

    // Create a buffer and get the actual binary
    HostAlloc binaryStorage( binarySize );
    if( binaryStorage.ptr == NULL )
    {
        log_error( "ERROR: Unable to allocate %d bytes for the program binary\n", (int)binarySize );
        return -1;
    }
    unsigned char *binary = (unsigned char*)binaryStorage.ptr;
    const unsigned char *buffers[ 1 ] = { binary };
    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
    test_error( error, "Unable to get program binary" );

    // Reload the binary into a new program, checking the per-device status too
    cl_int loadErrors[ 1 ];
    program_from_binary = clCreateProgramWithBinary( context, 1, &deviceID, &binarySize, buffers, loadErrors, &error );
    test_error( error, "Unable to load valid program binary" );
    test_error( loadErrors[ 0 ], "Unable to load valid device binary into program" );

    error = clBuildProgram( program_from_binary, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build binary program" );

    // Get the size of the binary built from the first binary
    size_t binary2Size;
    error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARY_SIZES, sizeof( binary2Size ), &binary2Size, NULL );
    test_error( error, "Unable to get size for the binary program" );

    // Now get the binary one more time (its contents are not compared with
    // the first binary; it is only fed back into clCreateProgramWithBinary)
    HostAlloc binary2Storage( binary2Size );
    if( binary2Storage.ptr == NULL )
    {
        log_error( "ERROR: Unable to allocate %d bytes for the second program binary\n", (int)binary2Size );
        return -1;
    }
    buffers[ 0 ] = (unsigned char*)binary2Storage.ptr;
    error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
    test_error( error, "Unable to get program binary second time" );

    // Try again, this time without passing the status ptr in, to make sure we still
    // get a valid binary
    clProgramWrapper programWithoutStatus = clCreateProgramWithBinary( context, 1, &deviceID, &binary2Size, buffers, NULL, &error );
    test_error( error, "Unable to load valid program binary when binary_status pointer is NULL" );

    error = clBuildProgram( programWithoutStatus, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build binary program created without binary_status" );

    // Get the size of the binary created without passing binary_status
    size_t binary3Size;
    error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARY_SIZES, sizeof( binary3Size ), &binary3Size, NULL );
    test_error( error, "Unable to get size for the binary program created without binary_status" );

    // Now get the binary one more time
    HostAlloc binary3Storage( binary3Size );
    if( binary3Storage.ptr == NULL )
    {
        log_error( "ERROR: Unable to allocate %d bytes for the third program binary\n", (int)binary3Size );
        return -1;
    }
    buffers[ 0 ] = (unsigned char*)binary3Storage.ptr;
    error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
    test_error( error, "Unable to get program binary from the program created without binary_status" );

    // Now execute the source program and the first binary program to see that
    // they both do the same thing.
    clMemWrapper in, out, out_binary;
    clKernelWrapper kernel, kernel_binary;
    size_t size_to_run = 1000;

    // Allocate some data
    HostAlloc inStorage( sizeof(cl_float)*size_to_run );
    HostAlloc outStorage( sizeof(cl_int)*size_to_run );
    HostAlloc outBinaryStorage( sizeof(cl_int)*size_to_run );
    if( inStorage.ptr == NULL || outStorage.ptr == NULL || outBinaryStorage.ptr == NULL )
    {
        log_error( "ERROR: Unable to allocate host buffers for the kernel comparison\n" );
        return -1;
    }
    cl_float *in_data = (cl_float*)inStorage.ptr;
    cl_int *out_data = (cl_int*)outStorage.ptr;
    cl_int *out_data_binary = (cl_int*)outBinaryStorage.ptr;

    memset(out_data, 0, sizeof(cl_int)*size_to_run);
    memset(out_data_binary, 0, sizeof(cl_int)*size_to_run);
    for (size_t i=0; i<size_to_run; i++)
        in_data[i] = (cl_float)i;

    // Create the buffers
    in = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_float)*size_to_run, in_data, &error);
    test_error( error, "clCreateBuffer failed");
    out = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data, &error);
    test_error( error, "clCreateBuffer failed");
    out_binary = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data_binary, &error);
    test_error( error, "clCreateBuffer failed");

    // Create the kernels
    kernel = clCreateKernel(program, "sample_test", &error);
    test_error( error, "clCreateKernel failed");
    kernel_binary = clCreateKernel(program_from_binary, "sample_test", &error);
    test_error( error, "clCreateKernel from binary failed");

    // Set the arguments: both kernels read the same input but write to
    // separate output buffers
    error = clSetKernelArg(kernel, 0, sizeof(in), &in);
    test_error( error, "clSetKernelArg failed");
    error = clSetKernelArg(kernel, 1, sizeof(out), &out);
    test_error( error, "clSetKernelArg failed");
    error = clSetKernelArg(kernel_binary, 0, sizeof(in), &in);
    test_error( error, "clSetKernelArg failed");
    error = clSetKernelArg(kernel_binary, 1, sizeof(out_binary), &out_binary);
    test_error( error, "clSetKernelArg failed");

    // Execute the kernels
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel failed");
    error = clEnqueueNDRangeKernel(queue, kernel_binary, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel for binary kernel failed");

    // Finish up
    error = clFinish(queue);
    test_error( error, "clFinish failed");

    // Get the results back
    error = clEnqueueReadBuffer(queue, out, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data, 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed");
    error = clEnqueueReadBuffer(queue, out_binary, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data_binary, 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed");

    // Compare the results
    if( memcmp( out_data, out_data_binary, sizeof(cl_int)*size_to_run ) != 0 )
    {
        log_error( "ERROR: Results from executing binary and regular kernel differ.\n" );
        return -1;
    }

    // All done! The host buffers are released by the HostAlloc destructors.
    return 0;
}

View File

@@ -0,0 +1,52 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/testHarness.h"
// OpenCL C source of a kernel that declares and branches on a local variable
// of type bool. It is a single string (adjacent literals are concatenated),
// so the array has exactly one element. The kernel writes (int)src[tid] to
// dst[tid] when src[tid] lies strictly between -0.5f and 0.5f, and 0 otherwise.
const char *kernel_with_bool[] = {
"__kernel void kernel_with_bool(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" bool myBool = (src[tid] < 0.5f) && (src[tid] > -0.5f);\n"
" if(myBool)\n"
" {\n"
" dst[tid] = (int)src[tid];\n"
" }\n"
" else\n"
" {\n"
" dst[tid] = 0;\n"
" }\n"
"\n"
"}\n"
};
// Checks that a kernel using the bool type can be turned into a program and a
// kernel object. The kernel is only created, never executed.
//
// deviceID     - unused by this test
// context      - context the program and kernel are created in
// queue        - unused by this test
// num_elements - unused by this test
//
// Returns the result of create_single_kernel_helper(): 0 on success, non-zero
// on failure.
int test_for_bool_type(cl_device_id deviceID, cl_context context,
                       cl_command_queue queue, int num_elements)
{
    // Start from NULL so the handles are never left uninitialised.
    cl_program program = NULL;
    cl_kernel kernel = NULL;

    int err = create_single_kernel_helper(context,
                                          &program,
                                          &kernel,
                                          1, kernel_with_bool,
                                          "kernel_with_bool" );

    // The objects are only needed to prove that creation works; release them
    // on success so the test does not leak a program and a kernel per run.
    // On failure the state of the handles depends on the helper, so they are
    // left untouched.
    if( err == 0 )
    {
        clReleaseKernel( kernel );
        clReleaseProgram( program );
    }

    return err;
}

View File

@@ -0,0 +1,130 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#ifndef _WIN32
#include <unistd.h>
#endif
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/conversions.h"
extern cl_uint gRandomSeed;
/*
 * Verifies that a context obtained through clCreateContextFromType() is fully
 * usable: a command queue, a program/kernel and two buffers are created in
 * the new context, a float->int conversion kernel is run over 10 elements and
 * the output is compared with the host-side conversion.
 *
 * The `context`, `queue` and `num_elements` parameters belong to the common
 * test signature and are not used; everything runs on the freshly created
 * context and queue.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    clContextWrapper context_to_test;
    clCommandQueueWrapper queue_to_test;
    size_t threads[1], localThreads[1];
    cl_float inputData[10];
    cl_int outputData[10];
    int i;
    RandomSeed seed( gRandomSeed );

    const char *sample_single_test_kernel[] = {
        "__kernel void sample_test(__global float *src, __global int *dst)\n"
        "{\n"
        " int tid = get_global_id(0);\n"
        "\n"
        " dst[tid] = (int)src[tid];\n"
        "\n"
        "}\n" };

    /* Build the new context from the type and platform of the device under test */
    cl_device_type type;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed\n");

    cl_platform_id platform;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed\n");

    /* The property list is an array of integers terminated by 0 (not by a null pointer) */
    cl_context_properties properties[3] = {
        (cl_context_properties)CL_CONTEXT_PLATFORM,
        (cl_context_properties)platform,
        0
    };

    context_to_test = clCreateContextFromType(properties, type, notify_callback, NULL, &error);
    test_error(error, "clCreateContextFromType failed");
    if (context_to_test == NULL) {
        log_error("clCreateContextFromType returned NULL, but error was CL_SUCCESS.");
        return -1;
    }

    /* The queue properties argument is a bitfield; 0 requests the defaults */
    queue_to_test = clCreateCommandQueue(context_to_test, deviceID, 0, &error);
    test_error(error, "clCreateCommandQueue failed");
    if (queue_to_test == NULL) {
        log_error("clCreateCommandQueue returned NULL, but error was CL_SUCCESS.");
        return -1;
    }

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context_to_test, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    streams[0] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Write some test data */
    memset( outputData, 0, sizeof( outputData ) );
    for (i=0; i<10; i++)
        inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);

    error = clEnqueueWriteBuffer(queue_to_test, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL);
    test_error( error, "Unable to set testing kernel data" );

    /* Test setting the arguments by index manually (deliberately out of order) */
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;

    error = get_max_common_work_group_size( context_to_test, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue_to_test, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue_to_test, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for (i=0; i<10; i++)
    {
        if (outputData[i] != (int)inputData[i])
        {
            log_error( "ERROR: Data did not verify on first pass!\n" );
            return -1;
        }
    }

    return 0;
}

View File

@@ -0,0 +1,643 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/testHarness.h"
/* One source string holding one kernel, sample_test (float -> int copy). */
const char *sample_single_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n" };
/* Length of the string above; filled in at run time by the test that uses it. */
size_t sample_single_kernel_lengths[1];
/*
 * Two separate source strings, one kernel each: sample_test (float -> int)
 * and sample_test2 (int -> float). Note the comma after the first "}\n".
 */
const char *sample_two_kernels[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n",
"__kernel void sample_test2(__global int *src, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)src[tid];\n"
"\n"
"}\n" };
/* Lengths of the two strings above; filled in at run time. */
size_t sample_two_kernel_lengths[2];
/*
 * The same two kernels concatenated into a single source string (there is no
 * comma between the two kernel bodies, so the array has exactly one element).
 */
const char *sample_two_kernels_in_1[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"
"__kernel void sample_test2(__global int *src, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)src[tid];\n"
"\n"
"}\n" };
/* Length of the single combined string; filled in at run time. */
size_t sample_two_kernels_in_1_lengths[1];
/*
 * Kernel used by the repeated setup/cleanup test: each work-item writes its
 * input element plus one to the output buffer.
 */
const char *repeate_test_kernel =
"__kernel void test_kernel(__global int *src, __global int *dst)\n"
"{\n"
" dst[get_global_id(0)] = src[get_global_id(0)]+1;\n"
"}\n";
/*
 * Builds a program containing one kernel, obtains it through
 * clCreateKernelsInProgram() and validates what clGetKernelInfo() reports for
 * it: owning program, owning context, argument count and function name,
 * including the sizes returned for each query.
 *
 * queue and num_elements are part of the common test signature and unused.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    cl_program testProgram;
    clKernelWrapper kernel;
    cl_context testContext;
    unsigned int numKernels;
    cl_char testName[512];
    cl_uint testArgCount;
    size_t realSize;

    /* Preprocess: calc the length of each source file line */
    sample_single_kernel_lengths[ 0 ] = strlen( sample_single_kernel[ 0 ] );

    /* Create a program */
    program = clCreateProgramWithSource( context, 1, sample_single_kernel, sample_single_kernel_lengths, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create single kernel program" );
        return -1;
    }

    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build single kernel program" );

    error = clCreateKernelsInProgram(program, 1, &kernel, &numKernels);
    test_error( error, "Unable to create single kernel program" );

    /* The source defines exactly one kernel, so exactly one must be reported */
    if( numKernels != 1 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check program and context pointers */
    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( cl_program ), &testProgram, &realSize );
    test_error( error, "Unable to get kernel's program" );
    if( (cl_program)testProgram != (cl_program)program )
    {
        log_error( "ERROR: Returned kernel's program does not match program used to create it! (Got %p, expected %p)\n", (cl_program)testProgram, (cl_program)program );
        return -1;
    }
    if( realSize != sizeof( cl_program ) )
    {
        log_error( "ERROR: Returned size of kernel's program does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_program ), (int)realSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( cl_context ), &testContext, &realSize );
    test_error( error, "Unable to get kernel's context" );
    if( (cl_context)testContext != (cl_context)context )
    {
        log_error( "ERROR: Returned kernel's context does not match program used to create it! (Got %p, expected %p)\n", (cl_context)testContext, (cl_context)context );
        return -1;
    }
    if( realSize != sizeof( cl_context ) )
    {
        log_error( "ERROR: Returned size of kernel's context does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_context ), (int)realSize );
        return -1;
    }

    /* Test arg count: first the size-only query, then the value itself */
    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &realSize );
    test_error( error, "Unable to get size of arg count info from kernel" );
    if( realSize != sizeof( testArgCount ) )
    {
        log_error( "ERROR: size of arg count not valid! %d\n", (int)realSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );
    if( testArgCount != 2 )
    {
        log_error( "ERROR: Kernel arg count does not match!\n" );
        return -1;
    }

    /* Test function name; the reported size must include the terminating NUL */
    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, &realSize );
    test_error( error, "Unable to get name from kernel" );
    if( strcmp( (char *)testName, "sample_test" ) != 0 )
    {
        log_error( "ERROR: Kernel names do not match!\n" );
        return -1;
    }
    if( realSize != strlen( (char *)testName ) + 1 )
    {
        log_error( "ERROR: Length of kernel name returned does not validate (expected %d, got %d)\n", (int)strlen( (char *)testName ) + 1, (int)realSize );
        return -1;
    }

    /* All done */
    return 0;
}
/*
 * Builds a program from two separate source strings (one kernel each),
 * retrieves both kernels with clCreateKernelsInProgram() and checks that the
 * two expected names, "sample_test" and "sample_test2", are each reported
 * exactly once, in either order. The argument count of the first returned
 * kernel is checked as well.
 *
 * queue and num_elements are part of the common test signature and unused.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel[2];
    unsigned int numKernels;
    cl_char testName[ 512 ];
    cl_uint testArgCount;

    /* Preprocess: calc the length of each source file line */
    sample_two_kernel_lengths[ 0 ] = strlen( sample_two_kernels[ 0 ] );
    sample_two_kernel_lengths[ 1 ] = strlen( sample_two_kernels[ 1 ] );

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 2, sample_two_kernels, sample_two_kernel_lengths, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program!" );
        return -1;
    }

    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build dual kernel program" );

    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
    test_error( error, "Unable to create dual kernel program" );

    if( numKernels != 2 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check first kernel */
    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );

    int found_kernel1 = 0, found_kernel2 = 0;

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
        return -1;
    }

    /* Check second kernel; it must be the one not seen yet */
    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from second kernel" );

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        if (found_kernel1) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        if (found_kernel2) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
        return -1;
    }

    if( !found_kernel1 || !found_kernel2 )
    {
        log_error( "ERROR: Kernel names do not match.\n" );
        if (!found_kernel1)
            log_error("Kernel \"%s\" not returned.\n", "sample_test");
        /* Report the kernel that is actually missing */
        if (!found_kernel2)
            log_error("Kernel \"%s\" not returned.\n", "sample_test2");
        return -1;
    }

    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );

    if( testArgCount != 2 )
    {
        log_error( "ERROR: wrong # of args for kernel\n" );
        return -1;
    }

    /* All done */
    return 0;
}
/*
 * Same check as the two-source-string variant, but both kernels live in one
 * source string: the program is built, both kernels are retrieved with
 * clCreateKernelsInProgram() and the names "sample_test" and "sample_test2"
 * must each be reported exactly once, in either order. The argument count of
 * the first returned kernel is checked as well.
 *
 * queue and num_elements are part of the common test signature and unused.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel[2];
    unsigned int numKernels;
    cl_char testName[512];
    cl_uint testArgCount;

    /* Preprocess: calc the length of each source file line */
    sample_two_kernels_in_1_lengths[ 0 ] = strlen( sample_two_kernels_in_1[ 0 ] );

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, sample_two_kernels_in_1_lengths, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program" );
        return -1;
    }

    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build dual kernel program" );

    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
    test_error( error, "Unable to create dual kernel program" );

    if( numKernels != 2 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check first kernel */
    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );

    int found_kernel1 = 0, found_kernel2 = 0;

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
        return -1;
    }

    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );

    if( testArgCount != 2 )
    {
        log_error( "ERROR: wrong # of args for kernel\n" );
        return -1;
    }

    /* Check second kernel; it must be the one not seen yet */
    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        if (found_kernel1) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        if (found_kernel2) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
        return -1;
    }

    if( !found_kernel1 || !found_kernel2 )
    {
        log_error( "ERROR: Kernel names do not match.\n" );
        if (!found_kernel1)
            log_error("Kernel \"%s\" not returned.\n", "sample_test");
        /* Report the kernel that is actually missing */
        if (!found_kernel2)
            log_error("Kernel \"%s\" not returned.\n", "sample_test2");
        return -1;
    }

    /* All done */
    return 0;
}
/*
 * Builds the combined two-kernel program and creates each kernel by name with
 * clCreateKernel(), requesting them in the reverse of their order in the
 * source.
 *
 * queue and num_elements are part of the common test signature and unused.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_load_two_kernels_manually( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel1, kernel2;
    int error;

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program" );
        return -1;
    }

    /* Compile the program */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build kernel program" );

    /* Try manually creating kernels (backwards just in case) */
    kernel1 = clCreateKernel( program, "sample_test2", &error );
    if( kernel1 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 1" );
        return -1;
    }

    /* Check the error code as well as the handle, exactly as for kernel 1 */
    kernel2 = clCreateKernel( program, "sample_test", &error );
    if( kernel2 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 2" );
        return -1;
    }

    return 0;
}
/*
 * Builds the combined two-kernel program and validates the program-level
 * kernel queries:
 *   - CL_PROGRAM_NUM_KERNELS must report 2;
 *   - CL_PROGRAM_KERNEL_NAMES must report a NUL-terminated, semicolon
 *     separated list naming both kernels, in either order;
 *   - both kernels must then be creatable by name.
 *
 * queue and num_elements are part of the common test signature and unused.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel1, kernel2;
    int error;
    size_t i;

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program" );
        return -1;
    }

    /* Compile the program */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build kernel program" );

    /* Lookup the number of kernels in the program. */
    size_t total_kernels = 0;
    error = clGetProgramInfo(program, CL_PROGRAM_NUM_KERNELS, sizeof(size_t),&total_kernels,NULL);
    test_error( error, "Unable to get program info num kernels");

    if (total_kernels != 2)
    {
        print_error( error, "Program did not contain two kernels" );
        return -1;
    }

    /* Lookup the kernel names. Either ordering of the two names is accepted. */
    const char* actual_names[] = { "sample_test;sample_test2", "sample_test2;sample_test"} ;
    const size_t num_actual_names = sizeof( actual_names ) / sizeof( actual_names[0] );

    size_t kernel_names_len = 0;
    error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,0,NULL,&kernel_names_len);
    test_error( error, "Unable to get length of kernel names list." );

    /* Both accepted strings have the same length, so comparing against [0] is enough */
    if (kernel_names_len != (strlen(actual_names[0])+1))
    {
        print_error( error, "Kernel names length did not match");
        return -1;
    }

    const size_t len = (kernel_names_len+1)*sizeof(char);
    char* kernel_names = (char*)malloc(len);
    if( kernel_names == NULL )
    {
        log_error( "ERROR: Unable to allocate %d bytes for the kernel names list\n", (int)len );
        return -1;
    }

    error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,len,kernel_names,&kernel_names_len);
    /* test_error leaves the function on failure, so release the buffer first */
    if( error != CL_SUCCESS )
        free(kernel_names);
    test_error( error, "Unable to get kernel names list." );

    /* Check to see if the kernel name array is null terminated. */
    if (kernel_names_len == 0 || kernel_names[kernel_names_len-1] != '\0')
    {
        free(kernel_names);
        print_error( error, "Kernel name list was not null terminated");
        return -1;
    }

    /* Check to see if the correct kernel name string was returned. */
    for( i = 0; i < num_actual_names; i++ )
        if( 0 == strcmp(actual_names[i],kernel_names) )
            break;

    if (i == num_actual_names)
    {
        /* Log while the buffer is still valid; it is freed only afterwards */
        log_error( "Kernel names \"%s\" did not match:\n", kernel_names );
        for( i = 0; i < num_actual_names; i++ )
            log_error( "\t\t\"%s\"\n", actual_names[i] );
        free(kernel_names);
        return -1;
    }
    free(kernel_names);

    /* Try manually creating both kernels by name */
    kernel1 = clCreateKernel( program, "sample_test", &error );
    if( kernel1 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 1" );
        return -1;
    }

    kernel2 = clCreateKernel( program, "sample_test2", &error );
    if( kernel2 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 2" );
        return -1;
    }

    return 0;
}
/*
 * Kernel for the clEnqueueTask test. It loops over `count` elements itself
 * and writes tid + i to dst[i], so one work-item fills the whole buffer.
 */
static const char *single_task_kernel[] = {
"__kernel void sample_test(__global int *dst, int count)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" for( int i = 0; i < count; i++ )\n"
" dst[i] = tid + i;\n"
"\n"
"}\n" };
/*
 * Runs the single-task kernel with clEnqueueTask() and checks that it filled
 * a 100-element buffer with the values 0..99 (element i must equal i).
 *
 * deviceID and num_elements are part of the common test signature and unused.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper output;
    cl_int count;

    if( create_single_kernel_helper( context, &program, &kernel, 1, single_task_kernel, "sample_test" ) )
        return -1;

    // Create args
    count = 100;
    output = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * count, NULL, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &output );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( cl_int ), &count );
    test_error( error, "Unable to set kernel argument" );

    // Run task
    error = clEnqueueTask( queue, kernel, 0, NULL, NULL );
    test_error( error, "Unable to run task" );

    // Read results
    cl_int *results = (cl_int*)malloc(sizeof(cl_int)*count);
    if( results == NULL )
    {
        log_error( "ERROR: Unable to allocate host memory for %d results\n", (int)count );
        return -1;
    }

    error = clEnqueueReadBuffer( queue, output, CL_TRUE, 0, sizeof( cl_int ) * count, results, 0, NULL, NULL );
    // test_error leaves the function on failure, so release the host buffer first
    if( error != CL_SUCCESS )
        free(results);
    test_error( error, "Unable to read results" );

    // Validate
    for( cl_int i = 0; i < count; i++ )
    {
        if( results[ i ] != i )
        {
            log_error( "ERROR: Task result value %d did not validate! Expected %d, got %d\n", (int)i, (int)i, (int)results[ i ] );
            free(results);
            return -1;
        }
    }

    /* All done */
    free(results);
    return 0;
}
#define TEST_SIZE 1000
int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
cl_context local_context;
cl_command_queue local_queue;
cl_program local_program;
cl_kernel local_kernel;
cl_mem local_mem_in, local_mem_out;
cl_event local_event;
size_t global_dim[3];
int i, j, error;
global_dim[0] = TEST_SIZE;
global_dim[1] = 1; global_dim[2] = 1;
cl_int *inData, *outData;
cl_int status;
inData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE);
outData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE);
for (i=0; i<TEST_SIZE; i++) {
inData[i] = i;
}
for (i=0; i<100; i++) {
memset(outData, 0, sizeof(cl_int)*TEST_SIZE);
local_context = clCreateContext(NULL, 1, &deviceID, notify_callback, NULL, &error);
test_error( error, "clCreateContext failed");
local_queue = clCreateCommandQueue(local_context, deviceID, 0, &error);
test_error( error, "clCreateCommandQueue failed");
local_program = clCreateProgramWithSource(local_context, 1, &repeate_test_kernel, NULL, &error);
test_error( error, "clCreateProgramWithSource failed");
error = clBuildProgram(local_program, 0, NULL, NULL, NULL, NULL);
test_error( error, "clBuildProgram failed");
local_kernel = clCreateKernel(local_program, "test_kernel", &error);
test_error( error, "clCreateKernel failed");
local_mem_in = clCreateBuffer(local_context, CL_MEM_READ_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
test_error( error, "clCreateBuffer failed");
local_mem_out = clCreateBuffer(local_context, CL_MEM_WRITE_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
test_error( error, "clCreateBuffer failed");
error = clEnqueueWriteBuffer(local_queue, local_mem_in, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), inData, 0, NULL, NULL);
test_error( error, "clEnqueueWriteBuffer failed");
error = clEnqueueWriteBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
test_error( error, "clEnqueueWriteBuffer failed");
error = clSetKernelArg(local_kernel, 0, sizeof(local_mem_in), &local_mem_in);
test_error( error, "clSetKernelArg failed");
error = clSetKernelArg(local_kernel, 1, sizeof(local_mem_out), &local_mem_out);
test_error( error, "clSetKernelArg failed");
error = clEnqueueNDRangeKernel(local_queue, local_kernel, 1, NULL, global_dim, NULL, 0, NULL, &local_event);
test_error( error, "clEnqueueNDRangeKernel failed");
error = clWaitForEvents(1, &local_event);
test_error( error, "clWaitForEvents failed");
error = clGetEventInfo(local_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL);
test_error( error, "clGetEventInfo failed");
if (status != CL_COMPLETE) {
log_error( "Kernel execution not complete: status %d.\n", status);
free(inData);
free(outData);
return -1;
}
error = clEnqueueReadBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
test_error( error, "clEnqueueReadBuffer failed");
clReleaseEvent(local_event);
clReleaseMemObject(local_mem_in);
clReleaseMemObject(local_mem_out);
clReleaseKernel(local_kernel);
clReleaseProgram(local_program);
clReleaseCommandQueue(local_queue);
clReleaseContext(local_context);
for (j=0; j<TEST_SIZE; j++) {
if (outData[j] != inData[j] + 1) {
log_error("Results failed to validate at iteration %d. %d != %d.\n", i, outData[j], inData[j] + 1);
free(inData);
free(outData);
return -1;
}
}
}
free(inData);
free(outData);
return 0;
}

View File

@@ -0,0 +1,60 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/testHarness.h"
#ifndef _WIN32
#include <unistd.h>
#endif
/*
 * Returns non-zero when x is an exact power of two (1, 2, 4, ...), and 0
 * otherwise.
 *
 * Zero is explicitly rejected: the bit trick x & (x-1) also yields 0 for
 * x == 0, which would otherwise report 0 as a power of two.
 */
int IsAPowerOfTwo( unsigned long x )
{
    return x != 0 && 0 == (x & (x-1));
}
/*
 * Validates the value a device reports for CL_DEVICE_MEM_BASE_ADDR_ALIGN.
 *
 * The reported value is in bits. It must be at least the size in bits of the
 * largest 16-element vector type in use (long16 when gHasLong is set, int16
 * otherwise) and it must be a power of two.
 *
 * context, queue and n_elems are part of the common test signature and unused.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    /* Required minimum in bytes; converted to bits where it is compared */
    cl_uint min_alignment;
    if (gHasLong)
        min_alignment = sizeof(cl_long)*16;
    else
        min_alignment = sizeof(cl_int)*16;

    int error = 0;
    cl_uint alignment;

    error = clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(alignment), &alignment, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_MEM_BASE_ADDR_ALIGN failed");
    log_info("Device reported CL_DEVICE_MEM_BASE_ADDR_ALIGN = %lu bits.\n", (unsigned long)alignment);

    // Verify the size is large enough
    if (alignment < min_alignment*8) {
        // Terminate the line so the next log entry does not run into this one
        log_error("ERROR: alignment too small. Minimum alignment for %s16 is %lu bits, device reported %lu bits.\n",
                  (gHasLong) ? "long" : "int",
                  (unsigned long)(min_alignment*8), (unsigned long)alignment);
        return -1;
    }

    // Verify the size is a power of two
    if (!IsAPowerOfTwo((unsigned long)alignment)) {
        log_error("ERROR: alignment is not a power of two.\n");
        return -1;
    }

    return 0;
}

View File

@@ -0,0 +1,141 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
extern "C" { extern cl_uint gRandomSeed;}
// This test is designed to stress changing kernel arguments between execute calls (that are asynchronous and thus
// potentially overlapping) to make sure each kernel gets the right arguments
// Note: put a delay loop in the kernel to make sure we have time to queue the next kernel before this one finishes
//
// The kernel takes a read-only 2D image and an int buffer. After the empty delay loop, work-item `tid` writes
// (image width * tid) to outDimensions[tid * 2] and (image height * tid) to outDimensions[tid * 2 + 1].
const char *inspect_image_kernel_source[] = {
"__kernel void sample_test(read_only image2d_t src, __global int *outDimensions )\n"
"{\n"
" int tid = get_global_id(0), i;\n"
" for( i = 0; i < 100000; i++ ); \n"
" outDimensions[tid * 2] = get_image_width(src) * tid;\n"
" outDimensions[tid * 2 + 1] = get_image_height(src) * tid;\n"
"\n"
"}\n" };
// Number of image/result-buffer pairs, i.e. how many times the kernel is enqueued with different arguments
#define NUM_TRIES 100
// Global work size of each launch; each work-item produces two ints of output
#define NUM_THREADS 2048
// Enqueues the same kernel NUM_TRIES times back to back, switching its image and output-buffer arguments before
// every launch, and only afterwards reads the output buffers. Each buffer must contain the dimensions of the
// image that was bound for that particular launch, scaled by the work-item index.
//
// num_elements is part of the common test signature and unused.
// Returns 0 on success and a non-zero value on any failure.
int test_kernel_arg_changes(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    int error, tryIdx;
    clMemWrapper images[ NUM_TRIES ];
    size_t sizes[ NUM_TRIES ][ 2 ];
    clMemWrapper results[ NUM_TRIES ];
    cl_image_format imageFormat;
    size_t maxWidth, maxHeight;
    size_t threads[1], localThreads[1];
    cl_int resultArray[ NUM_THREADS * 2 ];
    char errStr[ 128 ];
    RandomSeed seed( gRandomSeed );

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // Any 8-bit format will do; only the image dimensions are inspected
    error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &imageFormat );
    test_error( error, "Unable to obtain suitable image format to test with!" );

    // Build the kernel under test
    error = create_single_kernel_helper( context, &program, &kernel, 1, inspect_image_kernel_source, "sample_test" );
    test_error( error, "Unable to create testing kernel" );

    // Query the device limits that bound the random image sizes
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
    test_error( error, "Unable to get max image dimensions for device" );

    // Fix the launch geometry
    threads[0] = NUM_THREADS;
    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size for kernel" );

    // Two ints of output per work-item
    const size_t resultBytes = sizeof( cl_int ) * threads[0] * 2;

    // Phase 1: allocate one randomly sized image and one output buffer per try
    for( tryIdx = 0; tryIdx < NUM_TRIES; tryIdx++ )
    {
        size_t *dims = sizes[ tryIdx ];

        dims[ 0 ] = genrand_int32(seed) % (maxWidth/32) + 1;
        dims[ 1 ] = genrand_int32(seed) % (maxHeight/32) + 1;

        images[ tryIdx ] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY),
                                            &imageFormat, dims[ 0 ], dims[ 1 ], 0, NULL, &error );
        if( images[ tryIdx ] == NULL )
        {
            log_error("Failed to create image %d of size %d x %d (%s).\n", tryIdx, (int)dims[0], (int)dims[1], IGetErrorString( error ));
            return -1;
        }

        results[ tryIdx ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), resultBytes, NULL, &error );
        if( results[ tryIdx ] == NULL)
        {
            log_error("Failed to create array %d of size %d.\n", tryIdx, (int)threads[0]*2);
            return -1;
        }
    }

    // Phase 2: rebind the arguments and enqueue, without waiting in between
    for( tryIdx = 0; tryIdx < NUM_TRIES; tryIdx++ )
    {
        error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &images[ tryIdx ] );
        sprintf( errStr, "Unable to set argument 0 for kernel try %d", tryIdx );
        test_error( error, errStr );

        error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &results[ tryIdx ] );
        sprintf( errStr, "Unable to set argument 1 for kernel try %d", tryIdx );
        test_error( error, errStr );

        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
        sprintf( errStr, "Unable to execute kernel try %d", tryIdx );
        test_error( error, errStr );
    }

    // Phase 3: fetch every output buffer in turn and compare it with the image bound for that try
    for( tryIdx = 0; tryIdx < NUM_TRIES; tryIdx++ )
    {
        error = clEnqueueReadBuffer( queue, results[ tryIdx ], CL_TRUE, 0, resultBytes, resultArray, 0, NULL, NULL );
        sprintf( errStr, "Unable to read results for kernel try %d", tryIdx );
        test_error( error, errStr );

        const int imageWidth = (int)sizes[ tryIdx ][ 0 ];
        const int imageHeight = (int)sizes[ tryIdx ][ 1 ];

        // Sample j holds (width * j, height * j)
        for( int j = 0; j < NUM_THREADS; j++ )
        {
            const cl_int gotWidth = resultArray[ j * 2 + 0 ];
            const cl_int gotHeight = resultArray[ j * 2 + 1 ];

            if( gotWidth != imageWidth * j )
            {
                log_error( "ERROR: Verficiation for kernel try %d, sample %d FAILED, expected a width of %d, got %d\n",
                           tryIdx, j, imageWidth * j, gotWidth );
                return -1;
            }
            if( gotHeight != imageHeight * j )
            {
                log_error( "ERROR: Verficiation for kernel try %d, sample %d FAILED, expected a height of %d, got %d\n",
                           tryIdx, j, imageHeight * j, gotHeight );
                return -1;
            }
        }
    }

    // Every try verified
    return 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,277 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/conversions.h"
// This test is designed to stress passing multiple vector parameters to kernels and verifying access between them all
//
// printf-style template for the kernel source. It contains exactly six %s conversions, one per kernel parameter:
// the element types of src1, src2, src3, dst1, dst2 and dst3, in that order. Each work-item copies element tid
// from every source buffer to the matching destination buffer.
const char *multi_arg_kernel_source_pattern =
"__kernel void sample_test(__global %s *src1, __global %s *src2, __global %s *src3, __global %s *dst1, __global %s *dst2, __global %s *dst3 )\n"
"{\n"
" int tid = get_global_id(0);\n"
" dst1[tid] = src1[tid];\n"
" dst2[tid] = src2[tid];\n"
" dst3[tid] = src3[tid];\n"
"}\n";
// Seed defined elsewhere in the test program
extern cl_uint gRandomSeed;
// NOTE(review): presumably the tolerance for floating-point result comparison - confirm against its uses
#define MAX_ERROR_TOLERANCE 0.0005f
int test_multi_arg_set(cl_device_id device, cl_context context, cl_command_queue queue,
ExplicitType vec1Type, int vec1Size,
ExplicitType vec2Type, int vec2Size,
ExplicitType vec3Type, int vec3Size, MTdata d)
{
clProgramWrapper program;
clKernelWrapper kernel;
int error, i, j;
clMemWrapper streams[ 6 ];
size_t threads[1], localThreads[1];
char programSrc[ 10248 ], vec1Name[ 64 ], vec2Name[ 64 ], vec3Name[ 64 ];
char sizeNames[][ 4 ] = { "", "2", "3", "4", "", "", "", "8" };
const char *ptr;
void *initData[3], *resultData[3];
// Create the program source
sprintf( vec1Name, "%s%s", get_explicit_type_name( vec1Type ), sizeNames[ vec1Size - 1 ] );
sprintf( vec2Name, "%s%s", get_explicit_type_name( vec2Type ), sizeNames[ vec2Size - 1 ] );
sprintf( vec3Name, "%s%s", get_explicit_type_name( vec3Type ), sizeNames[ vec3Size - 1 ] );
sprintf( programSrc, multi_arg_kernel_source_pattern,
vec1Name, vec2Name, vec3Name, vec1Name, vec2Name, vec3Name,
vec1Size, vec1Size, vec2Size, vec2Size, vec3Size, vec3Size );
ptr = programSrc;
// Create our testing kernel
error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_test" );
test_error( error, "Unable to create testing kernel" );
// Get thread dimensions
threads[0] = 1024;
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
test_error( error, "Unable to get work group size for kernel" );
// Create input streams
initData[ 0 ] = create_random_data( vec1Type, d, (unsigned int)threads[ 0 ] * vec1Size );
streams[ 0 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, initData[ 0 ], &error );
test_error( error, "Unable to create testing stream" );
initData[ 1 ] = create_random_data( vec2Type, d, (unsigned int)threads[ 0 ] * vec2Size );
streams[ 1 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, initData[ 1 ], &error );
test_error( error, "Unable to create testing stream" );
initData[ 2 ] = create_random_data( vec3Type, d, (unsigned int)threads[ 0 ] * vec3Size );
streams[ 2 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, initData[ 2 ], &error );
test_error( error, "Unable to create testing stream" );
streams[ 3 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, NULL, &error );
test_error( error, "Unable to create testing stream" );
streams[ 4 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, NULL, &error );
test_error( error, "Unable to create testing stream" );
streams[ 5 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, NULL, &error );
test_error( error, "Unable to create testing stream" );
// Set the arguments
error = 0;
for( i = 0; i < 6; i++ )
error |= clSetKernelArg( kernel, i, sizeof( cl_mem ), &streams[ i ] );
test_error( error, "Unable to set arguments for kernel" );
// Execute!
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Unable to execute kernel" );
// Read results
resultData[0] = malloc( get_explicit_type_size( vec1Type ) * vec1Size * threads[0] );
resultData[1] = malloc( get_explicit_type_size( vec2Type ) * vec2Size * threads[0] );
resultData[2] = malloc( get_explicit_type_size( vec3Type ) * vec3Size * threads[0] );
error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, get_explicit_type_size( vec1Type ) * vec1Size * threads[ 0 ], resultData[0], 0, NULL, NULL );
error |= clEnqueueReadBuffer( queue, streams[ 4 ], CL_TRUE, 0, get_explicit_type_size( vec2Type ) * vec2Size * threads[ 0 ], resultData[1], 0, NULL, NULL );
error |= clEnqueueReadBuffer( queue, streams[ 5 ], CL_TRUE, 0, get_explicit_type_size( vec3Type ) * vec3Size * threads[ 0 ], resultData[2], 0, NULL, NULL );
test_error( error, "Unable to read result stream" );
// Verify
char *ptr1 = (char *)initData[ 0 ], *ptr2 = (char *)resultData[ 0 ];
size_t span = get_explicit_type_size( vec1Type );
for( i = 0; i < (int)threads[0]; i++ )
{
for( j = 0; j < vec1Size; j++ )
{
if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
{
log_error( "ERROR: Value did not validate for component %d of item %d of stream 0!\n", j, i );
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return -1;
}
}
ptr1 += span * vec1Size;
ptr2 += span * vec1Size;
}
ptr1 = (char *)initData[ 1 ];
ptr2 = (char *)resultData[ 1 ];
span = get_explicit_type_size( vec2Type );
for( i = 0; i < (int)threads[0]; i++ )
{
for( j = 0; j < vec2Size; j++ )
{
if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
{
log_error( "ERROR: Value did not validate for component %d of item %d of stream 1!\n", j, i );
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return -1;
}
}
ptr1 += span * vec2Size;
ptr2 += span * vec2Size;
}
ptr1 = (char *)initData[ 2 ];
ptr2 = (char *)resultData[ 2 ];
span = get_explicit_type_size( vec3Type );
for( i = 0; i < (int)threads[0]; i++ )
{
for( j = 0; j < vec3Size; j++ )
{
if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
{
log_error( "ERROR: Value did not validate for component %d of item %d of stream 2!\n", j, i );
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return -1;
}
}
ptr1 += span * vec3Size;
ptr2 += span * vec3Size;
}
// If we got here, everything verified successfully
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return 0;
}
// Runs test_multi_arg_set() for every ordered triple of scalar types
// (char, short, int, float) combined with every ordered triple of vector
// sizes (2, 4, 8). Stops and returns -1 at the first failing configuration;
// returns 0 when all of them pass.
int test_kernel_arg_multi_setup_exhaustive(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    const ExplicitType scalarTypes[] = { kChar, kShort, kInt, kFloat };
    const int vectorSizes[] = { 2, 4, 8 };
    const int typeCount = (int)( sizeof( scalarTypes ) / sizeof( scalarTypes[ 0 ] ) );
    const int sizeCount = (int)( sizeof( vectorSizes ) / sizeof( vectorSizes[ 0 ] ) );
    RandomSeed seed( gRandomSeed );

    log_info( "\n" ); // for formatting

    // Outer three loops: type of each argument pair
    for( int t1 = 0; t1 < typeCount; ++t1 )
    {
        for( int t2 = 0; t2 < typeCount; ++t2 )
        {
            for( int t3 = 0; t3 < typeCount; ++t3 )
            {
                log_info( "\n\ttesting %s, %s, %s...",
                          get_explicit_type_name( scalarTypes[ t1 ] ),
                          get_explicit_type_name( scalarTypes[ t2 ] ),
                          get_explicit_type_name( scalarTypes[ t3 ] ) );

                // Inner three loops: vector width of each argument pair
                for( int s1 = 0; s1 < sizeCount; ++s1 )
                {
                    for( int s2 = 0; s2 < sizeCount; ++s2 )
                    {
                        for( int s3 = 0; s3 < sizeCount; ++s3 )
                        {
                            // One progress dot per configuration
                            log_info(".");
                            fflush( stdout);

                            const int status = test_multi_arg_set( device, context, queue,
                                                                   scalarTypes[ t1 ], vectorSizes[ s1 ],
                                                                   scalarTypes[ t2 ], vectorSizes[ s2 ],
                                                                   scalarTypes[ t3 ], vectorSizes[ s3 ], seed );
                            if( status )
                                return -1;
                        }
                    }
                }
            }
        }
    }

    log_info( "\n" );
    return 0;
}
int test_kernel_arg_multi_setup_random(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
// Loop through a selection of combinations
ExplicitType types[] = { kChar, kShort, kInt, kFloat, kNumExplicitTypes };
int type1, type2, type3;
int size1, size2, size3;
RandomSeed seed( gRandomSeed );
num_elements = 3*3*3*4;
log_info( "Testing %d random configurations\n", num_elements );
// Loop through every combination of vector size
for( size1 = 2; size1 <= 8; size1 <<= 1 )
{
for( size2 = 2; size2 <= 8; size2 <<= 1 )
{
for( size3 = 2; size3 <= 8; size3 <<= 1 )
{
// Loop through 4 type combinations for each size combination
int n;
for (n=0; n<4; n++) {
type1 = (int)get_random_float(0,4, seed);
type2 = (int)get_random_float(0,4, seed);
type3 = (int)get_random_float(0,4, seed);
log_info( "\ttesting %s%d, %s%d, %s%d...\n",
get_explicit_type_name( types[ type1 ] ), size1,
get_explicit_type_name( types[ type2 ] ), size2,
get_explicit_type_name( types[ type3 ] ), size3 );
if( test_multi_arg_set( device, context, queue,
types[ type1 ], size1,
types[ type2 ], size2,
types[ type3 ], size3, seed ) )
return -1;
}
}
}
}
return 0;
}

View File

@@ -0,0 +1,704 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/conversions.h"
// Seed defined outside this file; used below to initialise the random generators.
extern cl_uint gRandomSeed;
// Single-string program: converts each float in src to int and stores it in dst.
// Used by test_get_kernel_info, test_execute_kernel_local_sizes and test_set_kernel_arg_by_index.
const char *sample_single_test_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n" };
// Program whose first argument points to a struct holding two __global int
// pointers; each work-item writes A[tid] + B[tid]. Used by test_set_kernel_arg_struct.
const char *sample_struct_test_kernel[] = {
"typedef struct {\n"
"__global int *A;\n"
"__global int *B;\n"
"} input_pair_t;\n"
"\n"
"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src->A[tid] + src->B[tid];\n"
"\n"
"}\n" };
// Program whose first argument is an array of { int A; int B; } structs; each
// work-item writes src[tid].A + src[tid].B. Used by test_set_kernel_arg_struct_array.
const char *sample_struct_array_test_kernel[] = {
"typedef struct {\n"
"int A;\n"
"int B;\n"
"} input_pair_t;\n"
"\n"
"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src[tid].A + src[tid].B;\n"
"\n"
"}\n" };
// Program that adds two __constant int buffers element-wise into a __global
// destination. Used by test_set_kernel_arg_constant.
const char *sample_const_test_kernel[] = {
"__kernel void sample_test(__constant int *src1, __constant int *src2, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src1[tid] + src2[tid];\n"
"\n"
"}\n" };
// Program with a program-scope __constant (addFactor = 1024) that is added to
// every source element. Used by test_kernel_global_constant, which checks for
// the same literal 1024 on the host side.
const char *sample_const_global_test_kernel[] = {
"__constant int addFactor = 1024;\n"
"__kernel void sample_test(__global int *src1, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src1[tid] + addFactor;\n"
"\n"
"}\n" };
// Two-string program defining two kernels (sample_test and sample_test2).
// test_create_kernels_in_program passes both strings to
// clCreateProgramWithSource and expects a kernel count of 2.
const char *sample_two_kernel_program[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n",
"__kernel void sample_test2(__global int *src, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)src[tid];\n"
"\n"
"}\n" };
int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
cl_program program, testProgram;
cl_context testContext;
cl_kernel kernel;
cl_char name[ 512 ];
cl_uint numArgs, numInstances;
size_t paramSize;
/* Create reference */
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
{
return -1;
}
error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, NULL, 0, &paramSize );
test_error( error, "Unable to get kernel function name param size" );
if( paramSize != strlen( "sample_test" ) + 1 )
{
log_error( "ERROR: Kernel function name param returns invalid size (expected %d, got %d)\n", (int)strlen( "sample_test" ) + 1, (int)paramSize );
return -1;
}
error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( name ), name, NULL );
test_error( error, "Unable to get kernel function name" );
if( strcmp( (char *)name, "sample_test" ) != 0 )
{
log_error( "ERROR: Kernel function name returned invalid value (expected sample_test, got %s)\n", (char *)name );
return -1;
}
error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &paramSize );
test_error( error, "Unable to get kernel arg count param size" );
if( paramSize != sizeof( numArgs ) )
{
log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numArgs ), (int)paramSize );
return -1;
}
error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL );
test_error( error, "Unable to get kernel arg count" );
if( numArgs != 2 )
{
log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d)\n", 2, numArgs );
return -1;
}
error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, 0, NULL, &paramSize );
test_error( error, "Unable to get kernel reference count param size" );
if( paramSize != sizeof( numInstances ) )
{
log_error( "ERROR: Kernel reference count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numInstances ), (int)paramSize );
return -1;
}
error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL );
test_error( error, "Unable to get kernel reference count" );
error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, NULL, 0, &paramSize );
test_error( error, "Unable to get kernel program param size" );
if( paramSize != sizeof( testProgram ) )
{
log_error( "ERROR: Kernel program param returns invalid size (expected %d, got %d)\n", (int)sizeof( testProgram ), (int)paramSize );
return -1;
}
error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( testProgram ), &testProgram, NULL );
test_error( error, "Unable to get kernel program" );
if( testProgram != program )
{
log_error( "ERROR: Kernel program returned invalid value (expected %p, got %p)\n", program, testProgram );
return -1;
}
error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( testContext ), &testContext, NULL );
test_error( error, "Unable to get kernel context" );
if( testContext != context )
{
log_error( "ERROR: Kernel context returned invalid value (expected %p, got %p)\n", context, testContext );
return -1;
}
/* Release memory */
clReleaseKernel( kernel );
clReleaseProgram( program );
return 0;
}
int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[2];
size_t threads[1], localThreads[1];
cl_float inputData[100];
cl_int outputData[100];
RandomSeed seed( gRandomSeed );
int i;
/* Create a kernel to test with */
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
{
return -1;
}
/* Create some I/O streams */
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 100, NULL, &error);
test_error( error, "Creating test array failed" );
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 100, NULL, &error);
test_error( error, "Creating test array failed" );
/* Write some test data */
memset( outputData, 0, sizeof( outputData ) );
for (i=0; i<100; i++)
inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*100, (void *)inputData, 0, NULL, NULL);
test_error( error, "Unable to set testing kernel data" );
/* Set the arguments */
error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
test_error( error, "Unable to set kernel arguments" );
error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
test_error( error, "Unable to set kernel arguments" );
/* Test running the kernel and verifying it */
threads[0] = (size_t)100;
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
test_error( error, "Unable to get work group size to use" );
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
/* Try again */
if( localThreads[0] > 1 )
localThreads[0] /= 2;
while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] )
localThreads[0]--;
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
/* And again */
if( localThreads[0] > 1 )
localThreads[0] /= 2;
while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] )
localThreads[0]--;
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
/* One more time */
localThreads[0] = (unsigned int)1;
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
return 0;
}
// Verifies that kernel arguments may be set in any index order: argument 1
// is set before argument 0, then the float->int sample kernel is run over
// ten elements and its output compared with the host-side conversion.
// Returns 0 on success, non-zero on failure.
int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kItemCount = 10 };

    int status;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper buffers[2];            // [0] = float input, [1] = int output
    size_t globalSize[1], localSize[1];
    cl_float hostInput[kItemCount];
    cl_int hostOutput[kItemCount];
    RandomSeed seed( gRandomSeed );
    int idx;

    /* Build the kernel under test */
    if( 0 != create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) )
        return -1;

    /* Device-side input and output buffers */
    buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( hostInput ), NULL, &status );
    test_error( status, "Creating test array failed" );
    buffers[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( hostOutput ), NULL, &status );
    test_error( status, "Creating test array failed" );

    /* Fill the input with random floats and upload it */
    memset( hostOutput, 0, sizeof( hostOutput ) );
    for( idx = 0; idx < kItemCount; ++idx )
    {
        hostInput[idx] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
    }
    status = clEnqueueWriteBuffer( queue, buffers[0], CL_TRUE, 0, sizeof( hostInput ), (void *)hostInput, 0, NULL, NULL );
    test_error( status, "Unable to set testing kernel data" );

    /* Deliberately set argument 1 first, then argument 0 */
    status = clSetKernelArg( kernel, 1, sizeof( buffers[1] ), &buffers[1] );
    test_error( status, "Unable to set indexed kernel arguments" );
    status = clSetKernelArg( kernel, 0, sizeof( buffers[0] ), &buffers[0] );
    test_error( status, "Unable to set indexed kernel arguments" );

    /* Run the kernel */
    globalSize[0] = (size_t)kItemCount;
    status = get_max_common_work_group_size( context, kernel, globalSize[0], &localSize[0] );
    test_error( status, "Unable to get work group size to use" );
    status = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL );
    test_error( status, "Kernel execution failed" );

    /* Fetch and check the result */
    status = clEnqueueReadBuffer( queue, buffers[1], CL_TRUE, 0, sizeof( hostOutput ), (void *)hostOutput, 0, NULL, NULL );
    test_error( status, "Unable to get result data" );
    for( idx = 0; idx < kItemCount; ++idx )
    {
        const bool matches = ( hostOutput[idx] == (int)hostInput[idx] );
        if( !matches )
        {
            log_error( "ERROR: Data did not verify on first pass!\n" );
            return -1;
        }
    }

    return 0;
}
// Passes a host struct holding two buffer handles as kernel argument 0 and
// checks that the kernel writes A[i] + B[i] for ten elements.
//
// All OpenCL objects are held in the RAII wrappers used by the other tests
// in this file, so nothing leaks when a check fails and the function returns
// early.
//
// NOTE(review): argument 0 is declared as a __global pointer in the kernel
// source but is set here from a host struct containing two cl_mem handles
// with sizeof( image_pair ); confirm against the clSetKernelArg
// specification whether this is expected to be accepted.
//
// Returns 0 on success, non-zero on failure.
int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streamA, streamB, outStream;
    cl_mem outMem;
    size_t threads[1], localThreads[1];
    cl_int outputData[10];
    int i;
    cl_int randomTestDataA[10], randomTestDataB[10];
    MTdata d;
    struct img_pair_t
    {
        cl_mem streamA;
        cl_mem streamB;
    } image_pair;

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    d = init_genrand( gRandomSeed );
    for( i = 0; i < 10; i++ )
    {
        randomTestDataA[i] = (cl_int)genrand_int32(d);
        randomTestDataB[i] = (cl_int)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    streamA = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error);
    test_error( error, "Creating test array failed" );
    streamB = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error);
    test_error( error, "Creating test array failed" );
    outStream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* The struct carries the raw handles; ownership stays with the wrappers */
    image_pair.streamA = streamA;
    image_pair.streamB = streamB;
    outMem = outStream;

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( image_pair ), &image_pair);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), &outMem);
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;
    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for (i=0; i<10; i++)
    {
        /* The inputs span the full 32-bit range, so the reference sum is
           formed in unsigned arithmetic to avoid signed overflow on the host. */
        cl_int expected = (cl_int)( (cl_uint)randomTestDataA[i] + (cl_uint)randomTestDataB[i] );
        if (outputData[i] != expected)
        {
            log_error( "ERROR: Data did not verify!\n" );
            return -1;
        }
    }

    return 0;
}
// Feeds two __constant int buffers to the sample kernel and checks that the
// __global output holds their element-wise sum for ten elements.
// Fails up front if the device reports a maximum constant buffer size
// smaller than the test buffers.
// Returns 0 on success, non-zero on failure.
int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kItemCount = 10 };

    int status;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper buffers[3];            // [0], [1] = constant inputs, [2] = output
    size_t globalSize[1], localSize[1];
    cl_int results[kItemCount];
    int idx;
    cl_int inputA[kItemCount], inputB[kItemCount];
    cl_ulong constantLimit;
    MTdata rng;

    /* The constant buffers must fit within the device limit */
    status = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( constantLimit ), &constantLimit, NULL );
    test_error( status, "Unable to get max constant buffer size" );
    if( constantLimit < sizeof( cl_int ) * kItemCount )
    {
        log_error( "ERROR: Unable to test constant argument to kernel: max size of constant buffer is reported as %d!\n", (int)constantLimit );
        return -1;
    }

    /* Build the kernel under test */
    if( 0 != create_single_kernel_helper( context, &program, &kernel, 1, sample_const_test_kernel, "sample_test" ) )
        return -1;

    /* Random inputs, masked to 24 bits so that the sum of two values stays
       positive and cannot overflow during verification */
    rng = init_genrand( gRandomSeed );
    for( idx = 0; idx < kItemCount; ++idx )
    {
        inputA[idx] = (cl_int)genrand_int32(rng) & 0xffffff;
        inputB[idx] = (cl_int)genrand_int32(rng) & 0xffffff;
    }
    free_mtdata( rng );
    rng = NULL;

    /* Device buffers: two inputs copied from host memory, one output */
    buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( inputA ), inputA, &status );
    test_error( status, "Creating test array failed" );
    buffers[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( inputB ), inputB, &status );
    test_error( status, "Creating test array failed" );
    buffers[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( results ), NULL, &status );
    test_error( status, "Creating test array failed" );

    /* Bind the three buffers to arguments 0..2 */
    for( idx = 0; idx < 3; ++idx )
    {
        status = clSetKernelArg( kernel, idx, sizeof( buffers[idx] ), &buffers[idx] );
        test_error( status, "Unable to set indexed kernel arguments" );
    }

    /* Run the kernel */
    globalSize[0] = (size_t)kItemCount;
    status = get_max_common_work_group_size( context, kernel, globalSize[0], &localSize[0] );
    test_error( status, "Unable to get work group size to use" );
    status = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL );
    test_error( status, "Kernel execution failed" );

    /* Fetch and check the result */
    status = clEnqueueReadBuffer( queue, buffers[2], CL_TRUE, 0, sizeof( results ), (void *)results, 0, NULL, NULL );
    test_error( status, "Unable to get result data" );
    for( idx = 0; idx < kItemCount; ++idx )
    {
        const cl_int expected = inputA[idx] + inputB[idx];
        if( results[idx] == expected )
            continue;

        log_error( "ERROR: Data sample %d did not verify! %d does not match %d + %d (%d)\n", idx, results[idx], inputA[idx], inputB[idx], ( inputA[idx] + inputB[idx] ) );
        return -1;
    }

    return 0;
}
// Uploads an array of ten { A, B } integer pairs and checks that the kernel
// writes A + B for each element.
// Returns 0 on success, non-zero on failure.
int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    size_t threads[1], localThreads[1];
    cl_int outputData[10];
    int i;
    MTdata d;
    // Host mirror of the kernel-side input_pair_t
    typedef struct img_pair_type
    {
        int A;
        int B;
    } image_pair_t;
    image_pair_t image_pair[ 10 ];

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_array_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    d = init_genrand( gRandomSeed );
    for( i = 0; i < 10; i++ )
    {
        /* Both fields must be initialised: B is uploaded to the device and
           read again during verification */
        image_pair[i].A = (cl_int)genrand_int32(d);
        image_pair[i].B = (cl_int)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(image_pair_t) * 10, (void *)image_pair, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;
    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for (i=0; i<10; i++)
    {
        /* The inputs span the full 32-bit range, so the reference sum is
           formed in unsigned arithmetic to avoid signed overflow on the host. */
        cl_int expected = (cl_int)( (cl_uint)image_pair[i].A + (cl_uint)image_pair[i].B );
        if (outputData[i] != expected)
        {
            log_error( "ERROR: Data did not verify!\n" );
            return -1;
        }
    }

    return 0;
}
int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
cl_program program;
cl_kernel kernel[3];
unsigned int kernelCount;
/* Create a test program */
program = clCreateProgramWithSource( context, 2, sample_two_kernel_program, NULL, &error);
if( program == NULL || error != CL_SUCCESS )
{
log_error( "ERROR: Unable to create test program!\n" );
return -1;
}
/* Build */
error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
test_error( error, "Unable to build test program" );
/* Try getting the kernel count */
error = clCreateKernelsInProgram( program, 0, NULL, &kernelCount );
test_error( error, "Unable to get kernel count for built program" );
if( kernelCount != 2 )
{
log_error( "ERROR: Returned kernel count from clCreateKernelsInProgram is incorrect! (got %d, expected 2)\n", kernelCount );
return -1;
}
/* Try actually getting the kernels */
error = clCreateKernelsInProgram( program, 2, kernel, NULL );
test_error( error, "Unable to get kernels for built program" );
clReleaseKernel( kernel[0] );
clReleaseKernel( kernel[1] );
clReleaseProgram( program );
return 0;
}
// Runs the kernel that adds the program-scope __constant addFactor (1024)
// to every input element, and checks the ten results on the host.
// Returns 0 on success, non-zero on failure.
int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kItemCount = 10 };

    int status;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper buffers[2];            // [0] = input, [1] = output
    size_t globalSize[1], localSize[1];
    cl_int results[kItemCount];
    int idx;
    cl_int inputValues[kItemCount];
    MTdata rng;

    /* Build the kernel under test */
    if( 0 != create_single_kernel_helper( context, &program, &kernel, 1, sample_const_global_test_kernel, "sample_test" ) )
        return -1;

    /* Random inputs, masked to 16 bits so they are small and positive and
       adding 1024 cannot overflow during verification */
    rng = init_genrand( gRandomSeed );
    for( idx = 0; idx < kItemCount; ++idx )
    {
        inputValues[idx] = (cl_int)genrand_int32(rng) & 0xffff;
    }
    free_mtdata( rng );
    rng = NULL;

    /* Device buffers: input copied from host memory, plus the output */
    buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( inputValues ), inputValues, &status );
    test_error( status, "Creating test array failed" );
    buffers[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( results ), NULL, &status );
    test_error( status, "Creating test array failed" );

    /* Bind the two buffers to arguments 0 and 1 */
    for( idx = 0; idx < 2; ++idx )
    {
        status = clSetKernelArg( kernel, idx, sizeof( buffers[idx] ), &buffers[idx] );
        test_error( status, "Unable to set indexed kernel arguments" );
    }

    /* Run the kernel */
    globalSize[0] = (size_t)kItemCount;
    status = get_max_common_work_group_size( context, kernel, globalSize[0], &localSize[0] );
    test_error( status, "Unable to get work group size to use" );
    status = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL );
    test_error( status, "Kernel execution failed" );

    /* Fetch and check the result */
    status = clEnqueueReadBuffer( queue, buffers[1], CL_TRUE, 0, sizeof( results ), (void *)results, 0, NULL, NULL );
    test_error( status, "Unable to get result data" );
    for( idx = 0; idx < kItemCount; ++idx )
    {
        if( results[idx] == inputValues[idx] + 1024 )
            continue;

        log_error( "ERROR: Data sample %d did not verify! %d does not match %d + 1024 (%d)\n", idx, results[idx], inputValues[idx], ( inputValues[idx] + 1024 ) );
        return -1;
    }

    return 0;
}

View File

@@ -0,0 +1,750 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/testHarness.h"
extern cl_uint gRandomSeed;
// Queries one clGetMemObjectInfo parameter and validates both its value and
// its reported size.
//   mem       : memory object to query
//   paramName : cl_mem_info selector passed to clGetMemObjectInfo
//   val       : lvalue that receives the queried value (its sizeof is the query size)
//   expected  : value that val must equal
//   name      : string literal naming the parameter, spliced into the messages
//   type      : string literal holding the printf conversion used for both values
//   cast      : type that val is cast to before being printed
// The macro relies on variables named `error` and `size` existing in the
// enclosing scope, and it executes `return -1` from the enclosing function
// when the value or the size does not match.
#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast ) \
error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get mem object " name ); \
if( val != expected ) \
{ \
log_error( "ERROR: Mem object " name " did not validate! (expected " type ", got " type " from %s:%d)\n", \
expected, (cast)val, __FILE__, __LINE__ ); \
return -1; \
} \
if( size != sizeof( val ) ) \
{ \
log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d from %s:%d)\n", \
(int)sizeof( val ), (int)size , __FILE__, __LINE__ ); \
return -1; \
}
// Callback with the (cl_mem, void *) signature: the memory object argument
// is ignored and the user-data pointer is released with free().
static void CL_CALLBACK mem_obj_destructor_callback( cl_mem /* unused */, void *userData )
{
    free( userData );
}
// Returns a random value in [1, mod - 1], drawing from generator *d until
// the remainder is non-zero.
// The caller must pass mod >= 2: a mod of 1 can never produce a non-zero
// remainder (the loop would not terminate) and a mod of 0 divides by zero.
static unsigned int
get_image_dim(MTdata *d, unsigned int mod)
{
    for( ;; )
    {
        const unsigned int candidate = (unsigned int)genrand_int32(*d) % mod;
        if( candidate != 0 )
            return candidate;
    }
}
// Verifies clGetMemObjectInfo for buffer objects and for sub-buffers created from them.
//
// For every entry of bufferFlags a buffer of 4 * addressAlign bytes is created (with a
// malloc'ed host allocation whenever the flags include CL_MEM_USE_HOST_PTR or
// CL_MEM_COPY_HOST_PTR) and its type, flags, size, map count, reference count, context,
// associated mem object and offset are queried. Then, for every compatible entry of
// subBufferFlags, a sub-buffer covering [addressAlign, 2 * addressAlign) is created and
// the same queries are validated against the values inherited from the parent buffer.
//
// deviceID     - device whose CL_DEVICE_MEM_BASE_ADDR_ALIGN sizes the buffers
// context      - context the buffers are created in
// ignoreQueue  - unused
// num_elements - unused
//
// Returns CL_SUCCESS on success, -1 when a queried value or size does not validate, or
// whatever test_error returns when an OpenCL call fails.
int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    int error;
    size_t size;        // written by TEST_MEM_OBJECT_PARAM and the explicit size checks
    void * buffer = NULL;

    clMemWrapper bufferObject;
    clMemWrapper subBufferObject;

    // Every combination of host-access flag x device-access flag x host-pointer flag.
    cl_mem_flags bufferFlags[] = {
        CL_MEM_READ_WRITE,
        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        CL_MEM_READ_ONLY,
        CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_WRITE_ONLY,
        CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
    };

    // A 0 in the device-access or host-access position means "inherit from the parent".
    cl_mem_flags subBufferFlags[] = {
        CL_MEM_READ_WRITE,
        CL_MEM_READ_ONLY,
        CL_MEM_WRITE_ONLY,
        0,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_READ_ONLY | 0,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_WRITE_ONLY | 0,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_NO_ACCESS | 0,
    };

    // Get the address alignment, so we can make sure the sub-buffer test later works properly.
    cl_uint addressAlignBits;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(addressAlignBits), &addressAlignBits, NULL );
    // Without this check a failed query would leave addressAlignBits uninitialized.
    test_error( error, "Unable to get device memory base address alignment" );

    // The device reports the alignment in bits; use at least 128 bytes.
    size_t addressAlign = addressAlignBits/8;
    if ( addressAlign < 128 )
    {
        addressAlign = 128;
    }

    for ( unsigned int i = 0; i < sizeof(bufferFlags) / sizeof(cl_mem_flags); ++i )
    {
        if ( bufferFlags[ i ] & CL_MEM_USE_HOST_PTR )
        {
            // Create a buffer object to test against.
            buffer = malloc( addressAlign * 4 );
            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error );
            if ( error )
            {
                free( buffer );
                test_error( error, "Unable to create buffer (CL_MEM_USE_HOST_PTR) to test with" );
            }

            // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
            error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer );
            test_error( error, "Unable to set mem object destructor callback" );

            // With CL_MEM_USE_HOST_PTR the buffer must report the pointer it was created with.
            void * ptr;
            TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_HOST_PTR, ptr, buffer, "host pointer", "%p", void * )
        }
        else if ( (bufferFlags[ i ] & CL_MEM_ALLOC_HOST_PTR) && (bufferFlags[ i ] & CL_MEM_COPY_HOST_PTR) )
        {
            // Create a buffer object to test against.
            buffer = malloc( addressAlign * 4 );
            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error );
            if ( error )
            {
                free( buffer );
                test_error( error, "Unable to create buffer (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" );
            }

            // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
            error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer );
            test_error( error, "Unable to set mem object destructor callback" );
        }
        else if ( bufferFlags[ i ] & CL_MEM_ALLOC_HOST_PTR )
        {
            // Create a buffer object to test against.
            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error );
            test_error( error, "Unable to create buffer (CL_MEM_ALLOC_HOST_PTR) to test with" );
        }
        else if ( bufferFlags[ i ] & CL_MEM_COPY_HOST_PTR )
        {
            // Create a buffer object to test against.
            buffer = malloc( addressAlign * 4 );
            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error );
            if ( error )
            {
                free( buffer );
                test_error( error, "Unable to create buffer (CL_MEM_COPY_HOST_PTR) to test with" );
            }

            // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
            error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer );
            test_error( error, "Unable to set mem object destructor callback" );
        }
        else
        {
            // Create a buffer object to test against.
            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error );
            test_error( error, "Unable to create buffer to test with" );
        }

        // Perform buffer object queries.
        cl_mem_object_type type;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_TYPE, type, CL_MEM_OBJECT_BUFFER, "type", "%d", int )

        cl_mem_flags flags;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_FLAGS, flags, (unsigned int)bufferFlags[ i ], "flags", "%d", unsigned int )

        size_t sz;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_SIZE, sz, (size_t)( addressAlign * 4 ), "size", "%ld", size_t )

        // Only the returned size is validated for the map count; its value is not checked.
        cl_uint mapCount;
        error = clGetMemObjectInfo( bufferObject, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size );
        test_error( error, "Unable to get mem object map count" );
        if( size != sizeof( mapCount ) )
        {
            log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n",
                      (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ );
            return -1;
        }

        // Likewise, only the returned size is validated for the reference count.
        cl_uint refCount;
        error = clGetMemObjectInfo( bufferObject, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
        test_error( error, "Unable to get mem object reference count" );
        if( size != sizeof( refCount ) )
        {
            log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n",
                      (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ );
            return -1;
        }

        cl_context otherCtx;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context )

        cl_mem origObj;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (void *)NULL, "associated mem object", "%p", void * )

        size_t offset;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_OFFSET, offset, 0L, "offset", "%ld", size_t )

        // The sub-buffer covers the second addressAlign-sized quarter of the parent.
        cl_buffer_region region;
        region.origin = addressAlign;
        region.size = addressAlign;

        // Loop over possible sub-buffer objects to create.
        for ( unsigned int j = 0; j < sizeof(subBufferFlags) / sizeof(cl_mem_flags); ++j )
        {
            if ( subBufferFlags[ j ] & CL_MEM_READ_WRITE )
            {
                if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) )
                    continue; // Buffer must be read_write for sub-buffer to be read_write.
            }
            if ( subBufferFlags[ j ] & CL_MEM_READ_ONLY )
            {
                if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) && !(bufferFlags[ i ] & CL_MEM_READ_ONLY) )
                    continue; // Buffer must be read_write or read_only for sub-buffer to be read_only
            }
            if ( subBufferFlags[ j ] & CL_MEM_WRITE_ONLY )
            {
                if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) && !(bufferFlags[ i ] & CL_MEM_WRITE_ONLY) )
                    continue; // Buffer must be read_write or write_only for sub-buffer to be write_only
            }
            if ( subBufferFlags[ j ] & CL_MEM_HOST_READ_ONLY )
            {
                if ( (bufferFlags[ i ] & CL_MEM_HOST_NO_ACCESS) || (bufferFlags[ i ] & CL_MEM_HOST_WRITE_ONLY) )
                    continue; // Buffer must be host all access or host read_only for sub-buffer to be host read_only
            }
            if ( subBufferFlags[ j ] & CL_MEM_HOST_WRITE_ONLY )
            {
                if ( (bufferFlags[ i ] & CL_MEM_HOST_NO_ACCESS) || (bufferFlags[ i ] & CL_MEM_HOST_READ_ONLY) )
                    continue; // Buffer must be host all access or host write_only for sub-buffer to be host write_only
            }

            subBufferObject = clCreateSubBuffer( bufferObject, subBufferFlags[ j ], CL_BUFFER_CREATE_TYPE_REGION, &region, &error );
            test_error( error, "Unable to create sub-buffer to test against" );

            // Perform sub-buffer object queries.
            cl_mem_object_type type;
            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_TYPE, type, CL_MEM_OBJECT_BUFFER, "type", "%d", int )

            // Build the flags the sub-buffer is expected to report: its own flags, plus the
            // parent's device-access and host-access flags where it specified none, plus
            // the parent's host-pointer flags, which are always inherited.
            cl_mem_flags flags;
            cl_mem_flags inheritedFlags = subBufferFlags[ j ];
            if ( (subBufferFlags[ j ] & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY)) == 0 )
            {
                inheritedFlags |= bufferFlags[ i ] & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY);
            }
            inheritedFlags |= bufferFlags[ i ] & (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR);
            if ( (subBufferFlags[ j ] & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) == 0)
            {
                inheritedFlags |= bufferFlags[ i ] & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS);
            }
            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_FLAGS, flags, (unsigned int)inheritedFlags, "flags", "%d", unsigned int )

            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_SIZE, sz, (size_t)( addressAlign ), "size", "%ld", size_t )

            if ( bufferFlags[ i ] & CL_MEM_USE_HOST_PTR )
            {
                // The sub-buffer's host pointer is the parent's pointer advanced by the region origin.
                void * ptr;
                void * offsetInBuffer = (char *)buffer + addressAlign;

                TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_HOST_PTR, ptr, offsetInBuffer, "host pointer", "%p", void * )
            }

            cl_uint mapCount;
            error = clGetMemObjectInfo( subBufferObject, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size );
            test_error( error, "Unable to get mem object map count" );
            if( size != sizeof( mapCount ) )
            {
                log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n",
                          (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ );
                return -1;
            }

            cl_uint refCount;
            error = clGetMemObjectInfo( subBufferObject, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
            test_error( error, "Unable to get mem object reference count" );
            if( size != sizeof( refCount ) )
            {
                log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n",
                          (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ );
                return -1;
            }

            cl_context otherCtx;
            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context )

            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (cl_mem)bufferObject, "associated mem object", "%p", void * )

            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_OFFSET, offset, (size_t)( addressAlign ), "offset", "%ld", size_t )

            // Release manually and clear the wrapper so it does not release a second time.
            clReleaseMemObject( subBufferObject );
            subBufferObject = NULL;
        }

        // Release manually and clear the wrapper so it does not release a second time.
        clReleaseMemObject( bufferObject );
        bufferObject = NULL;
    }

    return CL_SUCCESS;
}
// Validates the clGetMemObjectInfo queries that apply to an already-created image.
//
// image       - pointer to the image mem object to query
// objectFlags - flags the image was created with; CL_MEM_FLAGS must report them
// imageInfo   - descriptor the image was created from; CL_MEM_TYPE must report its image_type
// imageFormat - unused
// pixelSize   - unused
// context     - context the image was created in; CL_MEM_CONTEXT must report it
//
// CL_MEM_SIZE is only required to be queryable (its value is not constrained by the spec),
// CL_MEM_MAP_COUNT and CL_MEM_REFERENCE_COUNT are only checked for their returned size,
// and CL_MEM_OFFSET must be 0.
//
// Returns CL_SUCCESS on success, -1 when a value or size does not validate, or whatever
// test_error returns when a query fails.
int test_get_imageObject_info( cl_mem * image, cl_mem_flags objectFlags, cl_image_desc *imageInfo, cl_image_format *imageFormat, size_t pixelSize, cl_context context )
{
    int error;
    size_t size;        // written by TEST_MEM_OBJECT_PARAM and the explicit size checks
    cl_mem_object_type type;
    cl_mem_flags flags;
    cl_uint mapCount;
    cl_uint refCount;
    cl_context otherCtx;
    size_t offset;
    size_t sz;

    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_TYPE, type, imageInfo->image_type, "type", "%d", int )

    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_FLAGS, flags, (unsigned int)objectFlags, "flags", "%d", unsigned int )

    error = clGetMemObjectInfo( *image, CL_MEM_SIZE, sizeof( sz ), &sz, NULL );
    test_error( error, "Unable to get mem size" );

    // The size returned is not constrained by the spec.

    error = clGetMemObjectInfo( *image, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size );
    test_error( error, "Unable to get mem object map count" );
    if( size != sizeof( mapCount ) )
    {
        log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n",
                  (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ );
        return -1;
    }

    error = clGetMemObjectInfo( *image, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
    test_error( error, "Unable to get mem object reference count" );
    if( size != sizeof( refCount ) )
    {
        log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n",
                  (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ );
        return -1;
    }

    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context )

    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_OFFSET, offset, 0L, "offset", "%ld", size_t )

    return CL_SUCCESS;
}
int test_get_image_info( cl_device_id deviceID, cl_context context, cl_mem_object_type type )
{
int error;
size_t size;
void * image = NULL;
cl_mem imageObject;
cl_image_desc imageInfo;
cl_mem_flags imageFlags[] = {
CL_MEM_READ_WRITE,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
CL_MEM_READ_ONLY,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_WRITE_ONLY,
CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY,
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
};
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
cl_image_format imageFormat;
size_t pixelSize = 4;
imageFormat.image_channel_order = CL_RGBA;
imageFormat.image_channel_data_type = CL_UNORM_INT8;
imageInfo.image_width = imageInfo.image_height = imageInfo.image_depth = 1;
imageInfo.image_array_size = 0;
imageInfo.num_mip_levels = imageInfo.num_samples = 0;
imageInfo.buffer = NULL;
d = init_genrand( gRandomSeed );
for ( unsigned int i = 0; i < sizeof(imageFlags) / sizeof(cl_mem_flags); ++i )
{
imageInfo.image_row_pitch = 0;
imageInfo.image_slice_pitch = 0;
switch (type)
{
case CL_MEM_OBJECT_IMAGE1D:
imageInfo.image_width = get_image_dim(&d, 1023);
imageInfo.image_type = CL_MEM_OBJECT_IMAGE1D;
break;
case CL_MEM_OBJECT_IMAGE2D:
imageInfo.image_width = get_image_dim(&d, 1023);
imageInfo.image_height = get_image_dim(&d, 1023);
imageInfo.image_type = CL_MEM_OBJECT_IMAGE2D;
break;
case CL_MEM_OBJECT_IMAGE3D:
imageInfo.image_width = get_image_dim(&d, 127);
imageInfo.image_height = get_image_dim(&d, 127);
imageInfo.image_depth = get_image_dim(&d, 127);
imageInfo.image_type = CL_MEM_OBJECT_IMAGE3D;
break;
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
imageInfo.image_width = get_image_dim(&d, 1023);
imageInfo.image_array_size = get_image_dim(&d, 1023);
imageInfo.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
break;
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
imageInfo.image_width = get_image_dim(&d, 255);
imageInfo.image_height = get_image_dim(&d, 255);
imageInfo.image_array_size = get_image_dim(&d, 255);
imageInfo.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
break;
}
if ( imageFlags[i] & CL_MEM_USE_HOST_PTR )
{
// Create an image object to test against.
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image with (CL_MEM_USE_HOST_PTR) to test with" );
}
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
void * ptr;
TEST_MEM_OBJECT_PARAM( imageObject, CL_MEM_HOST_PTR, ptr, image, "host pointer", "%p", void * )
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
// release image object
clReleaseMemObject(imageObject);
// Try again with non-zero rowPitch.
imageInfo.image_row_pitch = imageInfo.image_width * pixelSize;
switch (type)
{
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
case CL_MEM_OBJECT_IMAGE3D:
imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height;
break;
}
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image2d (CL_MEM_USE_HOST_PTR) to test with" );
}
// Make sure image2d is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
TEST_MEM_OBJECT_PARAM( imageObject, CL_MEM_HOST_PTR, ptr, image, "host pointer", "%p", void * )
ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
}
else if ( (imageFlags[i] & CL_MEM_ALLOC_HOST_PTR) && (imageFlags[i] & CL_MEM_COPY_HOST_PTR) )
{
// Create an image object to test against.
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" );
}
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
int ret = test_get_imageObject_info( &imageObject, imageFlags[ i ], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
// release image object
clReleaseMemObject(imageObject);
// Try again with non-zero rowPitch.
imageInfo.image_row_pitch = imageInfo.image_width * pixelSize;
switch (type)
{
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
case CL_MEM_OBJECT_IMAGE3D:
imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height;
break;
}
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" );
}
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
}
else if ( imageFlags[i] & CL_MEM_ALLOC_HOST_PTR )
{
// Create an image object to test against.
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, NULL, &error );
test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR) to test with" );
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
}
else if ( imageFlags[i] & CL_MEM_COPY_HOST_PTR )
{
// Create an image object to test against.
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image with (CL_MEM_COPY_HOST_PTR) to test with" );
}
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
clReleaseMemObject(imageObject);
// Try again with non-zero rowPitch.
imageInfo.image_row_pitch = imageInfo.image_width * pixelSize;
switch (type)
{
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
case CL_MEM_OBJECT_IMAGE3D:
imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height;
break;
}
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image with (CL_MEM_COPY_HOST_PTR) to test with" );
}
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
}
else
{
// Create an image object to test against.
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, NULL, &error );
test_error( error, "Unable to create image to test with" );
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
}
clReleaseMemObject( imageObject );
}
return CL_SUCCESS;
}
// Test entry point: mem object info queries for 2D images.
// ignoreQueue and num_elements are unused.
int test_get_image2d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE2D;
    return test_get_image_info( deviceID, context, imageType );
}
// Test entry point: mem object info queries for 3D images.
// ignoreQueue and num_elements are unused.
int test_get_image3d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE3D;
    return test_get_image_info( deviceID, context, imageType );
}
// Test entry point: mem object info queries for 1D images.
// ignoreQueue and num_elements are unused.
int test_get_image1d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE1D;
    return test_get_image_info( deviceID, context, imageType );
}
// Test entry point: mem object info queries for 1D image arrays.
// ignoreQueue and num_elements are unused.
int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY;
    return test_get_image_info( deviceID, context, imageType );
}
// Test entry point: mem object info queries for 2D image arrays.
// ignoreQueue and num_elements are unused.
int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
    return test_get_image_info( deviceID, context, imageType );
}

View File

@@ -0,0 +1,108 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
static volatile cl_int sDestructorIndex;
// Destructor callback used to record callback ordering: bumps the shared counter
// sDestructorIndex and stores the new value into the int that userData points at.
// memObject is unused.
void CL_CALLBACK mem_destructor_callback( cl_mem memObject, void * userData )
{
    int * const orderSlot = static_cast<int *>( userData );

    // ordering of callbacks is guaranteed, meaning we don't need to do atomic operation here
    sDestructorIndex = sDestructorIndex + 1;
    *orderSlot = sDestructorIndex;
}
#ifndef ABS
// Absolute value of x. Every use of the argument is parenthesized so that compound
// arguments such as ABS( a - b ) expand correctly. x is evaluated more than once, so
// do not pass expressions with side effects.
#define ABS( x ) ( ( ( x ) < 0 ) ? -( x ) : ( x ) )
#endif
// Registers three destructor callbacks on memObject, releases it, and verifies that the
// callbacks ran in the reverse of their registration order.
//
// memObject - wrapper holding the mem object to test; it is released by this function and
//             the wrapper is reset to NULL so that it does not release the object again.
//
// Returns 0 when all three callbacks ran in the expected order, -1 when any ran out of
// order, or whatever test_error returns when an OpenCL call fails.
// NOTE: the function spins until every callback has run, so it does not return if the
// implementation never invokes a registered callback.
int test_mem_object_destructor_callback_single( clMemWrapper &memObject )
{
    cl_int error;
    int i;

    // Set up some variables to catch the order in which callbacks are called
    volatile int callbackOrders[ 3 ] = { 0, 0, 0 };
    sDestructorIndex = 0;

    // Set up the callbacks; each one records its call position into its own slot
    for( i = 0; i < 3; i++ )
    {
        error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ i ] );
        test_error( error, "Unable to set destructor callback" );
    }

    // Now release the buffer, which SHOULD call the callbacks
    error = clReleaseMemObject( memObject );
    test_error( error, "Unable to release test buffer" );

    // Note: since we manually released the mem wrapper, we need to set it to NULL to prevent a double-release
    memObject = NULL;

    // At this point, all three callbacks should have already been called
    int numErrors = 0;
    for( i = 0; i < 3; i++ )
    {
        // Spin waiting for the release to finish.  If you don't call the mem_destructor_callback, you will not
        // pass the test.  bugzilla 6316
        while( 0 == callbackOrders[i] )
        {}

        // The callback registered i-th (0-based) must be the (3-i)-th one to run.
        const int expectedOrder = 3 - i;
        if( ABS( callbackOrders[ i ] ) != expectedOrder )
        {
            // Report the order that was actually expected (3-i), not the loop index.
            log_error( "\tERROR: Callback %d was called in the wrong order! (Was called order %d, should have been order %d)\n",
                      i+1, ABS( callbackOrders[ i ] ), expectedOrder );
            numErrors++;
        }
    }

    return ( numErrors > 0 ) ? -1 : 0;
}
// Test entry point: checks destructor callback ordering on a buffer and, when the device
// supports images, on a 2D image as well.
// queue and num_elements are unused.
// Returns 0 on success, -1 when a callback check fails, or whatever test_error returns
// when an object cannot be created.
int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clMemWrapper testBuffer, testImage;
    cl_int error;

    // Buffer case: always exercised.
    testBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, 1024, NULL, &error );
    test_error( error, "Unable to create testing buffer" );

    const int bufferResult = test_mem_object_destructor_callback_single( testBuffer );
    if( bufferResult != 0 )
    {
        log_error( "ERROR: Destructor callbacks for buffer object FAILED\n" );
        return -1;
    }

    // Image case: only exercised when checkForImageSupport reports 0 for the device.
    if( checkForImageSupport( deviceID ) != 0 )
        return 0;

    cl_image_format imageFormat = { CL_RGBA, CL_SIGNED_INT8 };
    testImage = create_image_2d( context, CL_MEM_READ_ONLY, &imageFormat, 16, 16, 0, NULL, &error );
    test_error( error, "Unable to create testing image" );

    const int imageResult = test_mem_object_destructor_callback_single( testImage );
    if( imageResult != 0 )
    {
        log_error( "ERROR: Destructor callbacks for image object FAILED\n" );
        return -1;
    }

    return 0;
}

View File

@@ -0,0 +1,121 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#ifndef _WIN32
#include <unistd.h>
#endif
#include <vector>
#include "../../test_common/harness/conversions.h"
extern cl_uint gRandomSeed;
// Host callback handed to clEnqueueNativeKernel: interprets userData as a
// { source pointer, destination pointer, element count } record and copies
// `count` cl_int values from source to destination.
static void CL_CALLBACK test_native_kernel_fn( void *userData )
{
    struct arg_struct {
        cl_int * source;
        cl_int * dest;
        cl_int count;
    };

    arg_struct *params = static_cast<arg_struct *>( userData );

    // Same element-by-element copy, driven by an explicit index cursor
    cl_int idx = 0;
    while( idx < params->count )
    {
        params->dest[ idx ] = params->source[ idx ];
        ++idx;
    }
}
// Exercises clEnqueueNativeKernel: fills an input buffer with random cl_int
// data, asks the runtime to run test_native_kernel_fn (a plain host copy loop)
// over the input/output buffers, then reads the output back and compares it
// with the input.
//
//  device   - device whose CL_DEVICE_EXECUTION_CAPABILITIES are checked
//  context  - context the buffers are created in
//  queue    - queue the native kernel and the read-back are enqueued on
//  n_elems  - number of cl_int elements to copy (must be positive)
//
// Returns 0 on success or when the device lacks CL_EXEC_NATIVE_KERNEL (test
// skipped), non-zero on failure; OpenCL call failures go through test_error.
int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    int error;
    RandomSeed seed( gRandomSeed );

    // Check if we support native kernels. The query result must be validated
    // before `capabilities` is inspected, otherwise it may be uninitialized.
    cl_device_exec_capabilities capabilities;
    error = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof(capabilities), &capabilities, NULL);
    test_error( error, "Unable to query device execution capabilities" );
    if (!(capabilities & CL_EXEC_NATIVE_KERNEL)) {
        log_info("Device does not support CL_EXEC_NATIVE_KERNEL.\n");
        return 0;
    }

    if( n_elems <= 0 )
    {
        log_error( "ERROR: Invalid element count %d for native kernel test\n", n_elems );
        return -1;
    }

    clMemWrapper streams[ 2 ];

    // Heap-backed host storage that is released on every return path; this
    // replaces the compiler-specific stack arrays / _malloca allocations.
    std::vector<cl_int> inStorage( static_cast<size_t>( n_elems ) );
    std::vector<cl_int> outStorage( static_cast<size_t>( n_elems ) );
    cl_int *inBuffer = &inStorage[ 0 ];
    cl_int *outBuffer = &outStorage[ 0 ];

    clEventWrapper finishEvent;

    // Argument block passed to the native kernel. The two cl_mem slots are the
    // locations listed in memLocs below.
    struct arg_struct
    {
        cl_mem inputStream;
        cl_mem outputStream;
        cl_int count;
    } args;

    // Create some input values
    generate_random_data( kInt, n_elems, seed, inBuffer );

    // Create I/O streams
    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, n_elems * sizeof(cl_int), inBuffer, &error );
    test_error( error, "Unable to create I/O stream" );
    streams[ 1 ] = clCreateBuffer( context, 0, n_elems * sizeof(cl_int), NULL, &error );
    test_error( error, "Unable to create I/O stream" );

    // Set up the arrays to call with
    args.inputStream = streams[ 0 ];
    args.outputStream = streams[ 1 ];
    args.count = n_elems;

    void * memLocs[ 2 ] = { &args.inputStream, &args.outputStream };

    // Run the kernel
    error = clEnqueueNativeKernel( queue, test_native_kernel_fn,
                                   &args, sizeof( args ),
                                   2, &streams[ 0 ],
                                   (const void **)memLocs,
                                   0, NULL, &finishEvent );
    test_error( error, "Unable to queue native kernel" );

    // Finish and wait for the kernel to complete
    error = clFinish( queue );
    test_error(error, "clFinish failed");

    error = clWaitForEvents( 1, &finishEvent );
    test_error(error, "clWaitForEvents failed");

    // Now read the results and verify
    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, n_elems * sizeof(cl_int), outBuffer, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    for( int i = 0; i < n_elems; i++ )
    {
        if( inBuffer[ i ] != outBuffer[ i ] )
        {
            log_error( "ERROR: Data sample %d for native kernel did not validate (expected %d, got %d)\n",
                       i, (int)inBuffer[ i ], (int)outBuffer[ i ] );
            return 1;
        }
    }

    return 0;
}

View File

@@ -0,0 +1,162 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <stdio.h>
#if defined(__APPLE__)
#include <OpenCL/opencl.h>
#include <OpenCL/cl_platform.h>
#else
#include <CL/opencl.h>
#include <CL/cl_platform.h>
#endif
#include "procs.h"
/* Result codes used by the helpers in this file: SUCCESS is 0, FAILURE is 1. */
enum { SUCCESS, FAILURE };
/* Selects how kernel argument 0 is set: with a real buffer handle, with a
 * pointer to a NULL handle, or with a NULL argument pointer. */
typedef enum { NON_NULL_PATH, ADDROF_NULL_PATH, NULL_PATH } test_type;
/* Number of work-items launched and number of cl_long results read back. */
#define NITEMS 4096
/* places the casted long value of the src ptr into each element of the output
 * array, to allow testing that the kernel actually _gets_ the NULL value */
const char *kernel_string =
"kernel void test_kernel(global float *src, global long *dst)\n"
"{\n"
" uint tid = get_global_id(0);\n"
" dst[tid] = (long)src;\n"
"}\n";
/*
 * The guts of the test:
 * call setKernelArgs with a regular buffer, &NULL, or NULL depending on
 * the value of 'type', launch the kernel over NITEMS work-items, read the
 * results back and check the pointer value the kernel stored.
 *
 *  queue      - queue used for the kernel launch and the blocking read
 *  kernel     - kernel whose argument 0 is being tested
 *  test_buf   - buffer handle bound to argument 0 in the NON_NULL_PATH case
 *  result_buf - buffer the NITEMS cl_long results are read back from
 *  type       - which argument-setting path to exercise
 *
 * Returns SUCCESS or FAILURE; failures of the enqueue calls are reported
 * through test_error.
 */
static int test_setargs_and_execution(cl_command_queue queue, cl_kernel kernel,
    cl_mem test_buf, cl_mem result_buf, test_type type)
{
    int test_success = SUCCESS;
    unsigned int i;
    cl_int status = CL_SUCCESS;
    const char *typestr = NULL;

    if (type == NON_NULL_PATH) {
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
        typestr = "non-NULL";
    } else if (type == ADDROF_NULL_PATH) {
        test_buf = NULL;
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
        typestr = "&NULL";
    } else if (type == NULL_PATH) {
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL);
        typestr = "NULL";
    } else {
        /* Without this branch status and typestr would be used uninitialized. */
        log_error("Unknown test type %d requested.\n", (int)type);
        return FAILURE;
    }

    log_info("Testing setKernelArgs with %s buffer.\n", typestr);

    if (status != CL_SUCCESS) {
        log_error("clSetKernelArg failed with status: %d\n", status);
        return FAILURE; // no point in continuing *this* test
    }

    size_t global = NITEMS;
    status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global,
        NULL, 0, NULL, NULL);
    test_error(status, "NDRangeKernel failed.");

    cl_long* host_result = (cl_long*)malloc(NITEMS*sizeof(cl_long));
    if (host_result == NULL) {
        log_error("Unable to allocate host memory for the result buffer.\n");
        return FAILURE;
    }

    status = clEnqueueReadBuffer(queue, result_buf, CL_TRUE, 0,
        sizeof(cl_long)*NITEMS, host_result, 0, NULL, NULL);
    if (status != CL_SUCCESS) {
        /* test_error below leaves the function, so release the buffer first */
        free(host_result);
    }
    test_error(status, "ReadBuffer failed.");

    if (type == NON_NULL_PATH) {
        // in the non-null case, we expect NONZERO values:
        for (i=0; i<NITEMS; i++) {
            if (host_result[i] == 0) {
                log_error("failure: item %d in the result buffer was unexpectedly NULL.\n", i);
                test_success = FAILURE; break;
            }
        }
    } else {
        // ADDROF_NULL_PATH / NULL_PATH: every stored pointer value must be zero
        for (i=0; i<NITEMS; i++) {
            if (host_result[i] != 0) {
                log_error("failure: item %d in the result buffer was unexpectedly non-NULL.\n", i);
                test_success = FAILURE; break;
            }
        }
    }

    free(host_result);

    if (test_success == SUCCESS) {
        log_info("\t%s ok.\n", typestr);
    }

    return test_success;
}
int test_null_buffer_arg(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)
{
unsigned int test_success = 0;
unsigned int i;
cl_int status;
cl_program program;
cl_kernel kernel;
// prep kernel:
program = clCreateProgramWithSource(context, 1, &kernel_string, NULL, &status);
test_error(status, "CreateProgramWithSource failed.");
status = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
test_error(status, "BuildProgram failed.");
kernel = clCreateKernel(program, "test_kernel", &status);
test_error(status, "CreateKernel failed.");
cl_mem dev_src = clCreateBuffer(context, CL_MEM_READ_ONLY, NITEMS*sizeof(cl_float),
NULL, NULL);
cl_mem dev_dst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, NITEMS*sizeof(cl_long),
NULL, NULL);
// set the destination buffer normally:
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &dev_dst);
test_error(status, "SetKernelArg failed.");
//
// we test three cases:
//
// - typical case, used everyday: non-null buffer
// - the case of src as &NULL (the spec-compliance test)
// - the case of src as NULL (the backwards-compatibility test, Apple only)
//
test_success = test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NON_NULL_PATH);
test_success |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, ADDROF_NULL_PATH);
#ifdef __APPLE__
test_success |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NULL_PATH);
#endif
// clean up:
if (dev_src) clReleaseMemObject(dev_src);
clReleaseMemObject(dev_dst);
clReleaseKernel(kernel);
clReleaseProgram(program);
return test_success;
}

View File

@@ -0,0 +1,289 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include <string.h>
#define EXTENSION_NAME_BUF_SIZE 4096
#define PRINT_EXTENSION_INFO 0
// Returns true when `name` occurs in `list` as a complete token, i.e. bounded
// by the start/end of the string or by a space on each side. A plain strstr()
// would also accept `name` as a prefix or suffix of a longer extension name.
static bool platform_extension_listed( const char *list, const char *name )
{
    const size_t nameLen = strlen( name );
    for( const char *hit = strstr( list, name ); hit != NULL; hit = strstr( hit + 1, name ) )
    {
        const bool startsToken = ( hit == list ) || ( hit[ -1 ] == ' ' );
        const bool endsToken = ( hit[ nameLen ] == ' ' ) || ( hit[ nameLen ] == '\0' );
        if( startsToken && endsToken )
            return true;
    }
    return false;
}

// Checks that every extension from a fixed list that is reported in the
// CL_PLATFORM_EXTENSIONS string of deviceID's platform is also reported in
// the CL_DEVICE_EXTENSIONS string of deviceID itself.
// Returns 0 on success and -1 when a query fails or when the platform lists
// an extension that the device does not.
int test_platform_extensions(cl_device_id deviceID, cl_context context,
                             cl_command_queue queue, int num_elements)
{
    const char * extensions[] = {
        "cl_khr_byte_addressable_store",
        // "cl_APPLE_SetMemObjectDestructor",
        "cl_khr_global_int32_base_atomics",
        "cl_khr_global_int32_extended_atomics",
        "cl_khr_local_int32_base_atomics",
        "cl_khr_local_int32_extended_atomics",
        "cl_khr_int64_base_atomics",
        "cl_khr_int64_extended_atomics",
        // need to put in entries for various atomics
        "cl_khr_3d_image_writes",
        "cl_khr_fp16",
        "cl_khr_fp64",
        NULL
    };

    // One flag per slot of extensions[] (including the NULL terminator), all
    // initially false. Sizing it from extensions[] keeps the two in step.
    bool extensionsSupported[ sizeof( extensions ) / sizeof( extensions[ 0 ] ) ] = { false };

    int extensionIndex;

    cl_platform_id platformID;
    cl_int err;

    char platform_extensions[EXTENSION_NAME_BUF_SIZE];
    char device_extensions[EXTENSION_NAME_BUF_SIZE];

    // Okay, so what we're going to do is just check the device indicated by
    // deviceID against the platform that includes this device

    // pass CL_DEVICE_PLATFORM to clGetDeviceInfo
    // to get a result of type cl_platform_id
    err = clGetDeviceInfo(deviceID,
                          CL_DEVICE_PLATFORM,
                          sizeof(cl_platform_id),
                          (void *)(&platformID),
                          NULL);
    if(err != CL_SUCCESS)
    {
        vlog_error("test_platform_extensions : could not get platformID from device\n");
        return -1;
    }

    // now we grab the set of extensions specified by the platform
    err = clGetPlatformInfo(platformID,
                            CL_PLATFORM_EXTENSIONS,
                            sizeof(platform_extensions),
                            (void *)(&platform_extensions[0]),
                            NULL);
    if(err != CL_SUCCESS)
    {
        vlog_error("test_platform_extensions : could not get extension string from platform\n");
        return -1;
    }

#if PRINT_EXTENSION_INFO
    log_info("Platform extensions include \"%s\"\n\n", platform_extensions);
#endif

    // here we parse the platform extensions, to look for the "important" ones
    for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex)
    {
        if(platform_extension_listed(platform_extensions, extensions[extensionIndex]))
        {
            // we found it
#if PRINT_EXTENSION_INFO
            log_info("Found \"%s\" in platform extensions\n",
                     extensions[extensionIndex]);
#endif
            extensionsSupported[extensionIndex] = true;
        }
    }

    // and then we grab the set of extensions specified by the device
    // (this can be turned into a "loop over all devices in this platform")
    err = clGetDeviceInfo(deviceID,
                          CL_DEVICE_EXTENSIONS,
                          sizeof(device_extensions),
                          (void *)(&device_extensions[0]),
                          NULL);
    if(err != CL_SUCCESS)
    {
        vlog_error("test_platform_extensions : could not get extension string from device\n");
        return -1;
    }

#if PRINT_EXTENSION_INFO
    log_info("Device extensions include \"%s\"\n\n", device_extensions);
#endif

    for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex)
    {
        if(extensionsSupported[extensionIndex] == false)
        {
            continue; // skip this one
        }

        if(!platform_extension_listed(device_extensions, extensions[extensionIndex]))
        {
            // device does not support it
            vlog_error("Platform supports extension \"%s\" but device does not\n",
                       extensions[extensionIndex]);
            return -1;
        }
    }

    return 0;
}
int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
cl_platform_id platforms[16];
cl_uint num_platforms;
char *string_returned;
string_returned = (char*)malloc(8192);
int total_errors = 0;
int err = CL_SUCCESS;
err = clGetPlatformIDs(16, platforms, &num_platforms);
test_error(err, "clGetPlatformIDs failed");
if (num_platforms <= 16) {
// Try with NULL
err = clGetPlatformIDs(num_platforms, platforms, NULL);
test_error(err, "clGetPlatformIDs failed with NULL for return size");
}
if (num_platforms < 1) {
log_error("Found 0 platforms.\n");
return -1;
}
log_info("Found %d platforms.\n", num_platforms);
for (int p=0; p<(int)num_platforms; p++) {
cl_device_id *devices;
cl_uint num_devices;
size_t size;
log_info("Platform %d (%p):\n", p, platforms[p]);
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_PROFILE, 8192, string_returned, &size);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_PROFILE failed");
log_info("\tCL_PLATFORM_PROFILE: %s\n", string_returned);
if (strlen(string_returned)+1 != size) {
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
total_errors++;
}
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_VERSION, 8192, string_returned, &size);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_VERSION failed");
log_info("\tCL_PLATFORM_VERSION: %s\n", string_returned);
if (strlen(string_returned)+1 != size) {
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
total_errors++;
}
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, 8192, string_returned, &size);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_NAME failed");
log_info("\tCL_PLATFORM_NAME: %s\n", string_returned);
if (strlen(string_returned)+1 != size) {
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
total_errors++;
}
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_VENDOR, 8192, string_returned, &size);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_VENDOR failed");
log_info("\tCL_PLATFORM_VENDOR: %s\n", string_returned);
if (strlen(string_returned)+1 != size) {
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
total_errors++;
}
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_EXTENSIONS, 8192, string_returned, &size);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_EXTENSIONS failed");
log_info("\tCL_PLATFORM_EXTENSIONS: %s\n", string_returned);
if (strlen(string_returned)+1 != size) {
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
total_errors++;
}
err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
test_error(err, "clGetDeviceIDs size failed.\n");
devices = (cl_device_id *)malloc(num_devices*sizeof(cl_device_id));
memset(devices, 0, sizeof(cl_device_id)*num_devices);
err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
test_error(err, "clGetDeviceIDs failed.\n");
log_info("\tPlatform has %d devices.\n", (int)num_devices);
for (int d=0; d<(int)num_devices; d++) {
size_t returned_size;
cl_platform_id returned_platform;
cl_context context;
cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[p], 0 };
err = clGetDeviceInfo(devices[d], CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &returned_platform, &returned_size);
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM\n");
if (returned_size != sizeof(cl_platform_id)) {
log_error("Reported return size (%ld) does not match expected size (%ld).\n", returned_size, sizeof(cl_platform_id));
total_errors++;
}
memset(string_returned, 0, 8192);
err = clGetDeviceInfo(devices[d], CL_DEVICE_NAME, 8192, string_returned, NULL);
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_NAME\n");
log_info("\t\tPlatform for device %d (%s) is %p.\n", d, string_returned, returned_platform);
log_info("\t\t\tTesting clCreateContext for the platform/device...\n");
// Try creating a context for the platform
context = clCreateContext(properties, 1, &devices[d], NULL, NULL, &err);
test_error(err, "\t\tclCreateContext failed for device with platform properties\n");
memset(properties, 0, sizeof(cl_context_properties)*3);
err = clGetContextInfo(context, CL_CONTEXT_PROPERTIES, sizeof(cl_context_properties)*3, properties, &returned_size);
test_error(err, "clGetContextInfo for CL_CONTEXT_PROPERTIES failed");
if (returned_size != sizeof(cl_context_properties)*3) {
log_error("Invalid size returned from clGetContextInfo for CL_CONTEXT_PROPERTIES. Got %ld, expected %ld.\n",
returned_size, sizeof(cl_context_properties)*3);
total_errors++;
}
if (properties[0] != (cl_context_properties)CL_CONTEXT_PLATFORM || properties[1] != (cl_context_properties)platforms[p]) {
log_error("Wrong properties returned. Expected: [%p %p], got [%p %p]\n",
(void*)CL_CONTEXT_PLATFORM, platforms[p], (void*)properties[0], (void*)properties[1]);
total_errors++;
}
err = clReleaseContext(context);
test_error(err, "clReleaseContext failed");
}
free(devices);
}
free(string_returned);
return total_errors;
}

View File

@@ -0,0 +1,635 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/imageHelpers.h"
#include <stdlib.h>
#include <ctype.h>
// Validates the CL_PLATFORM_PROFILE and CL_PLATFORM_VERSION strings of the
// first platform returned by clGetPlatformIDs:
//  - the profile must be "FULL_PROFILE" or "EMBEDDED_PROFILE";
//  - the version must look like "OpenCL <major>.<minor> ..." and be >= 1.2;
//  - the size reported by each query (with and without a value buffer) must
//    equal the string length plus the terminating NUL.
// Returns 0 on success and -1 on a validation failure; failing OpenCL calls
// are reported through test_error.
int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_platform_id platform;
    cl_int error;
    char buffer[ 4098 ];
    size_t length;

    // Get the platform to use
    error = clGetPlatformIDs(1, &platform, NULL);
    test_error( error, "Unable to get platform" );

    // Platform profile should either be FULL_PROFILE or EMBEDDED_PROFILE
    error = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, sizeof( buffer ), buffer, &length );
    test_error( error, "Unable to get platform profile string" );

    log_info("Returned CL_PLATFORM_PROFILE %s.\n", buffer);

    if( strcmp( buffer, "FULL_PROFILE" ) != 0 && strcmp( buffer, "EMBEDDED_PROFILE" ) != 0 )
    {
        log_error( "ERROR: Returned platform profile string is not a valid string by OpenCL 1.2! (Returned: %s)\n", buffer );
        return -1;
    }
    if( strlen( buffer )+1 != length )
    {
        log_error( "ERROR: Returned length of profile string is incorrect (actual length: %d, returned length: %d)\n",
                   (int)strlen( buffer )+1, (int)length );
        return -1;
    }

    // Check just length return
    error = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, 0, NULL, &length );
    test_error( error, "Unable to get platform profile length" );
    if( strlen( (char *)buffer )+1 != length )
    {
        log_error( "ERROR: Returned length of profile string is incorrect (actual length: %d, returned length: %d)\n",
                   (int)strlen( (char *)buffer )+1, (int)length );
        return -1;
    }

    // Platform version should fit the regex "OpenCL *[0-9]+\.[0-9]+"
    error = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof( buffer ), buffer, &length );
    test_error( error, "Unable to get platform version string" );

    log_info("Returned CL_PLATFORM_VERSION %s.\n", buffer);

    if( memcmp( buffer, "OpenCL ", strlen( "OpenCL " ) ) != 0 )
    {
        log_error( "ERROR: Initial part of platform version string does not match required format! (returned: %s)\n", (char *)buffer );
        return -1;
    }

    // p1 = first major digit, p2 = the '.', p3 = first char after the minor digits.
    // isdigit() is only defined for unsigned char values (and EOF), hence the casts.
    char *p1 = (char *)buffer + strlen( "OpenCL " );
    while( *p1 == ' ' )
        p1++;
    char *p2 = p1;
    while( isdigit( (unsigned char)*p2 ) )
        p2++;
    if( p2 == p1 || *p2 != '.' )
    {
        // Either no major digits at all or no '.' separator
        log_error( "ERROR: Numeric part of platform version string does not match required format! (returned: %s)\n", (char *)buffer );
        return -1;
    }
    char *p3 = p2 + 1;
    while( isdigit( (unsigned char)*p3 ) )
        p3++;
    if( p3 == p2 + 1 )
    {
        // No minor digits after the '.'
        log_error( "ERROR: Numeric part of platform version string does not match required format! (returned: %s)\n", (char *)buffer );
        return -1;
    }
    if( *p3 != ' ' )
    {
        log_error( "ERROR: space expected after minor version number! (returned: %s)\n", (char *)buffer );
        return -1;
    }
    *p2 = ' '; // Put in a space for atoi below.
    p2++;

    // make sure it is null terminated
    for( ; p3 != buffer + length; p3++ )
        if( *p3 == '\0' )
            break;
    if( p3 == buffer + length )
    {
        log_error( "ERROR: platform version string is not NUL terminated!\n" );
        return -1;
    }

    int major = atoi( p1 );
    int minor = atoi( p2 );
    int minor_revision = 2;
    // Compare major and minor separately; folding them into major*10+minor
    // would let e.g. "0.15" pass as if it were newer than 1.2.
    if( major < 1 || ( major == 1 && minor < minor_revision ) )
    {
        log_error( "ERROR: OpenCL profile version returned is less than 1.%d!\n", minor_revision );
        return -1;
    }

    // Sanity checks on the returned values
    if( length != strlen( (char *)buffer ) + 1)
    {
        log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( (char *)buffer )+1, (int)length );
        return -1;
    }

    // Check just length
    error = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &length );
    test_error( error, "Unable to get platform version length" );
    if( length != strlen( (char *)buffer )+1 )
    {
        log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( buffer )+1, (int)length );
        return -1;
    }

    return 0;
}
// Creates a sampler (normalized coords, CL_ADDRESS_CLAMP, CL_FILTER_LINEAR)
// and checks that clGetSamplerInfo reports the expected value and the
// expected result size for its reference count, context, addressing mode,
// filter mode and normalized-coords flag.
// Returns 0 on success and -1 on the first mismatch; failing OpenCL calls are
// reported through test_error.
int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    size_t size;

// Fails the test when the byte count reported by the preceding query differs
// from the size of the variable that received the value.
#define EXPECT_SAMPLER_INFO_SIZE( var, name ) \
    if( size != sizeof( var ) ) \
    { \
        log_error( "ERROR: Returned size of sampler " name " does not validate! (expected %d, got %d)\n", (int)sizeof( var ), (int)size ); \
        return -1; \
    }

    PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )

    clSamplerWrapper sampler = clCreateSampler( context, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_LINEAR, &error );
    test_error( error, "Unable to create sampler to test with" );

    // Reference count: only the reported size is validated
    cl_uint referenceCount;
    error = clGetSamplerInfo( sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof( referenceCount ), &referenceCount, &size );
    test_error( error, "Unable to get sampler ref count" );
    EXPECT_SAMPLER_INFO_SIZE( referenceCount, "refcount" )

    // Owning context
    cl_context samplerContext;
    error = clGetSamplerInfo( sampler, CL_SAMPLER_CONTEXT, sizeof( samplerContext ), &samplerContext, &size );
    test_error( error, "Unable to get sampler context" );
    if( context != samplerContext )
    {
        log_error( "ERROR: Sampler context does not validate! (expected %p, got %p)\n", context, samplerContext );
        return -1;
    }
    EXPECT_SAMPLER_INFO_SIZE( samplerContext, "context" )

    // Addressing mode
    cl_addressing_mode addressing;
    error = clGetSamplerInfo( sampler, CL_SAMPLER_ADDRESSING_MODE, sizeof( addressing ), &addressing, &size );
    test_error( error, "Unable to get sampler addressing mode" );
    if( CL_ADDRESS_CLAMP != addressing )
    {
        log_error( "ERROR: Sampler addressing mode does not validate! (expected %d, got %d)\n", (int)CL_ADDRESS_CLAMP, (int)addressing );
        return -1;
    }
    EXPECT_SAMPLER_INFO_SIZE( addressing, "addressing mode" )

    // Filter mode
    cl_filter_mode filtering;
    error = clGetSamplerInfo( sampler, CL_SAMPLER_FILTER_MODE, sizeof( filtering ), &filtering, &size );
    test_error( error, "Unable to get sampler filter mode" );
    if( CL_FILTER_LINEAR != filtering )
    {
        log_error( "ERROR: Sampler filter mode does not validate! (expected %d, got %d)\n", (int)CL_FILTER_LINEAR, (int)filtering );
        return -1;
    }
    EXPECT_SAMPLER_INFO_SIZE( filtering, "filter mode" )

    // Normalized-coordinates flag
    cl_int normalized;
    error = clGetSamplerInfo( sampler, CL_SAMPLER_NORMALIZED_COORDS, sizeof( normalized ), &normalized, &size );
    test_error( error, "Unable to get sampler normalized flag" );
    if( CL_TRUE != normalized )
    {
        log_error( "ERROR: Sampler normalized flag does not validate! (expected %d, got %d)\n", (int)CL_TRUE, (int)normalized );
        return -1;
    }
    EXPECT_SAMPLER_INFO_SIZE( normalized, "normalized flag" )

#undef EXPECT_SAMPLER_INFO_SIZE

    return 0;
}
// Queries `paramName` from `queue` into `val` with clGetCommandQueueInfo and
// makes the enclosing function return -1 when the value differs from
// `expected` or when the reported size is not sizeof( val ).
// The macro assigns to variables named `error` and `size`, which must exist in
// the calling scope. `name` and `type` are pasted into string literals, so
// both must be string literals themselves (`type` is the printf conversion
// used for the expected/actual values); `cast` is the type both values are
// cast to for logging. The expansion is a statement sequence, not a single
// statement, and is written without a trailing semicolon.
#define TEST_COMMAND_QUEUE_PARAM( queue, paramName, val, expected, name, type, cast ) \
error = clGetCommandQueueInfo( queue, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get command queue " name ); \
if( val != expected ) \
{ \
log_error( "ERROR: Command queue " name " did not validate! (expected " type ", got " type ")\n", (cast)expected, (cast)val ); \
return -1; \
} \
if( size != sizeof( val ) ) \
{ \
log_error( "ERROR: Returned size of command queue " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
return -1; \
}
// Creates a command queue with the properties the device reports in
// CL_DEVICE_QUEUE_PROPERTIES and checks clGetCommandQueueInfo for the
// reference count (size only), context, device (compared by vendor ID) and
// properties. The queue passed in by the harness is not used.
// Returns 0 on success and -1 on a mismatch; failing OpenCL calls are reported
// through test_error.
int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
{
    // NOTE: TEST_COMMAND_QUEUE_PARAM relies on these two names.
    int error;
    size_t size;

    cl_command_queue_properties device_props;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, NULL);
    // Without this check a failed query would leave device_props uninitialized
    test_error( error, "Unable to get device CL_DEVICE_QUEUE_PROPERTIES" );
    log_info("CL_DEVICE_QUEUE_PROPERTIES is %d\n", (int)device_props);

    clCommandQueueWrapper queue = clCreateCommandQueue( context, deviceID, device_props, &error );
    test_error( error, "Unable to create command queue to test with" );

    cl_uint refCount;
    error = clGetCommandQueueInfo( queue, CL_QUEUE_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
    test_error( error, "Unable to get command queue reference count" );
    if( size != sizeof( refCount ) )
    {
        log_error( "ERROR: Returned size of command queue reference count does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size );
        return -1;
    }

    cl_context otherCtx;
    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_CONTEXT, otherCtx, context, "context", "%p", cl_context )

    cl_device_id otherDevice;
    error = clGetCommandQueueInfo( queue, CL_QUEUE_DEVICE, sizeof(otherDevice), &otherDevice, &size);
    test_error(error, "clGetCommandQueue failed.");

    if (size != sizeof(cl_device_id)) {
        log_error( " ERROR: Returned size of command queue CL_QUEUE_DEVICE does not validate! (expected %d, got %d)\n", (int)sizeof( otherDevice ), (int)size );
        return -1;
    }

    /* Since the device IDs are opaque types we check the CL_DEVICE_VENDOR_ID which is unique for identical hardware. */
    cl_uint otherDevice_vid, deviceID_vid;
    error = clGetDeviceInfo(otherDevice, CL_DEVICE_VENDOR_ID, sizeof(otherDevice_vid), &otherDevice_vid, NULL );
    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
    error = clGetDeviceInfo(deviceID, CL_DEVICE_VENDOR_ID, sizeof(deviceID_vid), &deviceID_vid, NULL );
    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );

    if( otherDevice_vid != deviceID_vid )
    {
        log_error( "ERROR: Incorrect device returned for queue! (Expected vendor ID 0x%x, got 0x%x)\n", deviceID_vid, otherDevice_vid );
        return -1;
    }

    cl_command_queue_properties props;
    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_PROPERTIES, props, (unsigned int)( device_props ), "properties", "%d", unsigned int )

    return 0;
}
// Queries CL_CONTEXT_PROPERTIES into a single cl_context_properties slot and
// accepts two outcomes: a reported size of 0, or a reported size of exactly
// one cl_context_properties whose value is 0.
// Returns 0 for either accepted outcome and -1 otherwise; a failing query is
// reported through test_error.
int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
{
    int status;
    size_t reportedSize;
    cl_context_properties firstProperty;

    status = clGetContextInfo( context, CL_CONTEXT_PROPERTIES, sizeof( firstProperty ), &firstProperty, &reportedSize );
    test_error( status, "Unable to get context props" );

    const bool emptyList = ( reportedSize == 0 );
    const bool singleEntry = ( reportedSize == sizeof(cl_context_properties) );

    // Any size other than zero or one property entry is rejected
    if( !emptyList && !singleEntry )
    {
        log_error( "ERROR: Returned size of context props is not valid! (expected 0 or %d, got %d)\n",
                   (int)sizeof(cl_context_properties), (int)reportedSize );
        return -1;
    }

    // A single returned entry must be the 0 terminator
    if( singleEntry && firstProperty != 0 )
    {
        log_error("ERROR: Returned properties is no NULL.\n");
        return -1;
    }

    // Valid data and size
    return 0;
}
// Queries `paramName` from the memory object `mem` into `val` with
// clGetMemObjectInfo and makes the enclosing function return -1 when the value
// differs from `expected` or when the reported size is not sizeof( val ).
// The macro assigns to variables named `error` and `size`, which must exist in
// the calling scope. `name` and `type` are pasted into string literals, so
// both must be string literals themselves (`type` is the printf conversion
// used for the expected/actual values); `cast` is the type both values are
// cast to for logging. The expansion is a statement sequence rather than a
// single statement.
#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast ) \
error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get mem object " name ); \
if( val != expected ) \
{ \
log_error( "ERROR: Mem object " name " did not validate! (expected " type ", got " type ")\n", (cast)(expected), (cast)val ); \
return -1; \
} \
if( size != sizeof( val ) ) \
{ \
log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
return -1; \
}
// Callback with the (cl_mem, void *) destructor-callback signature: ignores
// the memory object and releases the heap block passed as user data with
// free().
void CL_CALLBACK mem_obj_destructor_callback( cl_mem /* unused */, void *data )
{
    free( data );
}
// Table of 16 cl_mem_flags values: no flags at all, then each access
// qualifier (READ_WRITE, READ_ONLY, WRITE_ONLY) on its own, combined with
// COPY_HOST_PTR, with ALLOC_HOST_PTR, with ALLOC_HOST_PTR | COPY_HOST_PTR,
// and with USE_HOST_PTR.
// NOTE(review): the code that consumes this table is outside this view;
// confirm it iterates exactly 16 entries.
static cl_mem_flags all_flags[16] = {
0,
CL_MEM_READ_WRITE,
CL_MEM_READ_ONLY,
CL_MEM_WRITE_ONLY,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
};
// Queries `paramName` from `device` into `val` with clGetDeviceInfo, makes the
// enclosing function return -1 when the reported size is not sizeof( val ),
// and otherwise logs the value. The value itself is not validated.
// The macro assigns to variables named `error` and `size`, which must exist in
// the calling scope. `name` and `type` are pasted into string literals, so
// both must be string literals themselves (`type` is the printf conversion
// used to log the value); `cast` is the type the value is cast to for logging.
// The expansion is a statement sequence rather than a single statement.
#define TEST_DEVICE_PARAM( device, paramName, val, name, type, cast ) \
error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get device " name ); \
if( size != sizeof( val ) ) \
{ \
log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
return -1; \
} \
log_info( "\tReported device " name " : " type "\n", (cast)val );
// Variant of TEST_DEVICE_PARAM for memory sizes: performs the same query and
// size check, but logs ( val / div ) cast to int, so the caller picks the unit
// through `div` (and names it in the `type` format literal).
// The macro assigns to variables named `error` and `size`, which must exist in
// the calling scope; `name` and `type` must be string literals because they
// are pasted into string literals.
#define TEST_DEVICE_PARAM_MEM( device, paramName, val, name, type, div ) \
error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get device " name ); \
if( size != sizeof( val ) ) \
{ \
log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
return -1; \
} \
log_info( "\tReported device " name " : " type "\n", (int)( val / div ) );
// Queries a series of clGetDeviceInfo parameters on deviceID, verifying for each
// that the call succeeds and that the size written back matches the queried
// type (or strlen + 1 for string results), and logs every reported value.
//
// Returns 0 when every query validates and -1 when a returned size or the
// profile string is wrong; failed CL calls are reported through test_error.
// The context, ignoreQueue and num_elements arguments are not used.
int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
{
    int error;
    size_t size;

    cl_uint vendorID;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_VENDOR_ID, vendorID, "vendor ID", "0x%08x", int )

    // String result: the reported size must include the terminating NUL.
    char extensions[ 10240 ];
    error = clGetDeviceInfo( deviceID, CL_DEVICE_EXTENSIONS, sizeof( extensions ), &extensions, &size );
    test_error( error, "Unable to get device extensions" );
    if( size != strlen( extensions ) + 1 )
    {
        log_error( "ERROR: Returned size of device extensions does not validate! (expected %d, got %d)\n", (int)( strlen( extensions ) + 1 ), (int)size );
        return -1;
    }
    log_info( "\tReported device extensions: %s \n", extensions );

    cl_uint preferred;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, preferred, "preferred vector char width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, preferred, "preferred vector short width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, preferred, "preferred vector int width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, preferred, "preferred vector long width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, preferred, "preferred vector float width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, preferred, "preferred vector double width", "%d", int )
    // Note that even if cl_khr_fp64 is not reported, the preferred width for double can be non-zero.
    // For example, vendor extensions can support double without supporting cl_khr_fp64, which implies
    // math library support. The double width is therefore only logged, not validated.

    cl_uint baseAddrAlign;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, baseAddrAlign, "base address alignment", "%d bytes", int )

    cl_uint minDataAlign;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, minDataAlign, "min data type alignment", "%d bytes", int )

    cl_device_mem_cache_type cacheType;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, sizeof( cacheType ), &cacheType, &size );
    test_error( error, "Unable to get device global mem cache type" );
    if( size != sizeof( cacheType ) )
    {
        log_error( "ERROR: Returned size of device global mem cache type does not validate! (expected %d, got %d)\n", (int)sizeof( cacheType ), (int)size );
        return -1;
    }
    const char *cacheTypeName = ( cacheType == CL_NONE ) ? "CL_NONE" : ( cacheType == CL_READ_ONLY_CACHE ) ? "CL_READ_ONLY_CACHE" : ( cacheType == CL_READ_WRITE_CACHE ) ? "CL_READ_WRITE_CACHE" : "<unknown>";
    log_info( "\tReported device global mem cache type: %s \n", cacheTypeName );

    cl_uint cachelineSize;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cachelineSize, "global mem cacheline size", "%d bytes", int )

    cl_ulong cacheSize;
    TEST_DEVICE_PARAM_MEM( deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cacheSize, "global mem cache size", "%d KB", 1024 )

    cl_ulong memSize;
    TEST_DEVICE_PARAM_MEM( deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, memSize, "global mem size", "%d MB", ( 1024 * 1024 ) )

    cl_device_local_mem_type localMemType;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_TYPE, sizeof( localMemType ), &localMemType, &size );
    test_error( error, "Unable to get device local mem type" );
    // Validate against the variable that was actually queried (not cacheType).
    if( size != sizeof( localMemType ) )
    {
        log_error( "ERROR: Returned size of device local mem type does not validate! (expected %d, got %d)\n", (int)sizeof( localMemType ), (int)size );
        return -1;
    }
    // Both comparisons must look at localMemType; comparing cacheType against
    // CL_GLOBAL would name the local memory type after the global cache type.
    const char *localMemTypeName = ( localMemType == CL_LOCAL ) ? "CL_LOCAL" : ( localMemType == CL_GLOBAL ) ? "CL_GLOBAL" : "<unknown>";
    log_info( "\tReported device local mem type: %s \n", localMemTypeName );

    cl_bool errSupport;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_ERROR_CORRECTION_SUPPORT, errSupport, "error correction support", "%d", int )

    size_t timerResolution;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PROFILING_TIMER_RESOLUTION, timerResolution, "profiling timer resolution", "%ld nanoseconds", long )

    cl_bool endian;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_ENDIAN_LITTLE, endian, "little endian flag", "%d", int )

    cl_bool avail;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_AVAILABLE, avail, "available flag", "%d", int )

    cl_bool compilerAvail;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_COMPILER_AVAILABLE, compilerAvail, "compiler available flag", "%d", int )

    // String result: size must include the NUL, and only the two profile
    // names below are accepted.
    char profile[ 1024 ];
    error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof( profile ), &profile, &size );
    test_error( error, "Unable to get device profile" );
    if( size != strlen( profile ) + 1 )
    {
        log_error( "ERROR: Returned size of device profile does not validate! (expected %d, got %d)\n", (int)( strlen( profile ) + 1 ), (int)size );
        return -1;
    }
    if( strcmp( profile, "FULL_PROFILE" ) != 0 && strcmp( profile, "EMBEDDED_PROFILE" ) != 0 )
    {
        log_error( "ERROR: Returned profile of device not FULL or EMBEDDED as required by OpenCL 1.2! (Returned %s)\n", profile );
        return -1;
    }
    log_info( "\tReported device profile: %s \n", profile );

    return 0;
}
// Kernel sources used by test_kernel_required_group_size:
//   [0] a plain copy kernel with no required work-group size;
//   [1] the same kernel carrying a reqd_work_group_size attribute. This entry
//       is a printf-style template whose three %d conversions are filled in
//       with the local dimensions before the program is built.
static const char *sample_compile_size[2] = {
"__kernel void sample_test(__global int *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" dst[tid] = src[tid];\n"
"\n"
"}\n",
"__kernel __attribute__((reqd_work_group_size(%d,%d,%d))) void sample_test(__global int *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" dst[tid] = src[tid];\n"
"\n"
"}\n" };
// Verifies CL_KERNEL_COMPILE_WORK_GROUP_SIZE and enforcement of the
// reqd_work_group_size kernel attribute.
//
// Phase 1 builds a kernel without the attribute, checks that the compile
// work-group size is reported as (0,0,0) with the expected return size, and
// picks local dimensions for the fixed global size.
// Phase 2 builds the same kernel with reqd_work_group_size set to those local
// dimensions, checks that the query reports them, runs the kernel once with
// the required size, and then checks that enqueues with each dimension bumped
// by one are rejected with CL_INVALID_WORK_GROUP_SIZE.
//
// Returns 0 on success, -1 on a validation failure; failed CL calls are
// reported through test_error. num_elements is not used.
int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    size_t realSize;
    size_t kernel_max_workgroup_size;
    size_t global[] = {64,14,10};
    size_t local[] = {0,0,0};
    cl_uint max_dimensions;

    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_dimensions), &max_dimensions, NULL);
    test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
    log_info("Device reported CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS = %d.\n", (int)max_dimensions);

    {
        clProgramWrapper program;
        clKernelWrapper kernel;

        error = create_single_kernel_helper( context, &program, &kernel, 1, &sample_compile_size[ 0 ], "sample_test" );
        if( error != 0 )
            return error;

        error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(kernel_max_workgroup_size), &kernel_max_workgroup_size, NULL);
        test_error( error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE");
        log_info("The CL_KERNEL_WORK_GROUP_SIZE for the kernel is %d.\n", (int)kernel_max_workgroup_size);

        // Without the attribute the compile work-group size must be all zeros.
        size_t size[ 3 ];
        error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof( size ), size, &realSize );
        test_error( error, "Unable to get work group info" );

        if( size[ 0 ] != 0 || size[ 1 ] != 0 || size[ 2 ] != 0 )
        {
            log_error( "ERROR: Nonzero compile work group size returned for nonspecified size! (returned %d,%d,%d)\n", (int)size[0], (int)size[1], (int)size[2] );
            return -1;
        }

        if( realSize != sizeof( size ) )
        {
            log_error( "ERROR: Returned size of compile work group size not valid! (Expected %d, got %d)\n", (int)sizeof( size ), (int)realSize );
            return -1;
        }

        // Determine some local dimensions to use for the test.
        if (max_dimensions == 1) {
            error = get_max_common_work_group_size(context, kernel, global[0], &local[0]);
            test_error( error, "get_max_common_work_group_size failed");
            log_info("For global dimension %d, kernel will require local dimension %d.\n", (int)global[0], (int)local[0]);
        } else if (max_dimensions == 2) {
            error = get_max_common_2D_work_group_size(context, kernel, global, local);
            test_error( error, "get_max_common_2D_work_group_size failed");
            log_info("For global dimension %d x %d, kernel will require local dimension %d x %d.\n", (int)global[0], (int)global[1], (int)local[0], (int)local[1]);
        } else {
            error = get_max_common_3D_work_group_size(context, kernel, global, local);
            test_error( error, "get_max_common_3D_work_group_size failed");
            log_info("For global dimension %d x %d x %d, kernel will require local dimension %d x %d x %d.\n",
                     (int)global[0], (int)global[1], (int)global[2], (int)local[0], (int)local[1], (int)local[2]);
        }
    }

    {
        clProgramWrapper program;
        clKernelWrapper kernel;
        clMemWrapper in, out;

        // The generated source lives on the stack so that none of the early
        // returns below can leak it. The template plus three decimal ints is
        // far shorter than the buffer. A separate pointer variable is needed
        // because the helper takes a const char ** and &source on an array
        // would have the wrong type.
        char source[1024];
        const char *sourcePtr = source;

        // The template uses %d, so the size_t dimensions must be passed as int.
        sprintf(source, sample_compile_size[1], (int)local[0], (int)local[1], (int)local[2]);

        error = create_single_kernel_helper( context, &program, &kernel, 1, &sourcePtr, "sample_test" );
        if( error != 0 )
            return error;

        size_t size[ 3 ];
        error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof( size ), size, &realSize );
        test_error( error, "Unable to get work group info" );

        if( size[ 0 ] != local[0] || size[ 1 ] != local[1] || size[ 2 ] != local[2] )
        {
            log_error( "ERROR: Incorrect compile work group size returned for specified size! (returned %d,%d,%d, expected %d,%d,%d)\n",
                       (int)size[0], (int)size[1], (int)size[2], (int)local[0], (int)local[1], (int)local[2]);
            return -1;
        }

        // Verify that the kernel will only execute with that size.
        in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*global[0], NULL, &error);
        test_error(error, "clCreateBuffer failed");
        out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*global[0], NULL, &error);
        test_error(error, "clCreateBuffer failed");

        error = clSetKernelArg(kernel, 0, sizeof(in), &in);
        test_error(error, "clSetKernelArg failed");
        error = clSetKernelArg(kernel, 1, sizeof(out), &out);
        test_error(error, "clSetKernelArg failed");

        // Positive case: the required size itself must be accepted.
        error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL);
        test_error(error, "clEnqueueNDRangeKernel failed");

        error = clFinish(queue);
        test_error(error, "clFinish failed");

        log_info("kernel_required_group_size may report spurious ERRORS in the conformance log.\n");

        // Negative case 1: first dimension one larger than required.
        local[0]++;
        error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL);
        if (error != CL_INVALID_WORK_GROUP_SIZE) {
            log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. (used %d,%d,%d, required %d,%d,%d)\n",
                      (int)local[0], (int)local[1], (int)local[2], (int)local[0]-1, (int)local[1], (int)local[2] );
            print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE.");
            return -1;
        }

        error = clFinish(queue);
        test_error(error, "clFinish failed");

        if (max_dimensions == 1) {
            return 0;
        }

        // Negative case 2: second dimension one larger than required.
        local[0]--; local[1]++;
        error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL);
        if (error != CL_INVALID_WORK_GROUP_SIZE) {
            log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. (used %d,%d,%d, required %d,%d,%d)\n",
                      (int)local[0], (int)local[1], (int)local[2], (int)local[0], (int)local[1]-1, (int)local[2]);
            print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE.");
            return -1;
        }

        error = clFinish(queue);
        test_error(error, "clFinish failed");

        if (max_dimensions == 2) {
            return 0;
        }

        // Negative case 3: third dimension one larger than required.
        local[1]--; local[2]++;
        error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL);
        if (error != CL_INVALID_WORK_GROUP_SIZE) {
            log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. (used %d,%d,%d, required %d,%d,%d)\n",
                      (int)local[0], (int)local[1], (int)local[2], (int)local[0], (int)local[1], (int)local[2]-1);
            print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE.");
            return -1;
        }

        error = clFinish(queue);
        test_error(error, "clFinish failed");
    }

    return 0;
}

View File

@@ -0,0 +1,234 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif // !_WIN32
// Note: According to spec, the various functions to get instance counts should return an error when passed in an object
// that has already been released. However, the spec is out of date. If it gets re-updated to allow such action, re-enable
// this define.
//#define VERIFY_AFTER_RELEASE 1
// Reads the reference count of command queue `p` into `numInstances` and
// stores the CL status in `err`; both variables must be declared by the
// caller. If the query fails, `numInstances` is set to 0.
#define GET_QUEUE_INSTANCE_COUNT(p) numInstances = ( (err = clGetCommandQueueInfo(p, CL_QUEUE_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
// Same as GET_QUEUE_INSTANCE_COUNT, but for the reference count of mem object `p`.
#define GET_MEM_INSTANCE_COUNT(p) numInstances = ( (err = clGetMemObjectInfo(p, CL_MEM_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
// Logs an error and returns -1 from the enclosing function when the count `c`
// differs from `rightValue`. Expands to a bare `if` statement (no do/while
// wrapper), so it should only be used as a stand-alone statement.
#define VERIFY_INSTANCE_COUNT(c,rightValue) if( c != rightValue ) { \
log_error( "ERROR: Instance count for test object is not valid! (should be %d, really is %d)\n", rightValue, c ); \
return -1; }
// Creates a command queue, checks that its reference count starts at 1, and
// releases it again. Returns 0 on success, -1 on a wrong count; failed CL
// calls (including the release) are reported through test_error.
// queueNotUsed and num_elements are not used.
int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
{
    cl_command_queue queue;
    cl_uint numInstances;
    int err;

    /* Create a test queue */
    queue = clCreateCommandQueue( context, deviceID, 0, &err );
    test_error( err, "Unable to create command queue to test with" );

    /* Test the instance count */
    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* Now release the queue; a failing release must fail the test */
    err = clReleaseCommandQueue( queue );
    test_error( err, "Unable to release command queue" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_QUEUE_INSTANCE_COUNT( queue );
    if( err != CL_INVALID_COMMAND_QUEUE )
    {
        print_error( err, "Command queue was not properly released" );
        return -1;
    }
#endif

    return 0;
}
// Creates a command queue and walks its reference count through a sequence of
// retains and releases (1 -> 10 -> 5 -> 8 -> 1 -> released), verifying the
// reported count after each step. Returns 0 on success, -1 on a wrong count;
// failed CL calls (including every retain and release) are reported through
// test_error. queueNotUsed and num_elements are not used.
int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
{
    cl_command_queue queue;
    cl_uint numInstances;
    unsigned int i;
    int err;

    /* Create a test queue */
    queue = clCreateCommandQueue( context, deviceID, 0, &err );
    test_error( err, "Unable to create command queue to test with" );

    /* Increment 9 times, which should bring the count to 10 */
    for( i = 0; i < 9; i++ )
    {
        err = clRetainCommandQueue( queue );
        test_error( err, "Unable to retain command queue" );
    }

    /* Test the instance count */
    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 10 );

    /* Now release 5 times, which should take us to 5 */
    for( i = 0; i < 5; i++ )
    {
        err = clReleaseCommandQueue( queue );
        test_error( err, "Unable to release command queue" );
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 5 );

    /* Retain again three times, which should take us to 8 */
    for( i = 0; i < 3; i++ )
    {
        err = clRetainCommandQueue( queue );
        test_error( err, "Unable to retain command queue" );
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 8 );

    /* Release 7 times, which should take it to 1 */
    for( i = 0; i < 7; i++ )
    {
        err = clReleaseCommandQueue( queue );
        test_error( err, "Unable to release command queue" );
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* And one last one */
    err = clReleaseCommandQueue( queue );
    test_error( err, "Unable to release command queue" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_QUEUE_INSTANCE_COUNT( queue );
    if( err != CL_INVALID_COMMAND_QUEUE )
    {
        print_error( err, "Command queue was not properly released" );
        return -1;
    }
#endif

    return 0;
}
// Creates a 32-byte buffer, checks that its reference count starts at 1, and
// releases it again. Returns 0 on success, -1 on a wrong count; failed CL
// calls (including the release) are reported through test_error.
// deviceID, queue and num_elements are not used.
int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem object;
    cl_uint numInstances;
    int err;

    /* Create a test object */
    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
    test_error( err, "Unable to create buffer to test with" );

    /* Test the instance count */
    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* Now release the mem object; a failing release must fail the test */
    err = clReleaseMemObject( object );
    test_error( err, "Unable to release mem object" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_MEM_INSTANCE_COUNT( object );
    if( err != CL_INVALID_MEM_OBJECT )
    {
        print_error( err, "Mem object was not properly released" );
        return -1;
    }
#endif

    return 0;
}
// Creates a 32-byte buffer and walks its reference count through a sequence
// of retains and releases (1 -> 10 -> 5 -> 8 -> 1 -> released), verifying the
// reported count after each step. Returns 0 on success, -1 on a wrong count;
// failed CL calls (including every retain and release) are reported through
// test_error. deviceID, queue and num_elements are not used.
int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem object;
    cl_uint numInstances;
    unsigned int i;
    int err;

    /* Create a test object */
    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
    test_error( err, "Unable to create buffer to test with" );

    /* Increment 9 times, which should bring the count to 10 */
    for( i = 0; i < 9; i++ )
    {
        err = clRetainMemObject( object );
        test_error( err, "Unable to retain mem object" );
    }

    /* Test the instance count */
    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 10 );

    /* Now release 5 times, which should take us to 5 */
    for( i = 0; i < 5; i++ )
    {
        err = clReleaseMemObject( object );
        test_error( err, "Unable to release mem object" );
    }

    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 5 );

    /* Retain again three times, which should take us to 8 */
    for( i = 0; i < 3; i++ )
    {
        err = clRetainMemObject( object );
        test_error( err, "Unable to retain mem object" );
    }

    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 8 );

    /* Release 7 times, which should take it to 1 */
    for( i = 0; i < 7; i++ )
    {
        err = clReleaseMemObject( object );
        test_error( err, "Unable to release mem object" );
    }

    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* And one last one */
    err = clReleaseMemObject( object );
    test_error( err, "Unable to release mem object" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_MEM_INSTANCE_COUNT( object );
    if( err != CL_INVALID_MEM_OBJECT )
    {
        print_error( err, "Mem object was not properly released" );
        return -1;
    }
#endif

    return 0;
}

View File

@@ -0,0 +1,109 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif
#include "../../test_common/harness/compat.h"
/* Builds a trivial program, creates a kernel from it, then releases the
   program BEFORE the kernel. Returns 0 on success; failed CL calls, including
   either release, are reported through test_error. context is used to create
   the program; queue and num_elements are not used. */
int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_program program;
    cl_kernel kernel;
    int error;
    int kernelReleaseError;
    const char *testProgram[] = { "__kernel void sample_test(__global int *data){}" };

    /* Create a test program */
    program = clCreateProgramWithSource( context, 1, testProgram, NULL, &error);
    test_error( error, "Unable to create program to test with" );

    /* Compile the program */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build sample program to test with" );

    /* And create a kernel from it */
    kernel = clCreateKernel( program, "sample_test", &error );
    test_error( error, "Unable to create kernel" );

    /* Now try freeing the program first, then the kernel. If refcounts are right, this should work just fine.
       Both releases are issued before either status is checked so that a failure of the first does not
       skip the second. */
    error = clReleaseProgram( program );
    kernelReleaseError = clReleaseKernel( kernel );
    test_error( error, "Unable to release program before kernel" );
    test_error( kernelReleaseError, "Unable to release kernel after program" );

    /* If we got here fine, we succeeded. A crash inside the releases still cannot be reported as an error. */
    return 0;
}
/* Kernel source used by test_release_during_execute: spins through an empty
   one-million-iteration loop and then converts src[tid] to int into dst[tid].
   NOTE(review): the loop is presumably meant to keep the kernel running while
   the host releases its objects; a compiler is free to remove an empty loop,
   so the delay is not guaranteed -- confirm. */
const char *sample_delay_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" for( int i = 0; i < 1000000; i++ ); \n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n" };
/* Enqueues the delay kernel and, without waiting for it, releases both
   buffers, the kernel and the program; then waits for the queue to drain.
   Returns 0 on success, -1 if the kernel could not be created; failed CL
   calls, including any of the releases, are reported through test_error.
   deviceID and num_elements are not used. */
int test_release_during_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    int releaseErrors[4];
    cl_program program;
    cl_kernel kernel;
    cl_mem streams[2];
    size_t threads[1] = { 10 }, localThreadSize;

    /* Build the kernel that will be in flight while its objects are released */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_delay_kernel, "sample_test" ) )
    {
        return -1;
    }

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[ 0 ]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[ 1 ]);
    test_error( error, "Unable to set indexed kernel arguments" );

    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreadSize );
    test_error( error, "Unable to calc local thread size" );

    /* Execute the kernel */
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &localThreadSize, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    /* The kernel should still be executing, but we should still be able to release it. It's not terribly
       useful, but we should be able to do it, if the internal refcounting is indeed correct.
       The statuses are only recorded here; they are checked after clFinish so that a failed release
       never causes a return while work is still queued. */
    releaseErrors[0] = clReleaseMemObject( streams[ 1 ] );
    releaseErrors[1] = clReleaseMemObject( streams[ 0 ] );
    releaseErrors[2] = clReleaseKernel( kernel );
    releaseErrors[3] = clReleaseProgram( program );

    /* Now make sure we're really finished before we go on. */
    error = clFinish(queue);
    test_error( error, "Unable to finish context.");

    test_error( releaseErrors[0], "Unable to release output buffer during execution" );
    test_error( releaseErrors[1], "Unable to release input buffer during execution" );
    test_error( releaseErrors[2], "Unable to release kernel during execution" );
    test_error( releaseErrors[3], "Unable to release program during execution" );

    return 0;
}

View File

@@ -0,0 +1,65 @@
# Source list for the COMPATIBILITY_BASIC test module.
set(MODULE_NAME COMPATIBILITY_BASIC)

# One source per line, in the original order: the test driver and the
# individual tests first, the shared harness sources last.
set(${MODULE_NAME}_SOURCES
    main.c
    test_fpmath_float.c
    test_fpmath_float2.c
    test_fpmath_float4.c
    test_intmath_int.c
    test_intmath_int2.c
    test_intmath_int4.c
    test_intmath_long.c
    test_intmath_long2.c
    test_intmath_long4.c
    test_hiloeo.c
    test_local.c
    test_pointercast.c
    test_if.c
    test_loop.c
    test_readimage.c
    test_readimage_int16.c
    test_readimage_fp32.c
    test_readimage3d.c
    test_readimage3d_int16.c
    test_readimage3d_fp32.c
    test_writeimage.c
    test_writeimage_int16.c
    test_writeimage_fp32.c
    test_multireadimageonefmt.c
    test_multireadimagemultifmt.c
    test_imagedim.c
    test_vloadstore.c
    test_int2float.c
    test_float2int.c
    test_createkernelsinprogram.c
    test_hostptr.c
    test_explicit_s2v.cpp
    test_constant.c
    test_image_multipass.c
    test_imagereadwrite.c
    test_imagereadwrite3d.c
    test_image_param.c
    test_imagenpot.c
    test_image_r8.c
    test_barrier.c
    test_basic_parameter_types.c
    test_arrayreadwrite.c
    test_arraycopy.c
    test_imagearraycopy.c
    test_imagearraycopy3d.c
    test_imagecopy.c
    test_imagerandomcopy.c
    test_arrayimagecopy.c
    test_arrayimagecopy3d.c
    test_imagecopy3d.c
    test_enqueue_map.cpp
    test_work_item_functions.cpp
    test_astype.cpp
    test_async_copy.cpp
    test_sizeof.c
    test_vector_creation.cpp
    test_vec_type_hint.c
    test_numeric_constants.cpp
    test_constant_source.cpp
    test_bufferreadwriterect.c
    test_async_strided_copy.cpp
    test_preprocessors.cpp
    test_kernel_memory_alignment.cpp
    test_global_work_offsets.cpp
    test_kernel_call_kernel_function.cpp
    test_local_kernel_scope.cpp
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/threadTesting.c
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/typeWrappers.cpp
    ../../test_common/harness/imageHelpers.cpp
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/conversions.c
    ../../test_common/harness/rounding_mode.c
    ../../test_common/harness/msvc9.c
)

# NOTE(review): CMakeCommon.txt is not visible here; presumably it consumes
# MODULE_NAME and ${MODULE_NAME}_SOURCES to define the test target -- confirm.
include(../../../CMakeCommon.txt)

View File

@@ -0,0 +1,75 @@
# Boost.Build description of the test_basic conformance executable.
# Every source is compiled as C++ (gcc: -xc++, msvc: /TP), including the .c files.
project
    : requirements
      <toolset>gcc:<cflags>-xc++
      <toolset>msvc:<cflags>"/TP"
    ;

# NOTE(review): the shared harness sources are not listed here; presumably
# they are supplied by a parent Jamfile -- confirm.
exe test_basic
    : main.c
      test_arraycopy.c
      test_arrayimagecopy3d.c
      test_arrayimagecopy.c
      test_arrayreadwrite.c
      test_astype.cpp
      test_async_copy.cpp
      test_barrier.c
      test_basic_parameter_types.c
      test_constant.c
      test_createkernelsinprogram.c
      test_enqueue_map.cpp
      test_explicit_s2v.cpp
      test_float2int.c
      test_fpmath_float2.c
      test_fpmath_float4.c
      test_fpmath_float.c
      test_hiloeo.c
      test_hostptr.c
      test_if.c
      test_imagearraycopy3d.c
      test_imagearraycopy.c
      test_imagecopy3d.c
      test_imagecopy.c
      test_imagedim.c
      test_image_multipass.c
      test_imagenpot.c
      test_image_param.c
      test_image_r8.c
      test_imagerandomcopy.c
      test_imagereadwrite3d.c
      test_imagereadwrite.c
      test_int2float.c
      test_intmath_int2.c
      test_intmath_int4.c
      test_intmath_int.c
      test_intmath_long2.c
      test_intmath_long4.c
      test_intmath_long.c
      test_local.c
      test_loop.c
      test_multireadimagemultifmt.c
      test_multireadimageonefmt.c
      test_pointercast.c
      test_readimage3d.c
      test_readimage3d_fp32.c
      test_readimage3d_int16.c
      test_readimage.c
      test_readimage_fp32.c
      test_readimage_int16.c
      test_sizeof.c
      test_vec_type_hint.c
      test_vector_creation.cpp
      test_vloadstore.c
      test_work_item_functions.cpp
      test_writeimage.c
      test_writeimage_fp32.c
      test_writeimage_int16.c
      test_numeric_constants.cpp
      test_kernel_call_kernel_function.cpp
      # Sources whose test entry points are referenced by main.c's basefn_list
      # and which the CMake and Makefile builds already compile; without them
      # the executable cannot link.
      test_async_strided_copy.cpp
      test_bufferreadwriterect.c
      test_constant_source.cpp
      test_global_work_offsets.cpp
      test_kernel_memory_alignment.cpp
      test_local_kernel_scope.cpp
      test_preprocessors.cpp
    ;

# Install the executable under $(DIST)/<variant>/tests/test_conformance/basic.
install dist
    : test_basic
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/basic
      <variant>release:<location>$(DIST)/release/tests/test_conformance/basic
    ;

View File

@@ -0,0 +1,94 @@
# Makefile for the test_basic conformance executable (links against the
# OpenCL, OpenGL, GLUT and AppKit frameworks).

# Optional ATF support: define BUILD_WITH_ATF to link the framework and
# compile with -DUSE_ATF.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Test driver and tests first, shared harness sources last.
SRCS = main.c \
        test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c \
        test_intmath_int.c test_intmath_int2.c test_intmath_int4.c \
        test_intmath_long.c test_intmath_long2.c test_intmath_long4.c \
        test_hiloeo.c test_local.c test_local_kernel_scope.cpp test_pointercast.c \
        test_if.c test_sizeof.c test_loop.c \
        test_readimage.c test_readimage_int16.c test_readimage_fp32.c \
        test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c \
        test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c \
        test_multireadimageonefmt.c test_multireadimagemultifmt.c \
        test_imagedim.c \
        test_vloadstore.c \
        test_int2float.c test_float2int.c \
        test_createkernelsinprogram.c \
        test_hostptr.c \
        test_explicit_s2v.cpp \
        test_constant.c \
        test_constant_source.cpp \
        test_image_multipass.c \
        test_imagereadwrite.c test_imagereadwrite3d.c \
        test_bufferreadwriterect.c \
        test_image_param.c \
        test_imagenpot.c \
        test_image_r8.c \
        test_barrier.c \
        test_arrayreadwrite.c \
        test_arraycopy.c \
        test_imagearraycopy.c \
        test_imagearraycopy3d.c \
        test_imagecopy.c \
        test_imagerandomcopy.c \
        test_arrayimagecopy.c \
        test_arrayimagecopy3d.c \
        test_imagecopy3d.c \
        test_enqueue_map.cpp \
        test_work_item_functions.cpp \
        test_astype.cpp \
        test_async_copy.cpp \
        test_async_strided_copy.cpp \
        test_numeric_constants.cpp \
        test_kernel_call_kernel_function.cpp \
        test_basic_parameter_types.c \
        test_vector_creation.cpp \
        test_vec_type_hint.c \
        test_preprocessors.cpp \
        test_kernel_memory_alignment.cpp \
        test_global_work_offsets.cpp \
        ../../test_common/harness/errorHelpers.c \
        ../../test_common/harness/threadTesting.c \
        ../../test_common/harness/testHarness.c \
        ../../test_common/harness/rounding_mode.c \
        ../../test_common/harness/kernelHelpers.c \
        ../../test_common/harness/typeWrappers.cpp \
        ../../test_common/harness/imageHelpers.cpp \
        ../../test_common/harness/mt19937.c \
        ../../test_common/harness/conversions.c

DEFINES =
SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
FRAMEWORK = $(SOURCES)
HEADERS =
TARGET = test_basic
INCLUDE =
COMPILERFLAGS = -c -Wall -g -O0 -Wshorten-64-to-32
# The C++ driver is used for both .c and .cpp sources and for linking.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Map every .c / .cpp source to its object file; objects are produced by
# make's built-in compile rules using CC/CXX flags above.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}
TARGETOBJECT =

# all and clean name actions, not files, so they must always run.
.PHONY: all clean

all: $(TARGET)

# Recipe lines below start with a TAB character, as make requires.
$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Fallback for any target that has no rule.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,263 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif
#include <stdio.h>
#include <string.h>
#include "../../test_common/harness/testHarness.h"
#include "procs.h"
// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h
// (for example, generate_random_image_data()), the tests are required to declare
// the following variables. They are defined here with neutral defaults.
// NOTE(review): how imageHelpers reads these values is not visible in this
// file -- confirm before changing the defaults.
cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
bool gTestRounding = false;
// Table of test entry points passed to runTestHarness() in main().
// Entry i here is paired by position with basefn_names[i], so the two tables
// must be kept in the same order; the ct_assert after basefn_names only
// checks that they have the same number of entries.
basefn basefn_list[] = {
test_hostptr,
test_fpmath_float,
test_fpmath_float2,
test_fpmath_float4,
test_intmath_int,
test_intmath_int2,
test_intmath_int4,
test_intmath_long,
test_intmath_long2,
test_intmath_long4,
test_hiloeo,
test_if,
test_sizeof,
test_loop,
test_pointer_cast,
test_local_arg_def,
test_local_kernel_def,
test_local_kernel_scope,
test_constant,
test_constant_source,
test_readimage,
test_readimage_int16,
test_readimage_fp32,
test_writeimage,
test_writeimage_int16,
test_writeimage_fp32,
test_multireadimageonefmt,
test_multireadimagemultifmt,
test_image_r8,
test_barrier,
test_int2float,
test_float2int,
test_imagereadwrite,
test_imagereadwrite3d,
test_readimage3d,
test_readimage3d_int16,
test_readimage3d_fp32,
test_bufferreadwriterect,
test_arrayreadwrite,
test_arraycopy,
test_imagearraycopy,
test_imagearraycopy3d,
test_imagecopy,
test_imagecopy3d,
test_imagerandomcopy,
test_arrayimagecopy,
test_arrayimagecopy3d,
test_imagenpot,
test_vload_global,
test_vload_local,
test_vload_constant,
test_vload_private,
test_vstore_global,
test_vstore_local,
test_vstore_private,
test_createkernelsinprogram,
test_imagedim_pow2,
test_imagedim_non_pow2,
test_image_param,
test_image_multipass_integer_coord,
test_image_multipass_float_coord,
test_explicit_s2v_bool,
test_explicit_s2v_char,
test_explicit_s2v_uchar,
test_explicit_s2v_short,
test_explicit_s2v_ushort,
test_explicit_s2v_int,
test_explicit_s2v_uint,
test_explicit_s2v_long,
test_explicit_s2v_ulong,
test_explicit_s2v_float,
test_explicit_s2v_double,
test_enqueue_map_buffer,
test_enqueue_map_image,
test_work_item_functions,
test_astype,
test_async_copy_global_to_local,
test_async_copy_local_to_global,
test_async_strided_copy_global_to_local,
test_async_strided_copy_local_to_global,
test_prefetch,
test_kernel_call_kernel_function,
test_host_numeric_constants,
test_kernel_numeric_constants,
test_kernel_limit_constants,
test_kernel_preprocessor_macros,
test_basic_parameter_types,
test_vector_creation,
test_vec_type_hint,
test_kernel_memory_alignment_local,
test_kernel_memory_alignment_global,
test_kernel_memory_alignment_constant,
test_kernel_memory_alignment_private,
test_global_work_offsets,
test_get_global_offset
};
// Test names passed to runTestHarness() alongside basefn_list; entry i names
// basefn_list[i]. Most names are the function name without the "test_"
// prefix, but a few differ (e.g. "mri_one", "mri_multiple", "parameter_types"),
// so the pairing is purely positional.
const char *basefn_names[] = {
"hostptr",
"fpmath_float",
"fpmath_float2",
"fpmath_float4",
"intmath_int",
"intmath_int2",
"intmath_int4",
"intmath_long",
"intmath_long2",
"intmath_long4",
"hiloeo",
"if",
"sizeof",
"loop",
"pointer_cast",
"local_arg_def",
"local_kernel_def",
"local_kernel_scope",
"constant",
"constant_source",
"readimage",
"readimage_int16",
"readimage_fp32",
"writeimage",
"writeimage_int16",
"writeimage_fp32",
"mri_one",
"mri_multiple",
"image_r8",
"barrier",
"int2float",
"float2int",
"imagereadwrite",
"imagereadwrite3d",
"readimage3d",
"readimage3d_int16",
"readimage3d_fp32",
"bufferreadwriterect",
"arrayreadwrite",
"arraycopy",
"imagearraycopy",
"imagearraycopy3d",
"imagecopy",
"imagecopy3d",
"imagerandomcopy",
"arrayimagecopy",
"arrayimagecopy3d",
"imagenpot",
"vload_global",
"vload_local",
"vload_constant",
"vload_private",
"vstore_global",
"vstore_local",
"vstore_private",
"createkernelsinprogram",
"imagedim_pow2",
"imagedim_non_pow2",
"image_param",
"image_multipass_integer_coord",
"image_multipass_float_coord",
"explicit_s2v_bool",
"explicit_s2v_char",
"explicit_s2v_uchar",
"explicit_s2v_short",
"explicit_s2v_ushort",
"explicit_s2v_int",
"explicit_s2v_uint",
"explicit_s2v_long",
"explicit_s2v_ulong",
"explicit_s2v_float",
"explicit_s2v_double",
"enqueue_map_buffer",
"enqueue_map_image",
"work_item_functions",
"astype",
"async_copy_global_to_local",
"async_copy_local_to_global",
"async_strided_copy_global_to_local",
"async_strided_copy_local_to_global",
"prefetch",
"kernel_call_kernel_function",
"host_numeric_constants",
"kernel_numeric_constants",
"kernel_limit_constants",
"kernel_preprocessor_macros",
"parameter_types",
"vector_creation",
"vec_type_hint",
"kernel_memory_alignment_local",
"kernel_memory_alignment_global",
"kernel_memory_alignment_constant",
"kernel_memory_alignment_private",
"global_work_offsets",
"get_global_offset",
};
// Compile-time check that the name table and the function table have the same
// number of entries (it does not check that they are in the same order).
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
// Number of registered tests, derived from the name table.
int num_fns = sizeof(basefn_names) / sizeof(char *);
int main(int argc, const char *argv[])
{
int err = runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
return err;
}

View File

@@ -0,0 +1,142 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/rounding_mode.h"
// Helper declared for the tests in this directory; its definition is not in
// this header. NOTE(review): by its name and signature it presumably fills
// `bytes` bytes at `dest` with a repeating 4-byte pattern -- confirm against
// the implementation.
extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );

// Entry points of the individual tests. Every function below takes the same
// four arguments (device, context, command queue, element count) and returns
// an int status.
extern int test_hostptr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_fpmath_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_fpmath_float2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_fpmath_float4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_int2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_int4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_long2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_long4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_hiloeo(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_if(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_sizeof(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_loop(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_pointer_cast(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_local_arg_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_local_kernel_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_local_kernel_scope(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_constant_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_writeimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_writeimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_writeimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_multireadimageonefmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_multireadimagemultifmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_image_r8(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_simplebarrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_int2float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_float2int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagearraycopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagearraycopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagereadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagereadwrite3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage3d_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage3d_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_bufferreadwriterect(cl_device_id device, cl_context context, cl_command_queue queue_, int num_elements);
extern int test_imagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagerandomcopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
extern int test_arrayimagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_arrayimagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagenpot(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_sampler_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_sampler_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_createkernelsinprogram(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_single_large_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_multiple_max_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_arrayreadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagedim_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagedim_non_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_image_param(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_image_multipass_integer_coord(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_image_multipass_float_coord(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vload_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vload_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vload_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vload_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vstore_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vstore_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vstore_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_astype(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_native_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_limit_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_call_kernel_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_global_work_offsets(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_get_global_offset(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );

View File

@@ -0,0 +1,3 @@
#!/bin/sh
# Runs the buffer read/write/copy subset of test_basic from the directory that
# contains this script. Extra command-line arguments are forwarded unchanged.
#
# The directory and "$@" are quoted so that paths and arguments containing
# spaces survive, and the script stops if the directory cannot be entered
# instead of running whatever ./test_basic happens to be in the caller's cwd.
cd "$(dirname "$0")" || exit 1
./test_basic arrayreadwrite arraycopy bufferreadwriterect "$@"

View File

@@ -0,0 +1,3 @@
#!/bin/sh
# Runs the array <-> image copy subset of test_basic from the directory that
# contains this script. Extra command-line arguments are forwarded unchanged,
# matching the sibling launcher scripts; with no arguments the behaviour is
# the same as before.
#
# The directory is quoted so that paths containing spaces work, and the script
# stops if the directory cannot be entered.
cd "$(dirname "$0")" || exit 1
./test_basic arrayimagecopy arrayimagecopy3d imagearraycopy "$@"

View File

@@ -0,0 +1,17 @@
#!/bin/sh
# Runs the image-related subset of test_basic from the directory that contains
# this script. Extra command-line arguments are forwarded unchanged.
#
# The directory and "$@" are quoted so that paths and arguments containing
# spaces survive, and the script stops if the directory cannot be entered.
cd "$(dirname "$0")" || exit 1
./test_basic \
    imagecopy imagerandomcopy \
    imagearraycopy imagearraycopy3d \
    image_r8 \
    readimage readimage_int16 readimage_fp32 \
    writeimage writeimage_int16 writeimage_fp32 \
    imagenpot \
    image_param \
    image_multipass_integer_coord \
    readimage3d \
    readimage3d_int16 \
    readimage3d_fp32 \
    imagereadwrite3d \
    imagereadwrite \
    "$@"

View File

@@ -0,0 +1,4 @@
#!/bin/sh
# Runs the multi-read-image tests (mri_one, mri_multiple) of test_basic from
# the directory that contains this script. Extra command-line arguments are
# forwarded unchanged, matching the sibling launcher scripts; with no
# arguments the behaviour is the same as before.
#
# The directory is quoted so that paths containing spaces work, and the script
# stops if the directory cannot be entered.
cd "$(dirname "$0")" || exit 1
./test_basic mri_one mri_multiple "$@"

View File

@@ -0,0 +1,201 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source of the "test_copy" kernel used by test_arraycopy:
// each work-item copies the unsigned int at its global id from src to dst.
const char *copy_kernel_code =
"__kernel void test_copy(__global unsigned int *src, __global unsigned int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src[tid];\n"
"}\n";
/*
 * Copies a buffer of random cl_uint values into a common "results" buffer by
 * three different routes and checks after each one that the data read back
 * from "results" equals the host input:
 *   1. clEnqueueCopyBuffer from a CL_MEM_USE_HOST_PTR buffer,
 *   2. clEnqueueWriteBuffer into a plain buffer followed by clEnqueueCopyBuffer,
 *   3. a copy kernel reading from a CL_MEM_USE_HOST_PTR buffer.
 *
 * device, n_elems - not used by this test (the element count is fixed at 128K).
 * context, queue  - OpenCL context and command queue the test runs on.
 *
 * Returns 0 when all three phases pass, otherwise the number of mismatches
 * that were counted. A failing OpenCL call or a failed host allocation ends
 * the test early with a non-zero status.
 */
int
test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    cl_uint     *input_ptr, *output_ptr;
    cl_mem      streams[4], results;
    cl_program  program;
    cl_kernel   kernel;
    unsigned    num_elements = 128 * 1024;
    cl_uint     num_copies = 1;
    size_t      delta_offset;
    unsigned    i;
    cl_int      err;
    MTdata      d;
    int         error_count = 0;
    int         phase_failed;   // result of the phase currently being checked

    input_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
    output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
    if (input_ptr == NULL || output_ptr == NULL)
    {
        // Both buffers are dereferenced unconditionally below, so bail out now.
        log_error("Unable to allocate host memory for the arraycopy test\n");
        free(input_ptr);
        free(output_ptr);
        return -1;
    }

    // results
    results = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err);
    test_error(err, "clCreateBuffer failed");

/*****************************************************************************************************************************************/
#pragma mark client backing

    log_info("Testing CL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer\n");
    // randomize data
    d = init_genrand( gRandomSeed );
    for (i=0; i<num_elements; i++)
        input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);

    // client backing
    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
    test_error(err, "clCreateBuffer failed");

    delta_offset = num_elements * sizeof(cl_uint) / num_copies;
    for (i=0; i<num_copies; i++)
    {
        size_t offset = i * delta_offset;
        err = clEnqueueCopyBuffer(queue, streams[0], results, offset, offset, delta_offset, 0, NULL, NULL);
        test_error(err, "clEnqueueCopyBuffer failed");
    }

    // Try upload from client backing
    err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
    test_error(err, "clEnqueueReadBuffer failed");

    // This phase counts every mismatching element (no early break).
    phase_failed = 0;
    for (i=0; i<num_elements; i++)
    {
        if (input_ptr[i] != output_ptr[i])
        {
            phase_failed = 1;
            error_count++;
        }
    }

    if (phase_failed)
        log_error("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer FAILED\n");
    else
        log_info("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer passed\n");

#pragma mark framework backing (no client data)

    log_info("Testing with clEnqueueWriteBuffer and clEnqueueCopyBuffer\n");
    // randomize data
    for (i=0; i<num_elements; i++)
        input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);

    // no backing
    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE) , sizeof(cl_uint) * num_elements, NULL, &err);
    test_error(err, "clCreateBuffer failed");

    for (i=0; i<num_copies; i++)
    {
        size_t offset = i * delta_offset;

        // Copy the array up from host ptr
        err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_uint)*num_elements, input_ptr, 0, NULL, NULL);
        test_error(err, "clEnqueueWriteBuffer failed");

        err = clEnqueueCopyBuffer(queue, streams[2], results, offset, offset, delta_offset, 0, NULL, NULL);
        test_error(err, "clEnqueueCopyBuffer failed");
    }

    // Blocking read; CL_TRUE is used here like in the other two phases.
    err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
    test_error(err, "clEnqueueReadBuffer failed");

    phase_failed = 0;
    for (i=0; i<num_elements; i++)
    {
        if (input_ptr[i] != output_ptr[i])
        {
            phase_failed = 1;
            error_count++;
            break;
        }
    }

    if (phase_failed)
        log_error("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer FAILED\n");
    else
        log_info("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer passed\n");

/*****************************************************************************************************************************************/
#pragma mark kernel copy test

    log_info("Testing CL_MEM_USE_HOST_PTR buffer with kernel copy\n");
    // randomize data
    for (i=0; i<num_elements; i++)
        input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
    free_mtdata(d); d= NULL;

    // client backing
    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
    test_error(err, "clCreateBuffer failed");

    err = create_single_kernel_helper(context, &program, &kernel, 1, &copy_kernel_code, "test_copy" );
    test_error(err, "create_single_kernel_helper failed");

    err = clSetKernelArg(kernel, 0, sizeof streams[3], &streams[3]);
    err |= clSetKernelArg(kernel, 1, sizeof results, &results);
    test_error(err, "clSetKernelArg failed");

    size_t threads[3] = {num_elements, 0, 0};

    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    test_error(err, "clEnqueueNDRangeKernel failed");

    err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
    test_error(err, "clEnqueueReadBuffer failed");

    phase_failed = 0;
    for (i=0; i<num_elements; i++)
    {
        if (input_ptr[i] != output_ptr[i])
        {
            phase_failed = 1;
            error_count++;
            break;
        }
    }

    // Report the kernel-copy phase on its own result, so that a failure in an
    // earlier phase is not reported a second time as a kernel-copy failure.
    if (phase_failed)
        log_error("\tCL_MEM_USE_HOST_PTR buffer with kernel copy FAILED\n");
    else
        log_info("\tCL_MEM_USE_HOST_PTR buffer with kernel copy passed\n");

    // Keep track of multiple errors: the return value covers all three phases.
    if (error_count != 0)
        err = error_count;

    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseMemObject(results);
    clReleaseMemObject(streams[0]);
    clReleaseMemObject(streams[2]);
    clReleaseMemObject(streams[3]);

    free(input_ptr);
    free(output_ptr);

    return err;
}

View File

@@ -0,0 +1,143 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/*
 * Round-trips random bytes through a 512x512 2D image of the given format:
 * the data is written to a buffer, copied buffer -> image with
 * clEnqueueCopyBufferToImage, read back with clEnqueueReadImage, and compared
 * byte-for-byte with what was written.
 *
 * device         - not used by this helper.
 * context, queue - OpenCL context and command queue to run on.
 * format         - image format under test.
 *
 * Returns 0 on success and a non-zero value on any failure. On a data
 * mismatch up to six differing elements are logged.
 */
int test_arrayimagecopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
{
    cl_uchar        *bufptr, *imgptr;
    clMemWrapper    buffer, image;
    int             img_width = 512;
    int             img_height = 512;
    size_t          elem_size;
    size_t          buffer_size;
    int             i, j;
    cl_int          err;
    MTdata          d;
    cl_event        copyevent;

    log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));

    image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err);
    test_error(err, "create_image_2d failed");

    err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
    test_error(err, "clGetImageInfo failed");

    buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;

    buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err);
    test_error(err, "clCreateBuffer failed");

    // Host copies of the data: bufptr is what gets written, imgptr what is
    // read back. Each is allocated exactly once and checked before use.
    bufptr = (cl_uchar*)malloc(buffer_size);
    imgptr = (cl_uchar*)malloc(buffer_size);
    if (bufptr == NULL || imgptr == NULL) {
        log_error("ERROR: Unable to allocate host memory for the image copy test\n");
        free(bufptr);
        free(imgptr);
        return -1;
    }

    d = init_genrand( gRandomSeed );
    for (i=0; i<(int)buffer_size; i++) {
        bufptr[i] = (cl_uchar)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    size_t origin[3]={0,0,0}, region[3]={(size_t)img_width,(size_t)img_height,1};

    // test_error returns from this function on failure, so the host buffers
    // are freed first on each of the failure paths below.
    err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
    if (err) {
        free(bufptr);
        free(imgptr);
    }
    test_error(err, "clEnqueueWriteBuffer failed");

    err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
    if (err) {
        free(bufptr);
        free(imgptr);
    }
    test_error(err, "clEnqueueCopyBufferToImage failed");

    // The blocking read waits on the copy event; once the read call has
    // returned the event is no longer needed and must be released.
    err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
    clReleaseEvent(copyevent);
    if (err) {
        free(bufptr);
        free(imgptr);
    }
    test_error(err, "clEnqueueReadImage failed");

    if (memcmp(bufptr, imgptr, buffer_size) != 0) {
        log_error( "ERROR: Results did not validate!\n" );
        unsigned char * inchar = (unsigned char*)bufptr;    // expected: data written
        unsigned char * outchar = (unsigned char*)imgptr;   // actual: data read back
        int failuresPrinted = 0;
        for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
            int failed = 0;
            for (j=0; j<(int)elem_size; j++)
                if (inchar[i+j] != outchar[i+j])
                    failed = 1;
            char values[4096];
            values[0] = 0;
            if (failed) {
                sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
                for (j=0; j<(int)elem_size; j++)
                    sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
                sprintf(values + strlen(values), "] != expected [");
                for (j=0; j<(int)elem_size; j++)
                    sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
                sprintf(values + strlen(values), "]");
                log_error("%s\n", values);
                failuresPrinted++;
            }
            if (failuresPrinted > 5) {
                log_error("Not printing further failures...\n");
                break;
            }
        }
        err = -1;
    }

    free(bufptr);
    free(imgptr);

    if (err)
        log_error("ARRAY to IMAGE copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
                  (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);

    return err;
}
/*
 * Runs the buffer -> 2D image copy test once for every CL_MEM_READ_WRITE
 * 2D image format the context reports.
 *
 * device         - passed to PASSIVE_REQUIRE_IMAGE_SUPPORT and to the
 *                  per-format helper.
 * context, queue - OpenCL context and command queue to run on.
 * num_elements   - not used by this test.
 *
 * Returns 0 if every format passes; otherwise the OR of the per-format
 * results (non-zero).
 */
int test_arrayimagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int err;
    cl_image_format *formats;
    cl_uint num_formats;
    cl_uint i;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // First query: number of supported formats only.
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats);
    test_error(err, "clGetSupportedImageFormats failed");

    formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
    if (formats == NULL) {
        log_error("ERROR: Unable to allocate memory for the image format list\n");
        return -1;
    }

    // Second query: fill the list. test_error returns on failure, so the list
    // is freed first in that case.
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL);
    if (err)
        free(formats);
    test_error(err, "clGetSupportedImageFormats failed");

    for (i = 0; i < num_formats; i++) {
        err |= test_arrayimagecopy_single_format(device, context, queue, &formats[i]);
    }

    // The format list is no longer needed once every format has been tested.
    free(formats);

    if (err)
        log_error("ARRAY to IMAGE copy test failed\n");
    else
        log_info("ARRAY to IMAGE copy test passed\n");

    return err;
}

View File

@@ -0,0 +1,144 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/*
 * Round-trips random bytes through a 128x128x32 3D image of the given format:
 * the data is written to a buffer, copied buffer -> image with
 * clEnqueueCopyBufferToImage, read back with clEnqueueReadImage, and compared
 * byte-for-byte with what was written.
 *
 * device         - not used by this helper.
 * context, queue - OpenCL context and command queue to run on.
 * format         - image format under test.
 *
 * Returns 0 on success and a non-zero value on any failure. On a data
 * mismatch up to six differing elements are logged.
 */
int test_arrayimagecopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
{
    cl_uchar        *bufptr, *imgptr;
    clMemWrapper    buffer, image;
    int             img_width = 128;
    int             img_height = 128;
    int             img_depth = 32;
    size_t          elem_size;
    size_t          buffer_size;
    int             i, j;
    cl_int          err;
    MTdata          d;
    cl_event        copyevent;

    log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));

    image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err);
    test_error(err, "create_image_3d failed");

    err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
    test_error(err, "clGetImageInfo failed");

    buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;

    buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err);
    test_error(err, "clCreateBuffer failed");

    // Host copies of the data: bufptr is what gets written, imgptr what is
    // read back. Each is allocated exactly once and checked before use.
    bufptr = (cl_uchar*)malloc(buffer_size);
    imgptr = (cl_uchar*)malloc(buffer_size);
    if (bufptr == NULL || imgptr == NULL) {
        log_error("ERROR: Unable to allocate host memory for the 3D image copy test\n");
        free(bufptr);
        free(imgptr);
        return -1;
    }

    d = init_genrand( gRandomSeed );
    for (i=0; i<(int)buffer_size; i++) {
        bufptr[i] = (cl_uchar)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    size_t origin[3]={0,0,0}, region[3]={(size_t)img_width,(size_t)img_height,(size_t)img_depth};

    // test_error returns from this function on failure, so the host buffers
    // are freed first on each of the failure paths below.
    err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
    if (err) {
        free(bufptr);
        free(imgptr);
    }
    test_error(err, "clEnqueueWriteBuffer failed");

    err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
    if (err) {
        free(bufptr);
        free(imgptr);
    }
    test_error(err, "clEnqueueCopyBufferToImage failed");

    // The blocking read waits on the copy event; once the read call has
    // returned the event is no longer needed and must be released.
    err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
    clReleaseEvent(copyevent);
    if (err) {
        free(bufptr);
        free(imgptr);
    }
    test_error(err, "clEnqueueReadImage failed");

    if (memcmp(bufptr, imgptr, buffer_size) != 0) {
        log_error( "ERROR: Results did not validate!\n" );
        unsigned char * inchar = (unsigned char*)bufptr;    // expected: data written
        unsigned char * outchar = (unsigned char*)imgptr;   // actual: data read back
        int failuresPrinted = 0;
        for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
            int failed = 0;
            for (j=0; j<(int)elem_size; j++)
                if (inchar[i+j] != outchar[i+j])
                    failed = 1;
            char values[4096];
            values[0] = 0;
            if (failed) {
                sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
                for (j=0; j<(int)elem_size; j++)
                    sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
                sprintf(values + strlen(values), "] != expected [");
                for (j=0; j<(int)elem_size; j++)
                    sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
                sprintf(values + strlen(values), "]");
                log_error("%s\n", values);
                failuresPrinted++;
            }
            if (failuresPrinted > 5) {
                log_error("Not printing further failures...\n");
                break;
            }
        }
        err = -1;
    }

    free(bufptr);
    free(imgptr);

    if (err)
        log_error("ARRAY to IMAGE3D copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
                  (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);

    return err;
}
/*
 * Runs the buffer -> 3D image copy test once for every CL_MEM_READ_WRITE
 * 3D image format the context reports.
 *
 * device         - passed to PASSIVE_REQUIRE_3D_IMAGE_SUPPORT and to the
 *                  per-format helper.
 * context, queue - OpenCL context and command queue to run on.
 * num_elements   - not used by this test.
 *
 * Returns 0 if every format passes; otherwise the OR of the per-format
 * results (non-zero).
 */
int test_arrayimagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int err;
    cl_image_format *formats;
    cl_uint num_formats;
    cl_uint i;

    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )

    // First query: number of supported formats only.
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
    test_error(err, "clGetSupportedImageFormats failed");

    formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
    if (formats == NULL) {
        log_error("ERROR: Unable to allocate memory for the image format list\n");
        return -1;
    }

    // Second query: fill the list. test_error returns on failure, so the list
    // is freed first in that case.
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL);
    if (err)
        free(formats);
    test_error(err, "clGetSupportedImageFormats failed");

    for (i = 0; i < num_formats; i++) {
        err |= test_arrayimagecopy3d_single_format(device, context, queue, &formats[i]);
    }

    // The format list is no longer needed once every format has been tested.
    free(formats);

    if (err)
        log_error("ARRAY to IMAGE3D copy test failed\n");
    else
        log_info("ARRAY to IMAGE3D copy test passed\n");

    return err;
}

View File

@@ -0,0 +1,94 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/*
 * Writes random sub-ranges of a 4M-element cl_uint array into a device buffer
 * with clEnqueueWriteBuffer, reads the same range back with
 * clEnqueueReadBuffer, and checks that the data is unchanged. 400 randomly
 * chosen (offset, length) ranges are tried.
 *
 * device         - not used by this test.
 * context, queue - OpenCL context and command queue to run on.
 * num_elements   - ignored on entry; overwritten with the fixed array size.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int
test_arrayreadwrite(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_uint *inptr, *outptr;
    cl_mem streams[1];
    int num_tries = 400;
    num_elements = 1024 * 1024 * 4;
    int i, j, err;
    MTdata d;

    inptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
    outptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
    if (inptr == NULL || outptr == NULL)
    {
        // Two 16 MiB allocations; both are dereferenced unconditionally below.
        log_error("Unable to allocate host memory for the ARRAY read, write test\n");
        free(inptr);
        free(outptr);
        return -1;
    }

    // randomize data
    d = init_genrand( gRandomSeed );
    for (i=0; i<num_elements; i++)
        inptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err);
    test_error(err, "clCreateBuffer failed");

    for (i=0; i<num_tries; i++)
    {
        int offset;
        int cb;

        // Draw random values until one falls strictly inside the array.
        do {
            offset = (int)(genrand_int32(d) & 0x7FFFFFFF);
            if (offset > 0 && offset < num_elements)
                break;
        } while (1);

        // Element count of the transfer, clamped to what is left after offset.
        cb = (int)(genrand_int32(d) & 0x7FFFFFFF);
        if (cb > (num_elements - offset))
            cb = num_elements - offset;
        // A zero-byte read/write is rejected with CL_INVALID_VALUE by the
        // OpenCL versions this suite targets, so transfer at least one element
        // (offset < num_elements guarantees that one element is available).
        if (cb < 1)
            cb = 1;

        err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), sizeof(cl_uint)*cb,&inptr[offset], 0, NULL, NULL);
        test_error(err, "clEnqueueWriteBuffer failed");

        err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), cb*sizeof(cl_uint), &outptr[offset], 0, NULL, NULL );
        test_error(err, "clEnqueueReadBuffer failed");

        // Only the range that was just transferred is compared.
        for (j=offset; j<offset+cb; j++)
        {
            if (inptr[j] != outptr[j])
            {
                log_error("ARRAY read, write test failed\n");
                err = -1;
                break;
            }
        }

        if (err)
            break;
    }

    free_mtdata(d);
    clReleaseMemObject(streams[0]);
    free(inptr);
    free(outptr);

    if (!err)
        log_info("ARRAY read, write test passed\n");

    return err;
}

View File

@@ -0,0 +1,289 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/typeWrappers.h"
// Generic kernel template, selected by test_astype_set when neither the source
// nor the destination is a 3-component vector.
// sprintf arguments, in order: the optional pragma line, the source type name
// and its size suffix, the destination type name and its size suffix, and then
// the destination type name and size suffix twice more (once for the temporary,
// once for the as_ call).
static const char *astype_kernel_pattern =
"%s\n"
"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( src[ tid ] );\n"
" dst[ tid ] = tmp;\n"
"}\n";
// Kernel template for the case where both source and destination are
// 3-component vectors: both buffers are declared with the bare element type and
// accessed through vload3/vstore3.
// sprintf arguments, in order: the optional pragma line, the source element type
// name, the destination element type name, and then the destination type name
// and size suffix twice (temporary and as_ call).
static const char *astype_kernel_pattern_V3srcV3dst =
"%s\n"
"__kernel void test_fn( __global %s *src, __global %s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
" vstore3(tmp,tid,dst);\n"
"}\n";
// For astype_kernel_pattern_V3srcV3dst above: compared with the sprintf call
// used for astype_kernel_pattern, the third and fifth arguments (each of which
// would be a "3") are removed.

// Kernel template for the case where only the destination is a 3-component
// vector: the "3" suffix is hard-coded in the template and the result is
// written with vstore3 through a pointer to the bare element type.
// sprintf arguments, in order: the optional pragma line, the source type name
// and its size suffix, and then the destination element type name three times.
static const char *astype_kernel_pattern_V3dst =
"%s\n"
"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s3 tmp = as_%s3( src[ tid ] );\n"
" vstore3(tmp,tid,dst);\n"
"}\n";
// For astype_kernel_pattern_V3dst above: compared with the sprintf call used
// for astype_kernel_pattern, the fifth argument (which would be a "3") is
// removed.

// Kernel template for the case where only the source is a 3-component vector:
// the input is read with vload3 through a pointer to the bare element type.
// sprintf arguments, in order: the optional pragma line, the source element type
// name, and then the destination type name and size suffix three times.
static const char *astype_kernel_pattern_V3src =
"%s\n"
"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
" dst[ tid ] = tmp;\n"
"}\n";
// For astype_kernel_pattern_V3src above: compared with the sprintf call used
// for astype_kernel_pattern, the third argument (which would be a "3") is
// removed.
/*
 * Builds a kernel that applies as_<outVecType><outVecSize>() to values of type
 * <inVecType><vecSize>, runs it over numElements random samples and checks that
 * the bytes written by the kernel equal the bytes that were read.
 *
 * One of four kernel templates is chosen depending on whether the source
 * and/or destination is a 3-component vector (those are accessed through
 * vload3/vstore3 on element-typed pointers).
 *
 * Some combinations are still compiled and executed but never compared:
 *   - a 3-component source reinterpreted as a 4-component destination, and
 *   - different component counts where neither side is a 3-component vector.
 *
 * Returns 0 when the data validates or the comparison is skipped, 1 on a data
 * mismatch, and -1 when the host buffers cannot be allocated.
 */
int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType,
unsigned int vecSize, unsigned int outVecSize,
int numElements )
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 2 ];
    char programSrc[ 10240 ];
    size_t threads[ 1 ], localThreads[ 1 ];
    size_t typeSize = get_explicit_type_size( inVecType );
    size_t outTypeSize = get_explicit_type_size(outVecType);
    // Indexed by component count; only 2, 3, 4, 8 and 16 produce a suffix.
    char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
    MTdata d;

    // The same pragma line is needed by every template when doubles are involved.
    const char *pragmaLine = (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "";

    // Create program
    if(outVecSize == 3 && vecSize == 3) {
        // Both buffers use bare element types, so neither takes a size suffix.
        sprintf( programSrc, astype_kernel_pattern_V3srcV3dst,
                 pragmaLine,
                 get_explicit_type_name( inVecType ),
                 get_explicit_type_name( outVecType ),
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] );
    } else if(outVecSize == 3) {
        // The template hard-codes the "3" suffix for the destination.
        sprintf( programSrc, astype_kernel_pattern_V3dst,
                 pragmaLine,
                 get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
                 get_explicit_type_name( outVecType ),
                 get_explicit_type_name( outVecType ),
                 get_explicit_type_name( outVecType ));
    } else if(vecSize == 3) {
        // The source buffer uses the bare element type (read through vload3).
        sprintf( programSrc, astype_kernel_pattern_V3src,
                 pragmaLine,
                 get_explicit_type_name( inVecType ),
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
    } else {
        sprintf( programSrc, astype_kernel_pattern,
                 pragmaLine,
                 get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
    }

    const char *ptr = programSrc;
    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
    test_error( error, "Unable to create testing kernel" );

    // Create some input values
    size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize;
    char *inBuffer = (char*)malloc( inBufferSize );
    size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize;
    char *outBuffer = (char*)malloc( outBufferSize );
    if( inBuffer == NULL || outBuffer == NULL )
    {
        // Do not hand NULL to the data generator or to the OpenCL runtime.
        log_error( "ERROR: Unable to allocate host buffers for astype test\n" );
        free(inBuffer);
        free(outBuffer);
        return -1;
    }

    d = init_genrand( gRandomSeed );
    generate_random_data( inVecType, numElements * vecSize,
                          d, inBuffer );
    free_mtdata(d); d = NULL;

    // Create I/O streams and set arguments
    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error );
    test_error( error, "Unable to create I/O stream" );
    streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error );
    test_error( error, "Unable to create I/O stream" );

    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
    test_error( error, "Unable to set kernel argument" );

    // Run the kernel
    threads[ 0 ] = numElements;
    error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
    test_error( error, "Unable to get group size to run with" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Unable to run kernel" );

    // Get the results and compare
    // The beauty is that astype is supposed to return the bit pattern as a different type, which means
    // the output should have the exact same bit pattern as the input. No interpretation necessary!
    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    char *expected = inBuffer;
    char *actual = outBuffer;

    // Compare only the bytes both sides have in common (the smaller vector).
    size_t compSize = typeSize*vecSize;
    if(outTypeSize*outVecSize < compSize) {
        compSize = outTypeSize*outVecSize;
    }

    if(outVecSize == 4 && vecSize == 3)
    {
        // as_type4(vec3) should compile but produce undefined results??
        free(inBuffer);
        free(outBuffer);
        return 0;
    }

    if(outVecSize != 3 && vecSize != 3 && outVecSize != vecSize)
    {
        // as_typen(vecm) should compile and run but produce
        // implementation-defined results for m != n
        // and n*sizeof(type) = sizeof(vecm)
        free(inBuffer);
        free(outBuffer);
        return 0;
    }

    for( int i = 0; i < numElements; i++ )
    {
        if( memcmp( expected, actual, compSize ) != 0 )
        {
            char expectedString[ 1024 ], actualString[ 1024 ];
            // Describe the destination with its own width and element size: the
            // output buffer only holds outVecSize components per sample, so
            // formatting it with the source layout would name the wrong
            // conversion and could read past the end of outBuffer.
            log_error( "ERROR: Data sample %d of %d for as_%s%d( %s%d ) did not validate (expected {%s}, got {%s})\n",
                       (int)i, (int)numElements, get_explicit_type_name( outVecType ), outVecSize, get_explicit_type_name( inVecType ), vecSize,
                       GetDataVectorString( expected, typeSize, vecSize, expectedString ),
                       GetDataVectorString( actual, outTypeSize, outVecSize, actualString ) );
            log_error("Src is :\n%s\n----\n%d threads %d localthreads\n",
                      programSrc, (int)threads[0],(int) localThreads[0]);
            free(inBuffer);
            free(outBuffer);
            return 1;
        }
        // Each side advances by its own per-sample stride.
        expected += typeSize * vecSize;
        actual += outTypeSize * outVecSize;
    }

    free(inBuffer);
    free(outBuffer);
    return 0;
}
/*
 * Runs test_astype_set for every ordered pair of distinct element types and
 * every pair of vector widths whose total byte size matches, plus the explicit
 * 3 <-> 4 component cases for element types of equal size.
 *
 * Double types are skipped when cl_khr_fp64 is not reported, and long/ulong
 * types are skipped when gHasLong is not set.
 *
 * Returns the sum of the values returned by test_astype_set (0 when all pass).
 */
int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    // Note: although casting to different vector element sizes that match the same size (i.e. short2 -> char4) is
    // legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way
    // for us to verify what is "valid". So the only thing we can test are types that match in size independent
    // of the element count (char -> uchar, etc)
    ExplicitType typeList[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int widthList[] = { 1, 2, 3, 4, 8, 16, 0 };
    int failures = 0;

    for( unsigned int srcIdx = 0; typeList[ srcIdx ] != kNumExplicitTypes; srcIdx++ )
    {
        ExplicitType srcType = typeList[ srcIdx ];
        size_t srcTypeSize = get_explicit_type_size( srcType );

        if( srcType == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
            continue;
        if( ( srcType == kLong || srcType == kULong ) && !gHasLong )
            continue;

        for( unsigned int dstIdx = 0; typeList[ dstIdx ] != kNumExplicitTypes; dstIdx++ )
        {
            ExplicitType dstType = typeList[ dstIdx ];
            size_t dstTypeSize = get_explicit_type_size( dstType );

            if( dstType == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
                continue;
            if( ( dstType == kLong || dstType == kULong ) && !gHasLong )
                continue;

            // A type is never reinterpreted as itself.
            if( srcIdx == dstIdx )
                continue;

            log_info( " (%s->%s)\n", get_explicit_type_name( srcType ), get_explicit_type_name( dstType ) );
            fflush( stdout );

            // Try every width pair whose total size in bytes is identical.
            for( unsigned int inW = 0; widthList[ inW ] != 0; inW++ )
            {
                for( unsigned int outW = 0; widthList[ outW ] != 0; outW++ )
                {
                    if( widthList[ inW ]*srcTypeSize == widthList[ outW ]*dstTypeSize )
                    {
                        failures += test_astype_set( device, context, queue, srcType, dstType, widthList[ inW ], widthList[ outW ], n_elems );
                    }
                }
            }

            if( srcTypeSize == dstTypeSize )
            {
                // as_type3(vec4) allowed, as_type4(vec3) not allowed
                failures += test_astype_set( device, context, queue, srcType, dstType, 3, 4, n_elems );
                failures += test_astype_set( device, context, queue, srcType, dstType, 4, 3, n_elems );
            }
        }
    }

    return failures;
}

View File

@@ -0,0 +1,276 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
// Kernel template for the global -> local async copy test.
// Each work-group zeroes its local buffer, copies copiesPerWorkgroup elements
// from src into it with async_work_group_copy, waits for the event, and then
// every work-item writes its share of the local buffer out to dst.
// The template contains eight %s fields, filled in by the sprintf in test_copy.
static const char *async_global_to_local_kernel =
"%s\n" // optional pragma string
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n"
"{\n"
" int i;\n"
// Zero the local storage first
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_copy( (__local %s*)localBuffer, (__global const %s*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 );\n"
// Wait for the copy to complete, then verify by manually copying to the dest
" wait_group_events( 1, &event );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];\n"
"}\n" ;
// Kernel template for the local -> global async copy test.
// Each work-group zeroes its local buffer, fills it from src one work-item
// share at a time, and then copies copiesPerWorkgroup elements from the local
// buffer to dst with async_work_group_copy.
// The template contains eight %s fields, filled in by the sprintf in test_copy.
static const char *async_local_to_global_kernel =
"%s\n" // optional pragma string
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n"
"{\n"
" int i;\n"
// Zero the local storage first
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem+i ];\n"
// Do this to verify all kernels are done copying to the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_copy((__global %s*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const %s*)localBuffer, (size_t)copiesPerWorkgroup, 0 );\n"
" wait_group_events( 1, &event );\n"
"}\n" ;
// Kernel template for the prefetch test.
// Each work-item issues a prefetch for its share of src and then copies that
// share to dst directly; the local buffer argument is not used by the body.
// The "Ignore this" line soaks up three %s fields so that this template has
// the same eight %s fields as the two async copy templates above and can be
// formatted by the same sprintf call.
static const char *prefetch_kernel =
"%s\n" // optional pragma string
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n"
"{\n"
" // Ignore this: %s%s%s\n"
" int i;\n"
" prefetch( (const __global %s*)(src+copiesPerWorkItem*get_global_id(0)), copiesPerWorkItem);\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem+i ];\n"
"}\n" ;
/*
 * Formats kernelCode for the given element type and vector width, runs it and
 * checks that the output buffer is a byte-for-byte copy of the input buffer.
 *
 * kernelCode must be a template with eight %s fields in the order supplied by
 * the sprintf call below.
 *
 * The work-group size is chosen so that the local buffer (13 elements per
 * work-item) fits in half of CL_DEVICE_LOCAL_MEM_SIZE, capped by the kernel's
 * and the device's work-group limits.
 *
 * Returns 0 on success and -1 when the data does not validate or the host
 * buffers cannot be allocated.
 */
int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode,
ExplicitType vecType, int vecSize
)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 2 ];
    size_t threads[ 1 ], localThreads[ 1 ];
    void *inBuffer, *outBuffer;
    MTdata d;
    char vecNameString[64]; vecNameString[0] = 0;
    if (vecSize == 1)
        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
    else
        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType), vecSize);

    size_t elementSize = get_explicit_type_size(vecType)*vecSize;
    log_info("Testing %s\n", vecNameString);

    cl_long max_local_mem_size;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_local_mem_size), &max_local_mem_size, NULL);
    test_error( error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");

    // The value is not used below; the query itself is still checked.
    unsigned int num_of_compute_devices;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(num_of_compute_devices), &num_of_compute_devices, NULL);
    test_error( error, "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");

    char programSource[4096]; programSource[0]=0;
    char *programPtr;
    sprintf(programSource, kernelCode,
            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
            vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
    //log_info("program: %s\n", programSource);
    programPtr = programSource;

    error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test_fn" );
    test_error( error, "Unable to create testing kernel" );

    size_t max_workgroup_size;
    error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size), &max_workgroup_size, NULL);
    test_error (error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");

    size_t max_local_workgroup_size[3];
    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");

    // Pick the minimum of the device and the kernel
    if (max_workgroup_size > max_local_workgroup_size[0])
        max_workgroup_size = max_local_workgroup_size[0];

    size_t numberOfCopiesPerWorkitem = 13;
    size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize;
    size_t maxLocalWorkgroupSize = (((int)max_local_mem_size/2)/localStorageSpacePerWorkitem);

    // Calculation can return 0 on embedded devices due to 1KB local mem limit
    if(maxLocalWorkgroupSize == 0)
    {
        maxLocalWorkgroupSize = 1;
    }

    size_t localWorkgroupSize = maxLocalWorkgroupSize;
    if (maxLocalWorkgroupSize > max_workgroup_size)
        localWorkgroupSize = max_workgroup_size;

    size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem;
    size_t numberOfLocalWorkgroups = 1111;
    size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize;
    size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;

    inBuffer = (void*)malloc(globalBufferSize);
    outBuffer = (void*)malloc(globalBufferSize);
    if (inBuffer == NULL || outBuffer == NULL)
    {
        // Avoid passing NULL to memset and to the data generator.
        log_error( "ERROR: Unable to allocate host buffers for copy test\n" );
        free(inBuffer);
        free(outBuffer);
        return -1;
    }
    memset(outBuffer, 0, globalBufferSize);

    cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
    copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
    copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize);

    log_info("Global: %d, local %d, local buffer %db, global buffer %db, each work group will copy %d elements and each work item item will copy %d elements.\n",
             (int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, copiesPerWorkgroup, copiesPerWorkItemInt);

    threads[0] = globalWorkgroupSize;
    localThreads[0] = localWorkgroupSize;

    d = init_genrand( gRandomSeed );
    generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
    free_mtdata(d); d = NULL;

    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
    test_error( error, "Unable to create input buffer" );
    streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
    test_error( error, "Unable to set kernel argument" );
    // Argument 2 is the __local buffer: only its size is passed.
    error = clSetKernelArg( kernel, 2, localBufferSize, NULL );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt );
    test_error( error, "Unable to set kernel argument" );

    // Enqueue
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Unable to queue kernel" );

    // Read
    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    // Verify
    int result = 0;
    if( memcmp( inBuffer, outBuffer, globalBufferSize ) != 0 )
    {
        log_error( "ERROR: Results of copy did not validate!\n" );
        unsigned char * inchar = (unsigned char*)inBuffer;
        unsigned char * outchar = (unsigned char*)outBuffer;
        int failuresPrinted = 0;
        // Walk the buffers one vector element at a time and dump the first few mismatches.
        for (int i=0; i< (int)globalBufferSize; i+=(int)elementSize) {
            int failed = 0;
            for (int j=0; j<(int)elementSize; j++)
                if (inchar[i+j] != outchar[i+j])
                    failed = 1;
            char values[4096];
            values[0] = 0;
            if (failed) {
                sprintf(values + strlen( values), "%d -> [", i);
                for (int j=0; j<(int)elementSize; j++)
                    sprintf(values + strlen( values), "%2x ", inchar[i+j]);
                sprintf(values + strlen(values), "] != [");
                for (int j=0; j<(int)elementSize; j++)
                    sprintf(values + strlen( values), "%2x ", outchar[i+j]);
                sprintf(values + strlen(values), "]");
                log_error("%s\n", values);
                failuresPrinted++;
            }
            if (failuresPrinted > 5) {
                log_error("Not printing further failures...\n");
                break;
            }
        }
        result = -1;
    }

    // Release the host buffers on the failure path as well as on success.
    free(inBuffer);
    free(outBuffer);
    return result;
}
/*
 * Runs test_copy with kernelCode for every supported element type at vector
 * widths 1, 2, 4, 8 and 16.
 *
 * Double is skipped when cl_khr_fp64 is not reported and long/ulong are
 * skipped when gHasLong is not set.
 *
 * Returns 0 when every run passes and -1 when at least one run fails.
 */
int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) {
    ExplicitType typeList[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int widthList[] = { 1, 2, 4, 8, 16, 0 };
    int failedRuns = 0;

    for( unsigned int t = 0; typeList[ t ] != kNumExplicitTypes; t++ )
    {
        ExplicitType current = typeList[ t ];

        if( current == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
            continue;

        if( !gHasLong && ( current == kLong || current == kULong ) )
            continue;

        for( unsigned int w = 0; widthList[ w ] != 0; w++ )
        {
            if( test_copy( deviceID, context, queue, kernelCode, current, widthList[ w ] ) != 0 )
                failedRuns++;
        }
    }

    return failedRuns ? -1 : 0;
}
// Test entry point: runs the global -> local async copy kernel for all types.
// num_elements is not used by this test.
int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const char *kernelTemplate = async_global_to_local_kernel;
    return test_copy_all_types( deviceID, context, queue, kernelTemplate );
}
// Test entry point: runs the local -> global async copy kernel for all types.
// num_elements is not used by this test.
int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const char *kernelTemplate = async_local_to_global_kernel;
    return test_copy_all_types( deviceID, context, queue, kernelTemplate );
}
// Test entry point: runs the prefetch kernel for all types.
// num_elements is not used by this test.
int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const char *kernelTemplate = prefetch_kernel;
    return test_copy_all_types( deviceID, context, queue, kernelTemplate );
}

View File

@@ -0,0 +1,267 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
// Kernel template for the strided global -> local async copy test.
// Each work-group zeroes its local buffer, gathers copiesPerWorkgroup elements
// from src (one every `stride` elements) into it with
// async_work_group_strided_copy, waits for the event, and then every work-item
// writes its share back to dst at the same strided positions.
// The template contains nine %s fields, filled in by the sprintf in
// test_strided_copy; the second one prefixes the __kernel keyword.
static const char *async_strided_global_to_local_kernel =
"%s\n" // optional pragma string
"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
"{\n"
" int i;\n"
// Zero the local storage first
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_strided_copy( (__local %s*)localBuffer, (__global const %s*)(src+copiesPerWorkgroup*stride*get_group_id(0)), (size_t)copiesPerWorkgroup, (size_t)stride, 0 );\n"
// Wait for the copy to complete, then verify by manually copying to the dest
" wait_group_events( 1, &event );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" dst[ get_global_id( 0 )*copiesPerWorkItem*stride+i*stride ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];\n"
"}\n" ;
// Kernel template for the strided local -> global async copy test.
// Each work-group zeroes its local buffer, fills it from the strided positions
// of src, and then scatters copiesPerWorkgroup elements to dst (one every
// `stride` elements) with async_work_group_strided_copy.
// The template contains nine %s fields, filled in by the sprintf in
// test_strided_copy; the second one prefixes the __kernel keyword.
static const char *async_strided_local_to_global_kernel =
"%s\n" // optional pragma string
"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
"{\n"
" int i;\n"
// Zero the local storage first
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem*stride+i*stride ];\n"
// Do this to verify all kernels are done copying to the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_strided_copy((__global %s*)(dst+copiesPerWorkgroup*stride*get_group_id(0)), (__local const %s*)localBuffer, (size_t)copiesPerWorkgroup, (size_t)stride, 0 );\n"
" wait_group_events( 1, &event );\n"
"}\n" ;
/*
 * Formats kernelCode for the given element type and vector width, runs it with
 * the given element stride and checks that every stride-th element of the
 * output buffer equals the corresponding element of the input buffer.
 *
 * kernelCode must be a template with nine %s fields in the order supplied by
 * the sprintf call below.
 *
 * The work-group size is chosen so that the local buffer (3 elements per
 * work-item) fits in half of CL_DEVICE_LOCAL_MEM_SIZE, and the number of
 * work-groups is reduced so that the buffers stay within half of
 * CL_DEVICE_GLOBAL_MEM_SIZE.
 *
 * Returns 0 on success and -1 when the data does not validate or the host
 * buffers cannot be allocated.
 */
int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, ExplicitType vecType, int vecSize, int stride)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 2 ];
    size_t threads[ 1 ], localThreads[ 1 ];
    void *inBuffer, *outBuffer;
    MTdata d;
    char vecNameString[64]; vecNameString[0] = 0;

    if (vecSize == 1)
        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
    else
        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType), vecSize);

    size_t elementSize = get_explicit_type_size(vecType)*vecSize;
    log_info("Testing %s\n", vecNameString);

    cl_long max_local_mem_size;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_local_mem_size), &max_local_mem_size, NULL);
    test_error( error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");

    // The value is not used below; the query itself is still checked.
    unsigned int num_of_compute_devices;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(num_of_compute_devices), &num_of_compute_devices, NULL);
    test_error( error, "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");

    char programSource[4096]; programSource[0]=0;
    char *programPtr;
    sprintf(programSource, kernelCode,
            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
            "",
            vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
    //log_info("program: %s\n", programSource);
    programPtr = programSource;

    error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test_fn" );
    test_error( error, "Unable to create testing kernel" );

    size_t max_workgroup_size;
    error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size), &max_workgroup_size, NULL);
    test_error (error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");

    size_t max_local_workgroup_size[3];
    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");

    // Pick the minimum of the device and the kernel
    if (max_workgroup_size > max_local_workgroup_size[0])
        max_workgroup_size = max_local_workgroup_size[0];

    cl_ulong max_global_mem_size;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(max_global_mem_size), &max_global_mem_size, NULL);
    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");

    cl_bool unified_mem;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(unified_mem), &unified_mem, NULL);
    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY");

    // With unified memory the host copies are counted against the same limit.
    int number_of_global_mem_buffers = (unified_mem) ? 4 : 2;

    size_t numberOfCopiesPerWorkitem = 3;
    size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize;
    size_t maxLocalWorkgroupSize = (((int)max_local_mem_size/2)/localStorageSpacePerWorkitem);

    // The calculation can yield 0 when local memory is very small. A zero
    // work-group size would make localBufferSize zero and divide by zero in
    // the work-group limit below, so fall back to a single work-item.
    if (maxLocalWorkgroupSize == 0)
    {
        maxLocalWorkgroupSize = 1;
    }

    size_t localWorkgroupSize = maxLocalWorkgroupSize;
    if (maxLocalWorkgroupSize > max_workgroup_size)
        localWorkgroupSize = max_workgroup_size;

    size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem;
    size_t numberOfLocalWorkgroups = 579;//1111;

    // Reduce the numberOfLocalWorkgroups so that no more than 1/2 of CL_DEVICE_GLOBAL_MEM_SIZE is consumed
    // by the allocated buffer. This is done to avoid resource errors resulting from address space fragmentation.
    size_t numberOfLocalWorkgroupsLimit = max_global_mem_size / (2 * number_of_global_mem_buffers * localBufferSize * stride);
    if (numberOfLocalWorkgroups > numberOfLocalWorkgroupsLimit) numberOfLocalWorkgroups = numberOfLocalWorkgroupsLimit;

    size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize*stride;
    size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;

    inBuffer = (void*)malloc(globalBufferSize);
    outBuffer = (void*)malloc(globalBufferSize);
    if (inBuffer == NULL || outBuffer == NULL)
    {
        // Avoid passing NULL to memset and to the data generator.
        log_error( "ERROR: Unable to allocate host buffers for strided copy test\n" );
        free(inBuffer);
        free(outBuffer);
        return -1;
    }
    memset(outBuffer, 0, globalBufferSize);

    cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
    copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
    copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize);

    log_info("Global: %d, local %d, local buffer %db, global buffer %db, copy stride %d, each work group will copy %d elements and each work item item will copy %d elements.\n",
             (int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, (int)stride, copiesPerWorkgroup, copiesPerWorkItemInt);

    threads[0] = globalWorkgroupSize;
    localThreads[0] = localWorkgroupSize;

    d = init_genrand( gRandomSeed );
    generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
    free_mtdata(d); d = NULL;

    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
    test_error( error, "Unable to create input buffer" );
    streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
    test_error( error, "Unable to set kernel argument" );
    // Argument 2 is the __local buffer: only its size is passed.
    error = clSetKernelArg( kernel, 2, localBufferSize, NULL );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 5, sizeof(stride), &stride );
    test_error( error, "Unable to set kernel argument" );

    // Enqueue
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Unable to queue kernel" );

    // Read
    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    // Verify: only every stride-th element is copied by the kernel, so only
    // those positions are compared.
    int result = 0;
    for (int i=0; i<(int)globalBufferSize; i+=(int)elementSize*(int)stride)
    {
        if (memcmp( ((char *)inBuffer)+i, ((char *)outBuffer)+i, elementSize) != 0 )
        {
            // Both pointers already address the failing element, so the bytes
            // are indexed from 0 rather than offset by i a second time.
            unsigned char * inchar = (unsigned char*)inBuffer + i;
            unsigned char * outchar = (unsigned char*)outBuffer + i;
            char values[4096];
            values[0] = 0;

            log_error( "ERROR: Results of copy did not validate!\n" );
            sprintf(values + strlen( values), "%d -> [", i);
            for (int j=0; j<(int)elementSize; j++)
                sprintf(values + strlen( values), "%2x ", inchar[j]);
            sprintf(values + strlen(values), "] != [");
            for (int j=0; j<(int)elementSize; j++)
                sprintf(values + strlen( values), "%2x ", outchar[j]);
            sprintf(values + strlen(values), "]");
            log_error("%s\n", values);

            result = -1;
            break;
        }
    }

    // Release the host buffers on the failure path as well as on success.
    free(inBuffer);
    free(outBuffer);
    return result;
}
/*
 * Runs test_strided_copy with kernelCode for every supported element type, at
 * vector widths 1, 2, 4, 8 and 16 and at strides 1, 3, 4 and 5.
 *
 * Double is skipped when cl_khr_fp64 is not reported and long/ulong are
 * skipped when gHasLong is not set.
 *
 * Returns 0 when every run passes and -1 when at least one run fails.
 */
int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode)
{
    ExplicitType typeList[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int widthList[] = { 1, 2, 4, 8, 16, 0 };
    unsigned int strideList[] = { 1, 3, 4, 5, 0 };
    int failedRuns = 0;

    for( unsigned int t = 0; typeList[ t ] != kNumExplicitTypes; t++ )
    {
        ExplicitType current = typeList[ t ];

        if( current == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
            continue;

        if( !gHasLong && ( current == kLong || current == kULong ) )
            continue;

        for( unsigned int w = 0; widthList[ w ] != 0; w++ )
        {
            for( unsigned int s = 0; strideList[ s ] != 0; s++ )
            {
                if( test_strided_copy( deviceID, context, queue, kernelCode, current, widthList[ w ], strideList[ s ] ) != 0 )
                    failedRuns++;
            }
        }
    }

    return failedRuns ? -1 : 0;
}
// Test entry point: runs the strided global -> local async copy kernel for all
// types, widths and strides. num_elements is not used by this test.
int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const char *kernelTemplate = async_strided_global_to_local_kernel;
    return test_strided_copy_all_types( deviceID, context, queue, kernelTemplate );
}
// Test entry point: runs the strided local -> global async copy kernel for all
// types, widths and strides. num_elements is not used by this test.
int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const char *kernelTemplate = async_strided_local_to_global_kernel;
    return test_strided_copy_all_types( deviceID, context, queue, kernelTemplate );
}

View File

@@ -0,0 +1,158 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source of the "compute_sum" kernel built by test_barrier().
// Each work-item first accumulates a strided partial sum of a[0..n) into
// tmp_sum[local id]. The partial sums are then folded together in roughly
// halving steps, with a barrier before each step, and work-item 0 finally
// stores tmp_sum[0] into *sum.
const char *barrier_kernel_code =
"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n"
"{\n"
" int tid = get_local_id(0);\n"
" int lsize = get_local_size(0);\n"
" int i;\n"
"\n"
" tmp_sum[tid] = 0;\n"
" for (i=tid; i<n; i+=lsize)\n"
" tmp_sum[tid] += a[i];\n"
" \n"
" // updated to work for any workgroup size \n"
" for (i=hadd(lsize,1); lsize>1; i = hadd(i,1))\n"
" {\n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
" if (tid + i < lsize)\n"
" tmp_sum[tid] += tmp_sum[tid + i];\n"
" lsize = i; \n"
" }\n"
"\n"
" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n"
" if (tid == 0)\n"
" *sum = tmp_sum[0];\n"
"}\n";
// Recomputes the sum of the n values in inptr on the host and compares it
// with outptr[0]. tmpptr is accepted for signature compatibility but is not
// read. Returns 0 (and logs a pass message) on a match, -1 otherwise.
static int
verify_sum(int *inptr, int *tmpptr, int *outptr, int n)
{
    int total = 0;
    int idx = 0;

    while (idx < n)
        total += inptr[idx++];

    if (total == outptr[0])
    {
        log_info("BARRIER test passed\n");
        return 0;
    }

    log_error("BARRIER test failed\n");
    return -1;
}
// Conformance test for barrier(): builds the compute_sum kernel, runs it as a
// single work-group over num_elements random integers and checks the reduced
// sum against a host-side reference (verify_sum).
//
// device/context/queue - OpenCL objects supplied by the test harness.
// num_elements         - number of input integers to sum.
// Returns 0 on success and a non-zero value on failure.
//
// NOTE(review): test_error appears to return from this function when the
// error code is non-zero; the CL objects and host buffers created before
// such a return are not released on those paths - confirm against the harness.
int
test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[3];
    cl_int *input_ptr = NULL, *output_ptr = NULL;
    cl_program program;
    cl_kernel kernel;
    size_t global_threads[3];
    size_t local_threads[3];
    int err;
    int i;
    size_t max_local_workgroup_size[3];
    size_t max_threadgroup_size = 0;
    MTdata d;

    err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_kernel_code, "compute_sum" );
    test_error(err, "Failed to build kernel/program.");

    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
                                   sizeof(max_threadgroup_size), &max_threadgroup_size, NULL);
    test_error(err, "clGetKernelWorkgroupInfo failed.");

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");

    // Pick the minimum of the device and the kernel
    if (max_threadgroup_size > max_local_workgroup_size[0])
        max_threadgroup_size = max_local_workgroup_size[0];

    // A zero work-group size would make the modulo below divide by zero.
    if (max_threadgroup_size == 0)
    {
        log_error("BARRIER test: reported maximum work-group size is zero\n");
        clReleaseKernel(kernel);
        clReleaseProgram(program);
        return -1;
    }

    // work group size must divide evenly into the global size
    while( num_elements % max_threadgroup_size )
        max_threadgroup_size--;

    input_ptr = (int*)malloc(sizeof(int) * num_elements);
    output_ptr = (int*)malloc(sizeof(int));
    if (input_ptr == NULL || output_ptr == NULL)
    {
        // Without this check the fill loop and the read-back below would
        // write through a NULL pointer.
        log_error("BARRIER test: failed to allocate host buffers\n");
        free(input_ptr);
        free(output_ptr);
        clReleaseKernel(kernel);
        clReleaseProgram(program);
        return -1;
    }

    // streams[0]: input values, streams[1]: single-int result,
    // streams[2]: per-work-item partial sums (one slot per work-item).
    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err);
    test_error(err, "clCreateBuffer failed.");
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int), NULL, &err);
    test_error(err, "clCreateBuffer failed.");
    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * max_threadgroup_size, NULL, &err);
    test_error(err, "clCreateBuffer failed.");

    d = init_genrand( gRandomSeed );
    for (i=0; i<num_elements; i++)
        input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d);
    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
    test_error(err, "clEnqueueWriteBuffer failed.");

    // Kernel signature: (a, n, tmp_sum, sum)
    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
    err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
    test_error(err, "clSetKernelArg failed.");

    // Global size equals local size, so exactly one work-group is launched.
    global_threads[0] = max_threadgroup_size;
    local_threads[0] = max_threadgroup_size;

    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
    test_error(err, "clEnqueueNDRangeKernel failed.");

    err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int), (void *)output_ptr, 0, NULL, NULL );
    test_error(err, "clEnqueueReadBuffer failed.");

    // The second argument of verify_sum is not read; no host copy of the
    // partial sums is kept.
    err = verify_sum(input_ptr, NULL, output_ptr, num_elements);

    // cleanup
    clReleaseMemObject(streams[0]);
    clReleaseMemObject(streams[1]);
    clReleaseMemObject(streams[2]);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return err;
}

View File

@@ -0,0 +1,302 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// printf-style template for the "test_kernel" source used by
// test_basic_parameter_types(). The first eight %s fields receive the
// vector-width suffix for the seven value parameters and the result pointer;
// the remaining six %s fields receive the conversion function applied to the
// integer arguments before they are stored in result[0..5]. The float
// argument is stored in result[6] unchanged.
const char *kernel_code =
"__kernel void test_kernel(\n"
"char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f,\n"
"__global float%s *result)\n"
"{\n"
" result[0] = %s(c);\n"
" result[1] = %s(uc);\n"
" result[2] = %s(s);\n"
" result[3] = %s(us);\n"
" result[4] = %s(i);\n"
" result[5] = %s(ui);\n"
" result[6] = f;\n"
"}\n";
// printf-style template for the "test_kernel_long" source used by
// test_basic_parameter_types_long(). The first three %s fields receive the
// vector-width suffix for the long, ulong and result parameters; the last two
// receive the conversion function applied before storing into result[0..1].
const char *kernel_code_long =
"__kernel void test_kernel_long(\n"
"long%s l, ulong%s ul,\n"
"__global float%s *result)\n"
"{\n"
" result[0] = %s(l);\n"
" result[1] = %s(ul);\n"
"}\n";
// Checks that long and ulong values (scalar and vector widths 2, 4, 8, 16)
// can be passed to a kernel by value. For each width the kernel converts both
// arguments to float and stores them in a result buffer, which is read back
// and compared with the host-side conversion of the same inputs. Widths whose
// combined argument size exceeds CL_DEVICE_MAX_PARAMETER_SIZE are skipped.
// Returns the number of mismatching elements, or a CL error code if an API
// call fails.
int
test_basic_parameter_types_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // Argument payloads. The values are arbitrary; the test only checks that
    // they reach the kernel intact.
    cl_long l[16]={-21,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_ulong ul[16]={22,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};

    const char* type_names[] = { "long", "ulong" };
    const char* width_suffixes[] = {"", "2", "4", "8", "16"};
    int widths[] = {1, 2, 4, 8, 16};

    clMemWrapper results;
    size_t work_size[3] = {1, 1, 1};
    float readback[2*16];
    char source[8192];
    char convert_fn[1024];
    size_t device_limit;
    float expected;
    int failures = 0;
    int status;

    // Bytes taken by the two value arguments when both are scalars.
    const size_t scalar_args_bytes = sizeof(cl_long) + sizeof(cl_ulong);

    source[0] = '\0';
    convert_fn[0] = '\0';

    // Upper bound on the total size of the kernel arguments.
    status = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( device_limit ), &device_limit, NULL );
    test_error( status, "Unable to get max parameter size from device" );

    // One float16 slot per argument is enough for every width.
    results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*2*16, NULL, &status);
    test_error(status, "clCreateBuffer failed");

    for (int w = 0; w < 5; w++) {
        const int width = widths[w];
        const char *suffix = width_suffixes[w];
        clProgramWrapper program;
        clKernelWrapper kernel;

        const size_t needed = scalar_args_bytes*width + sizeof(cl_mem);
        if (needed > device_limit) {
            log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
                     (int)width, (int)needed, (int)device_limit);
            continue;
        }

        log_info("Testing vector size %d\n", width);

        // Vector arguments need an explicit convert_floatN(); scalars convert implicitly.
        if (width > 1)
            sprintf(convert_fn, "convert_float%s", suffix);
        else
            sprintf(convert_fn, " ");

        // Instantiate the kernel template for this width and build it.
        sprintf(source, kernel_code_long,
                suffix, suffix, suffix,
                convert_fn, convert_fn);

        const char *source_ptr = source;
        status = create_single_kernel_helper(context, &program, &kernel, 1, &source_ptr, "test_kernel_long");
        test_error(status, "create single kernel failed");

        // Value arguments first, then the result buffer.
        for (int arg = 0; arg < 2; arg++) {
            switch (arg) {
                case 0: status = clSetKernelArg(kernel, arg, sizeof(cl_long)*width, &l); break;
                case 1: status = clSetKernelArg(kernel, arg, sizeof(cl_ulong)*width, &ul); break;
                default: log_error("Test error"); break;
            }
            if (status)
                log_error("Setting kernel arg %d %s%s: ", arg, type_names[arg], suffix);
            test_error(status, "clSetKernelArgs failed");
        }
        status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results);
        test_error(status, "clSetKernelArgs failed");

        // A single work-item performs all the stores.
        status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
        test_error(status, "clEnqueueNDRangeKernel failed");

        // Blocking read of the whole result buffer.
        status = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*2*16, readback, 0, NULL, NULL);
        test_error(status, "clEnqueueReadBuffer failed");

        // result[arg] is a floatN, so argument `arg` occupies floats
        // [arg*width, arg*width + width) of the read-back data.
        for (int arg = 0; arg < 2; arg++) {
            for (int lane = 0; lane < width; lane++) {
                switch (arg) {
                    case 0: expected = (float)l[lane]; break;
                    case 1: expected = (float)ul[lane]; break;
                    default: log_error("Test error"); break;
                }
                const float got = readback[arg*width + lane];
                if (got != expected) {
                    failures++;
                    log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", type_names[arg], suffix,
                              lane, got, expected);
                }
            }
        }
    }

    return failures;
}
// Checks that char, uchar, short, ushort, int, uint and float values (scalar
// and vector widths 2, 4, 8, 16) can be passed to a kernel by value. For each
// width the kernel converts every argument to float and stores it in a result
// buffer, which is read back and compared with the host-side conversion of
// the same inputs. Widths whose combined argument size exceeds
// CL_DEVICE_MAX_PARAMETER_SIZE are skipped. When gHasLong is set, the long /
// ulong variant is run afterwards and its error count is added.
// Returns the number of mismatching elements, or a CL error code if an API
// call fails.
int
test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    clMemWrapper results;
    int error;
    size_t global[3] = {1, 1, 1};
    float results_back[7*16];
    int count, index;
    const char* types[] = {"char", "uchar", "short", "ushort", "int", "uint", "float"};
    char kernel_string[8192];
    int sizes[] = {1, 2, 4, 8, 16};
    const char* size_strings[] = {"", "2", "4", "8", "16"};
    // Initialised so the unreachable default branch below cannot leave it indeterminate.
    float expected = 0.0f;
    int total_errors = 0;
    int size_to_test;
    char *ptr;
    char convert_string[1024];
    size_t max_parameter_size;

    // We don't really care about the contents since we're just testing that the types work.
    cl_char c[16]={0,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_uchar uc[16]={16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    cl_short s[16]={-17,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_ushort us[16]={18,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    cl_int i[16]={-19,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_uint ui[16]={20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    cl_float f[16]={-23,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};

    // Calculate how large our parameter size is to the kernel (scalar case)
    size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar) +
                            sizeof(cl_short) + sizeof(cl_ushort) +
                            sizeof(cl_int) + sizeof(cl_uint) +
                            sizeof(cl_float);

    // Init our strings.
    kernel_string[0] = '\0';
    convert_string[0] = '\0';

    // Get the maximum parameter size allowed
    error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
    test_error( error, "Unable to get max parameter size from device" );

    // Create the results buffer (one float16 slot per argument)
    results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*7*16, NULL, &error);
    test_error(error, "clCreateBuffer failed");

    // Go over all the vector sizes
    for (size_to_test = 0; size_to_test < 5; size_to_test++) {
        clProgramWrapper program;
        clKernelWrapper kernel;

        size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem);
        if (total_parameter_size > max_parameter_size) {
            log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
                     (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size);
            continue;
        }

        log_info("Testing vector size %d\n", sizes[size_to_test]);

        // If size is > 1, then we need an explicit convert call.
        if (sizes[size_to_test] > 1) {
            sprintf(convert_string, "convert_float%s", size_strings[size_to_test]);
        } else {
            sprintf(convert_string, " ");
        }

        // Build the kernel: eight width suffixes followed by six conversion calls.
        sprintf(kernel_string, kernel_code,
                size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
                size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
                size_strings[size_to_test], size_strings[size_to_test],
                convert_string, convert_string, convert_string,
                convert_string, convert_string, convert_string
                );

        ptr = kernel_string;
        error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel");
        test_error(error, "create single kernel failed");

        // Set the arguments
        for (count = 0; count < 7; count++) {
            switch (count) {
                case 0: error = clSetKernelArg(kernel, count, sizeof(cl_char)*sizes[size_to_test], &c); break;
                case 1: error = clSetKernelArg(kernel, count, sizeof(cl_uchar)*sizes[size_to_test], &uc); break;
                case 2: error = clSetKernelArg(kernel, count, sizeof(cl_short)*sizes[size_to_test], &s); break;
                case 3: error = clSetKernelArg(kernel, count, sizeof(cl_ushort)*sizes[size_to_test], &us); break;
                case 4: error = clSetKernelArg(kernel, count, sizeof(cl_int)*sizes[size_to_test], &i); break;
                case 5: error = clSetKernelArg(kernel, count, sizeof(cl_uint)*sizes[size_to_test], &ui); break;
                case 6: error = clSetKernelArg(kernel, count, sizeof(cl_float)*sizes[size_to_test], &f); break;
                default: log_error("Test error"); break;
            }
            if (error)
                log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]);
            test_error(error, "clSetKernelArgs failed");
        }
        error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results);
        test_error(error, "clSetKernelArgs failed");

        // Execute (a single work-item performs all the stores)
        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
        test_error(error, "clEnqueueNDRangeKernel failed");

        // Read back the results
        error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*7*16, results_back, 0, NULL, NULL);
        test_error(error, "clEnqueueReadBuffer failed");

        // Verify the results. result[count] is a floatN, so argument `count`
        // occupies floats [count*N, count*N + N) of the read-back data.
        for (count = 0; count < 7; count++) {
            for (index=0; index < sizes[size_to_test]; index++) {
                switch (count) {
                    case 0: expected = (float)c[index]; break;
                    case 1: expected = (float)uc[index]; break;
                    case 2: expected = (float)s[index]; break;
                    case 3: expected = (float)us[index]; break;
                    case 4: expected = (float)i[index]; break;
                    case 5: expected = (float)ui[index]; break;
                    case 6: expected = (float)f[index]; break;
                    default: log_error("Test error"); break;
                }

                if (results_back[count*sizes[size_to_test]+index] != expected) {
                    total_errors++;
                    log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test],
                              index, results_back[count*sizes[size_to_test]+index], expected);
                }
            }
        }
    }

    if (gHasLong) {
        log_info("Testing long types...\n");
        total_errors += test_basic_parameter_types_long( device, context, queue, num_elements );
    }
    else {
        // Terminate the line so the next log entry does not run into this one.
        log_info("Longs unsupported, skipping.\n");
    }

    return total_errors;
}

View File

@@ -0,0 +1,529 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Evaluates cmd once and, when the result is not CL_SUCCESS, logs the source
// location followed by the printf-style message built from format and the
// remaining arguments. Despite its name the macro does not exit or return:
// the abort() call is commented out, so execution continues after logging.
// Must be used as a complete statement (terminated with ';').
#define CL_EXIT_ERROR(cmd,format,...)                               \
    do {                                                            \
        if ((cmd) != CL_SUCCESS) {                                  \
            log_error("CL ERROR: %s %u: ", __FILE__,__LINE__);      \
            log_error(format,## __VA_ARGS__ );                      \
            log_error("\n");                                        \
            /*abort();*/                                            \
        }                                                           \
    } while (0)
// Element type of every test buffer (one byte per element).
typedef unsigned char BufferType;
// File-scope state shared by the helper functions below; it is set up by
// test_bufferreadwriterect().
// Command queue used for all enqueue calls in this file.
cl_command_queue queue;
// Number of test buffers, and the width/height/depth (in elements) of each;
// index i describes buffer[i].
enum { TotalImages = 8 };
size_t width [TotalImages];
size_t height [TotalImages];
size_t depth [TotalImages];
// Per-slot storage: buffer[i] is the cl buffer, verify[i] the host mirror the
// operations are replayed on, backing[i] the host memory handed to
// clCreateBuffer with CL_MEM_USE_HOST_PTR.
cl_mem buffer [TotalImages];
BufferType* verify[TotalImages];
BufferType* backing[TotalImages];
// Temporary buffer used for read and write operations, and its size in bytes.
BufferType* tmp_buffer;
size_t tmp_buffer_size;
size_t num_tries = 50; // Number of randomly selected operations to perform.
size_t alloc_scale = 2; // Divisor applied to the estimated maximum buffer dimension.
// Random number generator state used for buffer sizes, contents and operations.
MTdata mt;
// Fills the w*h*d elements starting at ptr with pseudo-random bytes drawn
// from the generator mt. Each byte is genrand_int32(mt) % 0xff, so the
// generated values lie in the range 0x00..0xfe.
static void initialize_image(BufferType* ptr, size_t w, size_t h, size_t d, MTdata mt)
{
    enum { ElementSize = sizeof(BufferType)/sizeof(unsigned char) };
    unsigned char* out = (unsigned char*)ptr;
    unsigned char* const end = out + w*h*d*ElementSize;

    while (out != end) {
        *out++ = (unsigned char)(genrand_int32(mt) % 0xff);
    }
}
// Dumps a w x h x d buffer through log_error as hexadecimal bytes, one row
// per line and one block per depth slice. Debugging aid only.
//
// buf     - first element of the buffer; rows are w elements apart and
//           slices w*h elements apart.
// w, h, d - buffer dimensions in elements.
void print_buffer(BufferType* buf, size_t w, size_t h, size_t d) {
    // size_t is not necessarily unsigned long (e.g. 64-bit Windows), so the
    // values are converted explicitly to match the %lu conversions.
    log_error("Size = %lux%lux%lu (%lu total)\n",
              (unsigned long)w, (unsigned long)h, (unsigned long)d, (unsigned long)(w*h*d));

    // size_t counters so the loops terminate for any dimension the
    // parameters can express.
    for (size_t k = 0; k != d; ++k) {
        log_error("Slice: %u\n", (unsigned)k);
        for (size_t j = 0; j != h; ++j) {
            for (size_t i = 0; i != w; ++i) {
                log_error("%02x", buf[k*(w*h)+j*w+i]);
            }
            log_error("\n");
        }
        log_error("\n");
    }
}
// Returns true when two boxes of identical extent `region`, positioned at
// src_offset and dst_offset, intersect. The boxes intersect only if their
// half-open intervals [offset, offset + region) overlap on all three axes;
// boxes that merely touch do not count as overlapping.
bool check_overlap(const size_t src_offset[3], const size_t dst_offset[3], const size_t region[3]) {
    for (unsigned axis = 0; axis != 3; ++axis) {
        const size_t src_lo = src_offset[axis];
        const size_t src_hi = src_lo + region[axis];
        const size_t dst_lo = dst_offset[axis];
        const size_t dst_hi = dst_lo + region[axis];

        // Disjoint on one axis is enough to rule out an intersection.
        if (src_lo >= dst_hi || src_hi <= dst_lo)
            return false;
    }
    return true;
}
// Enqueues clEnqueueCopyBufferRect from buffer[src] to buffer[dst] and then
// replays the same copy on the host mirrors verify[src] -> verify[dst].
//
// src, dst - indices of the source and destination buffers.
// soffset  - origin of the region inside the source buffer.
// sregion  - extent of the region (used for both source and destination).
// doffset  - origin of the region inside the destination buffer.
// dregion  - not used; the destination extent equals sregion.
//
// When check_overlap() reports that the two offsets overlap, the copy is
// skipped on both the device and the host and CL_SUCCESS is returned.
// Returns 0 on success, or the CL error code if the enqueue fails. In the
// failure case the host mirror is left untouched so that it keeps matching
// the device contents.
int copy_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) {
    if (check_overlap(soffset,doffset,sregion)) {
        log_info( "Copy overlap reported, skipping copy buffer rect\n" );
        return CL_SUCCESS;
    }

    // Row and slice pitches of the two buffers, in elements (== bytes).
    const size_t spitch = width[src];
    const size_t sslice = width[src]*height[src];
    const size_t dpitch = width[dst];
    const size_t dslice = width[dst]*height[dst];

    // A slice pitch of 0 is passed instead of 1, as in the other rect helpers.
    const size_t src_slice_pitch = (sslice != 1) ? sslice : 0;
    const size_t dst_slice_pitch = (dslice != 1) ? dslice : 0;

    // Copy between cl buffers.
    cl_int err = clEnqueueCopyBufferRect(queue,
                                         buffer[src],buffer[dst],
                                         soffset, doffset,
                                         sregion,
                                         spitch, src_slice_pitch,
                                         dpitch, dst_slice_pitch,
                                         0, NULL, NULL);
    if (err != CL_SUCCESS) {
        CL_EXIT_ERROR(err, "clEnqueueCopyBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst);
        // Report the failure to the caller instead of silently continuing.
        return err;
    }

    // Copy between host buffers, element by element in x-fastest order.
    for (size_t z = 0; z != sregion[2]; ++z) {
        for (size_t y = 0; y != sregion[1]; ++y) {
            for (size_t x = 0; x != sregion[0]; ++x) {
                const size_t s_idx = (soffset[2]+z)*sslice + (soffset[1]+y)*spitch + soffset[0]+x;
                const size_t d_idx = (doffset[2]+z)*dslice + (doffset[1]+y)*dpitch + doffset[0]+x;
                verify[dst][d_idx] = verify[src][s_idx];
            }
        }
    }

    return 0;
}
// Compares a region of `device` (laid out with the pitches of buffer dst,
// starting at doffset) against the same-sized region of verify[src] starting
// at soffset.
//
// device  - host-accessible data to check (read-back or mapped memory).
// src     - index of the host mirror that holds the expected values.
// soffset - origin of the region inside verify[src].
// sregion - extent of the region.
// dst     - index whose width/height define the layout of `device`.
// doffset - origin of the region inside `device`.
//
// Returns 0 when every element matches; logs the first mismatch and returns
// -1 otherwise.
int verify_region(BufferType* device, size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3]) {
    // Row and slice pitches, in elements.
    const size_t spitch = width[src];
    const size_t sslice = width[src]*height[src];
    const size_t dpitch = width[dst];
    const size_t dslice = width[dst]*height[dst];

    // Elements per z-slice of the region, and elements in the whole region.
    const size_t rslice = sregion[0]*sregion[1];
    const size_t total = rslice * sregion[2];

    for (size_t i = 0; i != total; ++i) {
        // Compute the coordinates of the element within the region.
        const size_t sz = i / rslice;
        const size_t sy = (i % rslice) / sregion[0];
        const size_t sx = (i % rslice) % sregion[0];

        // Compute the element offsets in the source and destination.
        const size_t s_idx = (soffset[2]+sz)*sslice + (soffset[1]+sy)*spitch + soffset[0]+sx;
        const size_t d_idx = (doffset[2]+sz)*dslice + (doffset[1]+sy)*dpitch + doffset[0]+sx;

        if (device[d_idx] != verify[src][s_idx]) {
            // size_t is converted explicitly because it does not match %lu on
            // every platform (e.g. 64-bit Windows).
            log_error("Verify failed on comparsion %lu: coordinate (%lu, %lu, %lu) of region\n",
                      (unsigned long)i, (unsigned long)sx, (unsigned long)sy, (unsigned long)sz);
            log_error("0x%02x != 0x%02x\n", device[d_idx], verify[src][s_idx]);
#if 0
            // Uncomment this section to print buffers.
            log_error("Device (copy): [%lu]\n",(unsigned long)dst);
            print_buffer(device,width[dst],height[dst],depth[dst]);
            log_error("\n");
            log_error("Verify: [%lu]\n",(unsigned long)src);
            print_buffer(verify[src],width[src],height[src],depth[src]);
            log_error("\n");
            abort();
#endif
            return -1;
        }
    }

    return 0;
}
// This function invokes ReadBufferRect to read a region from the
// specified source buffer into a temporary destination buffer. The
// contents of the temporary buffer are then compared to the source
// region of the corresponding verify buffer.
int read_verify_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) {
// Clear the temporary destination host buffer.
memset(tmp_buffer, 0xff, tmp_buffer_size);
size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0;
size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0;
// Copy the source region of the cl buffer, to the destination region of the temporary buffer.
CL_EXIT_ERROR(clEnqueueReadBufferRect(queue,
buffer[src],
CL_TRUE,
soffset,doffset,
sregion,
width[src], src_slice_pitch,
width[dst], dst_slice_pitch,
tmp_buffer,
0, NULL, NULL), "clEnqueueCopyBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst);
return verify_region(tmp_buffer,src,soffset,sregion,dst,doffset);
}
// This function performs the same verification check as
// read_verify_region, except a MapBuffer command is used to access the
// device buffer data instead of a ReadBufferRect, and the whole
// buffer is checked.
int map_verify_region(size_t src) {
size_t size_bytes = width[src]*height[src]*depth[src]*sizeof(BufferType);
// Copy the source region of the cl buffer, to the destination region of the temporary buffer.
cl_int err;
BufferType* mapped = (BufferType*)clEnqueueMapBuffer(queue,buffer[src],CL_TRUE,CL_MAP_READ,0,size_bytes,0,NULL,NULL,&err);
CL_EXIT_ERROR(err, "clEnqueueMapBuffer failed for buffer %u",(unsigned)src);
size_t soffset[] = { 0, 0, 0 };
size_t sregion[] = { width[src], height[src], depth[src] };
int ret = verify_region(mapped,src,soffset,sregion,src,soffset);
CL_EXIT_ERROR(clEnqueueUnmapMemObject(queue,buffer[src],mapped,0,NULL,NULL),
"clEnqueueUnmapMemObject failed for buffer %u",(unsigned)src);
return ret;
}
// This function generates a new temporary buffer and then writes a
// region of it to a region in the specified destination buffer.
int write_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) {
initialize_image(tmp_buffer, tmp_buffer_size, 1, 1, mt);
// memset(tmp_buffer, 0xf0, tmp_buffer_size);
size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0;
size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0;
// Copy the source region of the cl buffer, to the destination region of the temporary buffer.
CL_EXIT_ERROR(clEnqueueWriteBufferRect(queue,
buffer[dst],
CL_TRUE,
doffset,soffset,
/*sregion,*/dregion,
width[dst], dst_slice_pitch,
width[src], src_slice_pitch,
tmp_buffer,
0, NULL, NULL), "clEnqueueWriteBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst);
// Copy from the temporary buffer to the host buffer.
size_t spitch = width[src];
size_t sslice = width[src]*height[src];
size_t dpitch = width[dst];
size_t dslice = width[dst]*height[dst];
size_t total = sregion[0] * sregion[1] * sregion[2];
for (size_t i = 0; i != total; ++i) {
// Compute the coordinates of the element within the source and destination regions.
size_t rslice = sregion[0]*sregion[1];
size_t sz = i / rslice;
size_t sy = (i % rslice) / sregion[0];
size_t sx = (i % rslice) % sregion[0];
size_t dz = sz;
size_t dy = sy;
size_t dx = sx;
// Compute the offset in bytes of the source and destination.
size_t s_idx = (soffset[2]+sz)*sslice + (soffset[1]+sy)*spitch + soffset[0]+sx;
size_t d_idx = (doffset[2]+dz)*dslice + (doffset[1]+dy)*dpitch + doffset[0]+dx;
verify[dst][d_idx] = tmp_buffer[s_idx];
}
return 0;
}
// Memory-object destructor callback: frees the pointer passed as user data.
// test_bufferreadwriterect() registers it with the buffer's backing host
// allocation as that user data. The cl_mem argument is unused.
void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data )
{
free( data );
}
// This is the main test function for the conformance test.
int
test_bufferreadwriterect(cl_device_id device, cl_context context, cl_command_queue queue_, int num_elements)
{
queue = queue_;
cl_int err;
// Initialize the random number generator.
mt = init_genrand( gRandomSeed );
// Compute a maximum buffer size based on the number of test images and the device maximum.
cl_ulong max_mem_alloc_size = 0;
CL_EXIT_ERROR(clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_mem_alloc_size, NULL),"Could not get device info");
log_info("CL_DEVICE_MAX_MEM_ALLOC_SIZE = %llu bytes.\n", max_mem_alloc_size);
// Confirm that the maximum allocation size is not zero.
if (max_mem_alloc_size == 0) {
log_error("Error: CL_DEVICE_MAX_MEM_ALLOC_SIZE is zero bytes\n");
return -1;
}
// Guess at a reasonable maximum dimension.
size_t max_mem_alloc_dim = (size_t)cbrt((double)(max_mem_alloc_size/sizeof(BufferType)))/alloc_scale;
if (max_mem_alloc_dim == 0) {
max_mem_alloc_dim = max_mem_alloc_size;
}
log_info("Using maximum dimension = %lu.\n", max_mem_alloc_dim);
// Create pairs of cl buffers and host buffers on which operations will be mirrored.
log_info("Creating %u pairs of random sized host and cl buffers.\n", TotalImages);
size_t max_size = 0;
size_t total_bytes = 0;
for (unsigned i=0; i != TotalImages; ++i) {
// Determine a width and height for this buffer.
size_t size_bytes;
size_t tries = 0;
size_t max_tries = 1048576;
do {
width[i] = get_random_size_t(1, max_mem_alloc_dim, mt);
height[i] = get_random_size_t(1, max_mem_alloc_dim, mt);
depth[i] = get_random_size_t(1, max_mem_alloc_dim, mt);
++tries;
} while ((tries < max_tries) && (size_bytes = width[i]*height[i]*depth[i]*sizeof(BufferType)) > max_mem_alloc_size);
// Check to see if adequately sized buffers were found.
if (tries >= max_tries) {
log_error("Error: Could not find random buffer sized less than %llu bytes in %lu tries.\n",
max_mem_alloc_size, max_tries);
return -1;
}
// Keep track of the dimensions of the largest buffer.
max_size = (size_bytes > max_size) ? size_bytes : max_size;
total_bytes += size_bytes;
log_info("Buffer[%u] is (%lu,%lu,%lu) = %lu MB (truncated)\n",i,width[i],height[i],depth[i],(size_bytes)/1048576);
}
log_info( "Total size: %lu MB (truncated)\n", total_bytes/1048576 );
// Allocate a temporary buffer for read and write operations.
tmp_buffer_size = max_size;
tmp_buffer = (BufferType*)malloc(tmp_buffer_size);
// Initialize cl buffers
log_info( "Initializing buffers\n" );
for (unsigned i=0; i != TotalImages; ++i) {
size_t size_bytes = width[i]*height[i]*depth[i]*sizeof(BufferType);
// Allocate a host copy of the buffer for verification.
verify[i] = (BufferType*)malloc(size_bytes);
CL_EXIT_ERROR(verify[i] ? CL_SUCCESS : -1, "malloc of host buffer failed for buffer %u", i);
// Allocate the buffer in host memory.
backing[i] = (BufferType*)malloc(size_bytes);
CL_EXIT_ERROR(backing[i] ? CL_SUCCESS : -1, "malloc of backing buffer failed for buffer %u", i);
// Generate a random buffer.
log_info( "Initializing buffer %u\n", i );
initialize_image(verify[i], width[i], height[i], depth[i], mt);
// Copy the image into a buffer which will passed to CL.
memcpy(backing[i], verify[i], size_bytes);
// Create the CL buffer.
buffer[i] = clCreateBuffer (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, size_bytes, backing[i], &err);
CL_EXIT_ERROR(err,"clCreateBuffer failed for buffer %u", i);
// Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
err = clSetMemObjectDestructorCallback( buffer[i], mem_obj_destructor_callback, backing[i] );
CL_EXIT_ERROR(err, "Unable to set mem object destructor callback" );
}
// Main test loop, run num_tries times.
log_info( "Executing %u test operations selected at random.\n", (unsigned)num_tries );
for (size_t iter = 0; iter < num_tries; ++iter) {
// Determine a source and a destination.
size_t src = get_random_size_t(0,TotalImages,mt);
size_t dst = get_random_size_t(0,TotalImages,mt);
// Determine the minimum dimensions.
size_t min_width = width[src] < width[dst] ? width[src] : width[dst];
size_t min_height = height[src] < height[dst] ? height[src] : height[dst];
size_t min_depth = depth[src] < depth[dst] ? depth[src] : depth[dst];
// Generate a random source rectangle within the minimum dimensions.
size_t mx = get_random_size_t(0, min_width-1, mt);
size_t my = get_random_size_t(0, min_height-1, mt);
size_t mz = get_random_size_t(0, min_depth-1, mt);
size_t sw = get_random_size_t(1, (min_width - mx), mt);
size_t sh = get_random_size_t(1, (min_height - my), mt);
size_t sd = get_random_size_t(1, (min_depth - mz), mt);
size_t sx = get_random_size_t(0, width[src]-sw, mt);
size_t sy = get_random_size_t(0, height[src]-sh, mt);
size_t sz = get_random_size_t(0, depth[src]-sd, mt);
size_t soffset[] = { sx, sy, sz };
size_t sregion[] = { sw, sh, sd };
// Generate a destination rectangle of the same size.
size_t dw = sw;
size_t dh = sh;
size_t dd = sd;
// Generate a random destination offset within the buffer.
size_t dx = get_random_size_t(0, (width[dst] - dw), mt);
size_t dy = get_random_size_t(0, (height[dst] - dh), mt);
size_t dz = get_random_size_t(0, (depth[dst] - dd), mt);
size_t doffset[] = { dx, dy, dz };
size_t dregion[] = { dw, dh, dd };
// Execute one of three operations:
// - Copy: Copies between src and dst within each set of host, buffer, and images.
// - Read & verify: Reads src region from buffer and image, and compares to host.
// - Write: Generates new buffer with src dimensions, and writes to cl buffer and image.
enum { TotalOperations = 3 };
size_t operation = get_random_size_t(0,TotalOperations,mt);
switch (operation) {
case 0:
log_info("%lu Copy %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n",
iter,
src, soffset[0], soffset[1], soffset[2],
dst, doffset[0], doffset[1], doffset[2],
sregion[0], sregion[1], sregion[2],
sregion[0]*sregion[1]*sregion[2]);
if ((err = copy_region(src, soffset, sregion, dst, doffset, dregion)))
return err;
break;
case 1:
log_info("%lu Read %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n",
iter,
src, soffset[0], soffset[1], soffset[2],
dst, doffset[0], doffset[1], doffset[2],
sregion[0], sregion[1], sregion[2],
sregion[0]*sregion[1]*sregion[2]);
if ((err = read_verify_region(src, soffset, sregion, dst, doffset, dregion)))
return err;
break;
case 2:
log_info("%lu Write %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n",
iter,
src, soffset[0], soffset[1], soffset[2],
dst, doffset[0], doffset[1], doffset[2],
sregion[0], sregion[1], sregion[2],
sregion[0]*sregion[1]*sregion[2]);
if ((err = write_region(src, soffset, sregion, dst, doffset, dregion)))
return err;
break;
}
#if 0
// Uncomment this section to verify each operation.
// If commented out, verification won't occur until the end of the
// test, and it will not be possible to determine which operation failed.
log_info("Verify src %lu offset (%u,%u,%u) region (%lux%lux%lu)\n", src, 0, 0, 0, width[src], height[src], depth[src]);
if (err = map_verify_region(src))
return err;
log_info("Verify dst %lu offset (%u,%u,%u) region (%lux%lux%lu)\n", dst, 0, 0, 0, width[dst], height[dst], depth[dst]);
if (err = map_verify_region(dst))
return err;
#endif
} // end main for loop.
for (unsigned i=0;i<TotalImages;++i) {
log_info("Verify %u offset (%u,%u,%u) region (%lux%lux%lu)\n", i, 0, 0, 0, width[i], height[i], depth[i]);
if ((err = map_verify_region(i)))
return err;
}
// Clean-up.
free_mtdata(mt);
for (unsigned i=0;i<TotalImages;++i) {
free( verify[i] );
clReleaseMemObject( buffer[i] );
}
free( tmp_buffer );
if (!err) {
log_info("RECT read, write test passed\n");
}
return err;
}

View File

@@ -0,0 +1,275 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/*
 * OpenCL C source for "constant_kernel", built by test_constant().
 * Each work-item reads one float and one int from the two __constant
 * buffers at its global id, converts the int to float, and stores the
 * product of the two in out[tid].
 */
const char *constant_kernel_code =
"__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" float ftmp = tmpF[tid]; \n"
" float Itmp = tmpI[tid]; \n"
" out[tid] = ftmp * Itmp; \n"
"}\n";
/*
 * OpenCL C source for "loop_constant_kernel", built by test_constant().
 * Every work-item computes the same value: the sum of i_pos[0], i_pos[3],
 * ..., i_pos[3*(num-1)] read from the constant buffer inside a loop, and
 * writes that sum to out[tid].
 */
const char *loop_constant_kernel_code =
"kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n"
"{\n"
" int tid = get_global_id(0);\n"
" float sum = 0;\n"
" for (int i = 0; i < num; i++) {\n"
" float pos = i_pos[i*3];\n"
" sum += pos;\n"
" }\n"
" out[tid] = sum;\n"
"}\n";
/*
 * Compare the device output of constant_kernel against a host reference.
 *
 * For each of the n elements the expected value is tmpF[k] * tmpI[k]
 * evaluated on the host as a float.  Returns 0 when every element matches
 * exactly, -1 (after logging) at the first mismatch.
 */
static int
verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n)
{
    int idx = 0;

    while (idx < n)
    {
        /* Store in a float so the comparison uses single precision. */
        float expected = tmpF[idx] * tmpI[idx];

        if (out[idx] != expected)
        {
            log_error("CONSTANT test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("CONSTANT test passed\n");
    return 0;
}
/*
 * Compare the device output of loop_constant_kernel against a host reference.
 *
 * The reference for every one of the n output elements is the sum of
 * tmp[0], tmp[3], ..., tmp[3*(l-1)], accumulated in the same order as the
 * kernel.  Returns 0 when all elements match exactly, -1 (after logging)
 * at the first mismatch.
 */
static int
verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n)
{
    int elem = 0;

    while (elem < n)
    {
        float expected = 0;
        cl_int term = 0;

        /* Mirror the kernel's strided accumulation. */
        while (term < l)
        {
            expected += tmp[term * 3];
            ++term;
        }

        if (out[elem] != expected)
        {
            log_error("loop CONSTANT test failed\n");
            return -1;
        }
        ++elem;
    }

    log_info("loop CONSTANT test passed\n");
    return 0;
}
/*
 * test_constant: exercises __constant address-space kernel arguments.
 *
 * Two kernels are run over a buffer sized from the device limits:
 *   1. constant_kernel       - out[i] = tmpF[i] * (float)tmpI[i]
 *   2. loop_constant_kernel  - out[i] = sum of tmpF[0], tmpF[3], ...
 *
 * Parameters:
 *   device, context, queue - OpenCL objects supplied by the test harness.
 *   num_elements           - unused; the element count is derived from
 *                            CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE.
 *
 * Returns 0 only when BOTH kernels verify; a non-zero value otherwise.
 * The result of the first verification is kept in its own variable so it
 * is not overwritten by later API calls, and every failure path goes
 * through the common cleanup so host memory and CL objects are released.
 */
int
test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    /* Everything is declared (and nulled) up front so that "goto cleanup"
       never jumps over an initialisation and cleanup can test each handle. */
    cl_mem       streams[3] = { NULL, NULL, NULL };
    cl_int      *tmpI = NULL;
    cl_float    *tmpF = NULL, *out = NULL;
    cl_program   program = NULL, loop_program = NULL;
    cl_kernel    kernel = NULL, loop_kernel = NULL;
    size_t       global_threads[3];
    int          err;
    int          constant_result, loop_result;
    int          result = -1;
    size_t       i;
    cl_ulong     maxSize, maxGlobalSize, maxAllocSize;
    size_t       num_floats, num_ints, constant_values;
    MTdata       d;
    RoundingMode oldRoundMode;
    int          isRTZ = 0;
    cl_int       limit = 2;   /* number of terms summed by the loop kernel */

    /* Verify our test buffer won't be bigger than allowed */
    err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
    test_error( err, "Unable to get max constant buffer size" );

    log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", maxSize);

    // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE
    err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0);
    test_error(err, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");

    if (maxSize > maxGlobalSize / 4)
        maxSize = maxGlobalSize / 4;

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0);
    test_error(err, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE ");

    if (maxSize > maxAllocSize)
        maxSize = maxAllocSize;

    maxSize/=4;
    num_ints = (size_t)maxSize/sizeof(cl_int);
    num_floats = (size_t)maxSize/sizeof(cl_float);
    if (num_ints >= num_floats) {
        constant_values = num_floats;
    } else {
        constant_values = num_ints;
    }

    log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n",
             constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float), constant_values*sizeof(cl_int), constant_values*sizeof(cl_float));

    tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values);
    tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values);
    out  = (cl_float*)malloc(sizeof(cl_float) * constant_values);
    if (!tmpI || !tmpF || !out)
    {
        log_error("Failed to allocate host buffers\n");
        goto cleanup;
    }

    /* streams[0]: float output, streams[1]: float input, streams[2]: int input */
    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * constant_values, NULL, NULL);
    if (!streams[2])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    d = init_genrand( gRandomSeed );
    for (i=0; i<constant_values; i++) {
        tmpI[i] = (int)get_random_float(-0x02000000, 0x02000000, d);
        tmpF[i] = get_random_float(-0x02000000, 0x02000000, d);
    }
    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)tmpF, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clWriteArray failed\n");
        goto cleanup;
    }
    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*constant_values, (void *)tmpI, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clWriteArray failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &constant_kernel_code, "constant_kernel" );
    if (err) {
        log_error("Failed to create kernel and program: %d\n", err);
        goto cleanup;
    }

    err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    global_threads[0] = constant_values;
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed: %d\n", err);
        goto cleanup;
    }
    err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    //If we only support rtz mode
    if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
    {
        oldRoundMode = set_round(kRoundTowardZero, kfloat);
        isRTZ = 1;
    }

    /* Keep this result separately: 'err' is reused by the loop test below. */
    constant_result = verify(tmpF, tmpI, out, (int)constant_values);

    if (isRTZ)
        (void)set_round(oldRoundMode, kfloat);

    // Loop constant buffer test
    memset(out, 0, sizeof(cl_float) * constant_values);
    err = create_single_kernel_helper(context, &loop_program, &loop_kernel, 1,
                                      &loop_constant_kernel_code, "loop_constant_kernel" );
    if (err) {
        log_error("Failed to create loop kernel and program: %d\n", err);
        goto cleanup;
    }

    err  = clSetKernelArg(loop_kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(loop_kernel, 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(loop_kernel, 2, sizeof(limit), &limit);
    if (err != CL_SUCCESS) {
        log_error("clSetKernelArgs for loop kernel failed\n");
        goto cleanup;
    }

    err = clEnqueueNDRangeKernel( queue, loop_kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS) {
        log_error("clEnqueueNDRangeKernel failed: %d\n", err);
        goto cleanup;
    }
    err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
    if (err != CL_SUCCESS) {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    loop_result = verify_loop_constant(tmpF, out, limit, (int)constant_values);

    /* The test passes only if both verifications passed. */
    result = (constant_result != 0) ? constant_result : loop_result;

cleanup:
    /* Release only what was actually created; handles start out NULL. */
    for (i = 0; i < 3; i++)
    {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    if (loop_kernel)
        clReleaseKernel(loop_kernel);
    if (loop_program)
        clReleaseProgram(loop_program);
    free(tmpI);
    free(tmpF);
    free(out);

    return result;
}

View File

@@ -0,0 +1,100 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/*
 * OpenCL C source for test_constant_source().  It declares three
 * program-scope __constant variables (a scalar value, a scalar index and a
 * 16-entry array) and a kernel that copies them to the output buffer:
 *   - work-item 0 writes out[0] = outVal and out[1] = outValues[outIndex];
 *   - every other work-item writes out[tid + 1] = outValues[tid].
 */
const char *constant_source_kernel_code[] = {
"__constant int outVal = 42;\n"
"__constant int outIndex = 7;\n"
"__constant int outValues[ 16 ] = { 17, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };\n"
"\n"
"__kernel void constant_kernel( __global int *out )\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" if( tid == 0 )\n"
" {\n"
" out[ 0 ] = outVal;\n"
" out[ 1 ] = outValues[ outIndex ];\n"
" }\n"
" else\n"
" {\n"
" out[ tid + 1 ] = outValues[ tid ];\n"
" }\n"
"}\n" };
/*
 * Runs constant_kernel from constant_source_kernel_code with 16 work-items
 * and checks the 17 values it writes against the expected contents of the
 * program-scope __constant variables.
 *
 * Returns 0 when every value matches, -1 at the first mismatch, or the
 * OpenCL error code if an API call fails (via test_error).
 */
int test_constant_source(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kValueCount = 17 };

    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper outStream;
    cl_int outValues[ kValueCount ];
    // [0] = outVal, [1] = outValues[outIndex], [2..16] = outValues[1..15]
    cl_int expectedValues[ kValueCount ] = { 42, 1985, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };
    cl_int error;
    size_t threads[ 1 ] = { 16 };
    int idx;

    // Build the program and fetch the kernel
    error = create_single_kernel_helper( context, &program, &kernel, 1, constant_source_kernel_code, "constant_kernel" );
    test_error( error, "Unable to create testing kernel" );

    // Device-side destination for the kernel's writes
    outStream = clCreateBuffer( context, CL_MEM_WRITE_ONLY, sizeof( outValues ), NULL, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( outStream ), &outStream );
    test_error( error, "Unable to set kernel argument" );

    // Launch and fetch the results with a blocking read
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    test_error( error, "Unable to enqueue kernel" );

    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof( outValues ), outValues, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    // Locate the first value that differs from the reference, if any
    idx = 0;
    while( idx < kValueCount && expectedValues[ idx ] == outValues[ idx ] )
        idx++;

    if( idx == kValueCount )
        return 0;

    // The message names which kind of __constant access produced the value
    switch( idx )
    {
        case 0:
            log_error( "ERROR: Output value %d from constant source global did not validate! (Expected %d, got %d)\n", idx, expectedValues[ idx ], outValues[ idx ] );
            break;
        case 1:
            log_error( "ERROR: Output value %d from constant-indexed constant array did not validate! (Expected %d, got %d)\n", idx, expectedValues[ idx ], outValues[ idx ] );
            break;
        default:
            log_error( "ERROR: Output value %d from variable-indexed constant array did not validate! (Expected %d, got %d)\n", idx, expectedValues[ idx ], outValues[ idx ] );
            break;
    }
    return -1;
}

View File

@@ -0,0 +1,121 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/*
 * OpenCL C program containing exactly one kernel (sample_test), which
 * converts each float in src to an int in dst.  Used by
 * test_createkernelsinprogram() for the single-kernel case.
 */
const char *sample_single_kernel = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"};
/*
 * OpenCL C program containing two kernels (sample_test and sample_test2)
 * with identical bodies.  Used by test_createkernelsinprogram() for the
 * two-kernel case.
 */
const char *sample_double_kernel = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"
"__kernel void sample_test2(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"};
int
test_createkernelsinprogram(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_program program;
cl_kernel kernel[2];
unsigned int num_kernels;
size_t lengths[2];
int err;
lengths[0] = strlen(sample_single_kernel);
program = clCreateProgramWithSource(context, 1, &sample_single_kernel, lengths, NULL);
if (!program)
{
log_error("clCreateProgramWithSource failed\n");
return -1;
}
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clBuildProgramExecutable failed\n");
return -1;
}
err = clCreateKernelsInProgram(program, 1, kernel, &num_kernels);
if ( (err != CL_SUCCESS) || (num_kernels != 1) )
{
log_error("clCreateKernelsInProgram test failed for a single kernel\n");
return -1;
}
clReleaseKernel(kernel[0]);
clReleaseProgram(program);
lengths[0] = strlen(sample_double_kernel);
program = clCreateProgramWithSource(context, 1, &sample_double_kernel, lengths, NULL);
if (!program)
{
log_error("clCreateProgramWithSource failed\n");
return -1;
}
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clBuildProgramExecutable failed\n");
return -1;
}
err = clCreateKernelsInProgram(program, 2, kernel, &num_kernels);
if ( (err != CL_SUCCESS) || (num_kernels != 2) )
{
log_error("clCreateKernelsInProgram test failed for two kernels\n");
return -1;
}
log_info("clCreateKernelsInProgram test passed\n");
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseProgram(program);
return err;
}

View File

@@ -0,0 +1,253 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/typeWrappers.h"
// Host-pointer related cl_mem_flags combinations that the map tests iterate
// over.  The final entry, 0, passes no flag bits at all.
// flag_set and flag_set_names are parallel arrays and must keep the same
// length and order: the tests index both with the same loop counter.
const cl_mem_flags flag_set[] = {
CL_MEM_ALLOC_HOST_PTR,
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_USE_HOST_PTR,
CL_MEM_COPY_HOST_PTR,
0
};
// Printable name for each entry of flag_set, used only in log output.
const char* flag_set_names[] = {
"CL_MEM_ALLOC_HOST_PTR",
"CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR",
"CL_MEM_USE_HOST_PTR",
"CL_MEM_COPY_HOST_PTR",
"0"
};
int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
const size_t bufferSize = 256*256;
int src_flag_id;
MTdata d = init_genrand( gRandomSeed );
cl_char *initialData = (cl_char*)malloc(bufferSize);
cl_char *finalData = (cl_char*)malloc(bufferSize);
for (src_flag_id=0; src_flag_id < 5; src_flag_id++)
{
clMemWrapper memObject;
log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
generate_random_data( kChar, (unsigned int)bufferSize, d, initialData );
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), initialData, &error);
else
memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), NULL, &error);
test_error( error, "Unable to create testing buffer" );
if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
{
error = clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize * sizeof( cl_char ), initialData, 0, NULL, NULL);
test_error( error, "clEnqueueWriteBuffer failed");
}
for( int i = 0; i < 128; i++ )
{
size_t offset = (size_t)random_in_range( 0, (int)bufferSize - 1, d );
size_t length = (size_t)random_in_range( 1, (int)( bufferSize - offset ), d );
cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
offset, length, 0, NULL, NULL, &error );
if( error != CL_SUCCESS )
{
print_error( error, "clEnqueueMapBuffer call failed" );
log_error( "\tOffset: %d Length: %d\n", (int)offset, (int)length );
free( initialData );
free( finalData );
free_mtdata(d);
return -1;
}
// Write into the region
for( size_t j = 0; j < length; j++ )
{
cl_char spin = (cl_char)genrand_int32( d );
// Test read AND write in one swipe
cl_char value = mappedRegion[ j ];
value = spin - value;
mappedRegion[ j ] = value;
// Also update the initial data array
value = initialData[ offset + j ];
value = spin - value;
initialData[ offset + j ] = value;
}
// Unmap
error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
test_error( error, "Unable to unmap buffer" );
}
// Final validation: read actual values of buffer and compare against our reference
error = clEnqueueReadBuffer( queue, memObject, CL_TRUE, 0, sizeof( cl_char ) * bufferSize, finalData, 0, NULL, NULL );
test_error( error, "Unable to read results" );
for( size_t q = 0; q < bufferSize; q++ )
{
if( initialData[ q ] != finalData[ q ] )
{
log_error( "ERROR: Sample %d did not validate! Got %d, expected %d\n", (int)q, (int)finalData[ q ], (int)initialData[ q ] );
free( initialData );
free( finalData );
free_mtdata(d);
return -1;
}
}
} // cl_mem flags
free( initialData );
free( finalData );
free_mtdata(d);
return 0;
}
int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 };
const size_t imageSize = 256;
int src_flag_id;
cl_uint *initialData;
cl_uint *finalData;
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
initialData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint));
finalData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint));
if( !is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &format ) )
{
log_error( "ERROR: Test requires basic OpenCL 1.0 format CL_RGBA:CL_UNSIGNED_INT32, which is unsupported by this device!\n" );
free(initialData);
free(finalData);
return -1;
}
d = init_genrand( gRandomSeed );
for (src_flag_id=0; src_flag_id < 5; src_flag_id++) {
clMemWrapper memObject;
log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
generate_random_data( kUInt, (unsigned int)( imageSize * imageSize ), d, initialData );
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format,
imageSize, imageSize, 0, initialData, &error );
else
memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format,
imageSize, imageSize, 0, NULL, &error );
test_error( error, "Unable to create testing buffer" );
if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
size_t write_origin[3]={0,0,0}, write_region[3]={imageSize, imageSize, 1};
error = clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, write_region, NULL, NULL, initialData, 0, NULL, NULL);
test_error( error, "Unable to write to testing buffer" );
}
for( int i = 0; i < 128; i++ )
{
size_t offset[3], region[3];
size_t rowPitch;
offset[ 0 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
region[ 0 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 0 ] - 1), d );
offset[ 1 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
region[ 1 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 1 ] - 1), d );
offset[ 2 ] = 0;
region[ 2 ] = 1;
cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
offset, region, &rowPitch, NULL, 0, NULL, NULL, &error );
if( error != CL_SUCCESS )
{
print_error( error, "clEnqueueMapImage call failed" );
log_error( "\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], (int)offset[1], (int)region[0], (int)region[1] );
free(initialData);
free(finalData);
free_mtdata(d);
return -1;
}
// Write into the region
cl_uint *mappedPtr = mappedRegion;
for( size_t y = 0; y < region[ 1 ]; y++ )
{
for( size_t x = 0; x < region[ 0 ] * 4; x++ )
{
cl_int spin = (cl_int)random_in_range( 16, 1024, d );
cl_int value;
// Test read AND write in one swipe
value = mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ];
value = spin - value;
mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ] = value;
// Also update the initial data array
value = initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ];
value = spin - value;
initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ] = value;
}
}
// Unmap
error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
test_error( error, "Unable to unmap buffer" );
}
// Final validation: read actual values of buffer and compare against our reference
size_t finalOrigin[3] = { 0, 0, 0 }, finalRegion[3] = { imageSize, imageSize, 1 };
error = clEnqueueReadImage( queue, memObject, CL_TRUE, finalOrigin, finalRegion, 0, 0, finalData, 0, NULL, NULL );
test_error( error, "Unable to read results" );
for( size_t q = 0; q < imageSize * imageSize * 4; q++ )
{
if( initialData[ q ] != finalData[ q ] )
{
log_error( "ERROR: Sample %d (coord %d,%d) did not validate! Got %d, expected %d\n", (int)q, (int)( ( q / 4 ) % imageSize ), (int)( ( q / 4 ) / imageSize ),
(int)finalData[ q ], (int)initialData[ q ] );
free(initialData);
free(finalData);
free_mtdata(d);
return -1;
}
}
} // cl_mem_flags
free(initialData);
free(finalData);
free_mtdata(d);
return 0;
}

View File

@@ -0,0 +1,384 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/typeWrappers.h"
/* Expands to the OpenCL C source (one string literal) of a "test_conversion"
   kernel that reads a scalar of type srctype and stores it, explicitly cast,
   into a vector of type dsttype<size>.  srctype and dsttype must already be
   string literals; size is stringified here. */
#define DECLARE_S2V_IDENT_KERNEL(srctype,dsttype,size) \
"__kernel void test_conversion(__global " srctype " *sourceValues, __global " dsttype #size " *destValues )\n" \
"{\n" \
" int tid = get_global_id(0);\n" \
" " srctype " src = sourceValues[tid];\n" \
"\n" \
" destValues[tid] = (" dsttype #size ")src;\n" \
"\n" \
"}\n"
/* One initializer list of four kernels (vector sizes 2, 4, 8 and 16) for a
   given source/destination pair.  The table's innermost dimension is 5, so
   the fifth slot is left to implicit zero-initialization. */
#define DECLARE_S2V_IDENT_KERNELS(srctype,dsttype) \
{ \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,2), \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,4), \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,8), \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,16) \
}
/* Placeholder entry with no kernels; test_explicit_s2v_function() returns 0
   immediately for a NULL program source. */
#define DECLARE_EMPTY { NULL, NULL, NULL, NULL, NULL }
/* Note: the entries of the two *_SET macros below and the rows of
   kernel_explicit_s2v_set must match in order and size to the ExplicitTypes
   enum in conversions.h!!! */
/* All destination types for one source type.  Entries whose type name
   contains a space (e.g. "unsigned char") produce kernel text that is never
   built: test_explicit_s2v_function_set() skips such names. */
#define DECLARE_S2V_IDENT_KERNELS_SET(srctype) \
{ \
DECLARE_S2V_IDENT_KERNELS(#srctype,bool), \
DECLARE_S2V_IDENT_KERNELS(#srctype,char), \
DECLARE_S2V_IDENT_KERNELS(#srctype,uchar), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned char), \
DECLARE_S2V_IDENT_KERNELS(#srctype,short), \
DECLARE_S2V_IDENT_KERNELS(#srctype,ushort), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned short), \
DECLARE_S2V_IDENT_KERNELS(#srctype,int), \
DECLARE_S2V_IDENT_KERNELS(#srctype,uint), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned int), \
DECLARE_S2V_IDENT_KERNELS(#srctype,long), \
DECLARE_S2V_IDENT_KERNELS(#srctype,ulong), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned long), \
DECLARE_S2V_IDENT_KERNELS(#srctype,float), \
DECLARE_EMPTY \
}
/* A full row of placeholder entries, for a source type that has no kernels. */
#define DECLARE_EMPTY_SET \
{ \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY \
}
/* The overall array, indexed as [source type][destination type][vector size
   index], where the size index follows the order 2, 4, 8, 16. */
const char * kernel_explicit_s2v_set[kNumExplicitTypes][kNumExplicitTypes][5] = {
DECLARE_S2V_IDENT_KERNELS_SET(bool),
DECLARE_S2V_IDENT_KERNELS_SET(char),
DECLARE_S2V_IDENT_KERNELS_SET(uchar),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned char),
DECLARE_S2V_IDENT_KERNELS_SET(short),
DECLARE_S2V_IDENT_KERNELS_SET(ushort),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned short),
DECLARE_S2V_IDENT_KERNELS_SET(int),
DECLARE_S2V_IDENT_KERNELS_SET(uint),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned int),
DECLARE_S2V_IDENT_KERNELS_SET(long),
DECLARE_S2V_IDENT_KERNELS_SET(ulong),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned long),
DECLARE_S2V_IDENT_KERNELS_SET(float),
DECLARE_EMPTY_SET
};
int test_explicit_s2v_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *programSrc,
ExplicitType srcType, unsigned int count, ExplicitType destType, unsigned int vecSize, void *inputData )
{
clProgramWrapper program;
clKernelWrapper kernel;
int error;
clMemWrapper streams[2];
void *outData;
unsigned char convertedData[ 8 ]; /* Max type size is 8 bytes */
size_t threadSize[3], groupSize[3];
unsigned int i, s;
unsigned char *inPtr, *outPtr;
size_t paramSize, destTypeSize;
const char* finalProgramSrc[2] = {
"", // optional pragma
programSrc
};
if (srcType == kDouble || destType == kDouble) {
finalProgramSrc[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
}
if( programSrc == NULL )
return 0;
paramSize = get_explicit_type_size( srcType );
destTypeSize = get_explicit_type_size( destType );
size_t destStride = destTypeSize * vecSize;
outData = malloc( destStride * count );
if( create_single_kernel_helper( context, &program, &kernel, 2, finalProgramSrc, "test_conversion" ) )
{
log_info( "****** %s%s *******\n", finalProgramSrc[0], finalProgramSrc[1] );
return -1;
}
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), paramSize * count, inputData, &error);
test_error( error, "clCreateBuffer failed");
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), destStride * count, NULL, &error);
test_error( error, "clCreateBuffer failed");
/* Set the arguments */
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] );
test_error( error, "Unable to set indexed kernel arguments" );
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] );
test_error( error, "Unable to set indexed kernel arguments" );
/* Run the kernel */
threadSize[0] = count;
error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] );
test_error( error, "Unable to get work group size to use" );
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL );
test_error( error, "Unable to execute test kernel" );
/* Now verify the results. Each value should have been duplicated four times, and we should be able to just
do a memcpy instead of relying on the actual type of data */
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, destStride * count, outData, 0, NULL, NULL );
test_error( error, "Unable to read output values!" );
inPtr = (unsigned char *)inputData;
outPtr = (unsigned char *)outData;
for( i = 0; i < count; i++ )
{
/* Convert the input data element to our output data type to compare against */
convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType );
/* Now compare every element of the vector */
for( s = 0; s < vecSize; s++ )
{
if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 )
{
unsigned int *p = (unsigned int *)outPtr;
log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize );
log_error( " Input: 0x%0*x\n", (int)( paramSize * 2 ), *(unsigned int *)inPtr & ( 0xffffffff >> ( 32 - paramSize * 8 ) ) );
log_error( " Actual: 0x%08x 0x%08x 0x%08x 0x%08x\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
return -1;
}
}
inPtr += paramSize;
outPtr += destStride;
}
free( outData );
return 0;
}
// Runs the scalar-to-vector cast test for srcType against every destination
// type in the kernel table, for vector sizes 2, 4, 8 and 16.  Only the
// identity pairing (destination type == source type) is actually exercised;
// type names containing a space are skipped.
// Returns 0 when everything passed, -1 if any combination failed.
int test_explicit_s2v_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, ExplicitType srcType,
                                   unsigned int count, void *inputData )
{
    const unsigned int sizes[] = { 2, 4, 8, 16, 0 }; // zero-terminated
    int status = 0;

    for( int dst = kBool; dst < kNumExplicitTypes; ++dst )
    {
        const ExplicitType destType = (ExplicitType)dst;

        // Skip destination types the device cannot handle.
        if( dst == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
            continue;

        if( ( dst == kLong || dst == kULong ) && !gHasLong )
            continue;

        // Only identity conversions are tested; this does not depend on the
        // vector size, so it is decided once per destination type.
        if( dst != srcType )
            continue;

        for( int sizeIdx = 0; sizes[ sizeIdx ] != 0; ++sizeIdx )
        {
            const bool srcHasSpace = strchr( get_explicit_type_name( (ExplicitType)srcType ), ' ' ) != NULL;
            if( srcHasSpace || strchr( get_explicit_type_name( destType ), ' ' ) != NULL )
                continue;

            const int outcome = test_explicit_s2v_function( deviceID, context, queue, kernel_explicit_s2v_set[ srcType ][ dst ][ sizeIdx ],
                                                            srcType, count, destType, sizes[ sizeIdx ], inputData );
            if( outcome == 0 )
                continue;

            log_error( "ERROR: Explicit cast of scalar %s to vector %s%d FAILED; skipping other %s vector tests\n",
                       get_explicit_type_name(srcType), get_explicit_type_name(destType), sizes[ sizeIdx ], get_explicit_type_name(destType) );
            status = -1;
            break;
        }
    }

    return status;
}
// Placeholder for the bool scalar-to-vector cast test: it only logs that the
// test is skipped and reports success.
int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    log_info( "NOTE: Boolean vectors not defined in OpenCL 1.0. Skipping test.\n" );
    return 0;
}
// Scalar-to-vector explicit cast test for char, using 128 random inputs.
// Returns the result of test_explicit_s2v_function_set unchanged.
int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const unsigned int sampleCount = 128;
    char samples[128];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kChar, sampleCount, seed, samples );

    int status = test_explicit_s2v_function_set( deviceID, context, queue, kChar, sampleCount, samples );
    return status;
}
// Scalar-to-vector explicit cast test for unsigned char, using 128 random
// inputs.  The same data is run under both spellings of the type (kUChar and
// kUnsignedChar); the first failure stops the test.
// Returns 0 on success, -1 on failure.
int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const ExplicitType spellings[] = { kUChar, kUnsignedChar };
    const unsigned int sampleCount = 128;
    unsigned char samples[128];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kUChar, sampleCount, seed, samples );

    for( size_t v = 0; v < sizeof( spellings ) / sizeof( spellings[ 0 ] ); ++v )
    {
        if( test_explicit_s2v_function_set( deviceID, context, queue, spellings[ v ], sampleCount, samples ) != 0 )
            return -1;
    }
    return 0;
}
// Scalar-to-vector explicit cast test for short, using 128 random inputs.
// Returns 0 on success, -1 on failure.
int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const unsigned int sampleCount = 128;
    short samples[128];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kShort, sampleCount, seed, samples );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kShort, sampleCount, samples );
    return ( status != 0 ) ? -1 : 0;
}
// Scalar-to-vector explicit cast test for unsigned short, using 128 random
// inputs.  The same data is run under both spellings of the type (kUShort
// and kUnsignedShort); the first failure stops the test.
// Returns 0 on success, -1 on failure.
int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const ExplicitType spellings[] = { kUShort, kUnsignedShort };
    const unsigned int sampleCount = 128;
    unsigned short samples[128];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kUShort, sampleCount, seed, samples );

    for( size_t v = 0; v < sizeof( spellings ) / sizeof( spellings[ 0 ] ); ++v )
    {
        if( test_explicit_s2v_function_set( deviceID, context, queue, spellings[ v ], sampleCount, samples ) != 0 )
            return -1;
    }
    return 0;
}
// Scalar-to-vector explicit cast test for int sources.
// Generates 128 random kInt values and runs the shared test set on them.
// Returns -1 if the test set reports a non-zero result, 0 otherwise.
int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kValueCount = 128;
    int values[ kValueCount ];

    RandomSeed seed(gRandomSeed);
    generate_random_data( kInt, kValueCount, seed, values );

    return ( test_explicit_s2v_function_set( deviceID, context, queue, kInt, kValueCount, values ) != 0 ) ? -1 : 0;
}
// Scalar-to-vector explicit cast test for unsigned int sources.
// One batch of 128 random kUInt values is run through the shared test set
// twice: first tagged kUInt, then tagged kUnsignedInt. The second run is
// skipped if the first one fails. Returns -1 on any failure, 0 otherwise.
int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kValueCount = 128;
    unsigned int values[ kValueCount ];

    RandomSeed seed(gRandomSeed);
    generate_random_data( kUInt, kValueCount, seed, values );

    // Short-circuit evaluation keeps the second run from executing after a failure.
    const bool anyFailed =
        ( test_explicit_s2v_function_set( deviceID, context, queue, kUInt, kValueCount, values ) != 0 ) ||
        ( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedInt, kValueCount, values ) != 0 );

    return anyFailed ? -1 : 0;
}
// Scalar-to-vector explicit cast test for long sources (stored as cl_long).
// Generates 128 random kLong values and runs the shared test set on them.
// Returns -1 if the test set reports a non-zero result, 0 otherwise.
int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kValueCount = 128;
    cl_long values[ kValueCount ];

    RandomSeed seed(gRandomSeed);
    generate_random_data( kLong, kValueCount, seed, values );

    return ( test_explicit_s2v_function_set( deviceID, context, queue, kLong, kValueCount, values ) != 0 ) ? -1 : 0;
}
// Scalar-to-vector explicit cast test for unsigned long sources (stored as cl_ulong).
// One batch of 128 random kULong values is run through the shared test set
// twice: first tagged kULong, then tagged kUnsignedLong. The second run is
// skipped if the first one fails. Returns -1 on any failure, 0 otherwise.
int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kValueCount = 128;
    cl_ulong values[ kValueCount ];

    RandomSeed seed(gRandomSeed);
    generate_random_data( kULong, kValueCount, seed, values );

    // Short-circuit evaluation keeps the second run from executing after a failure.
    const bool anyFailed =
        ( test_explicit_s2v_function_set( deviceID, context, queue, kULong, kValueCount, values ) != 0 ) ||
        ( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedLong, kValueCount, values ) != 0 );

    return anyFailed ? -1 : 0;
}
// Scalar-to-vector explicit cast test for float sources.
// Generates 128 random kFloat values and runs the shared test set on them.
// Returns -1 if the test set reports a non-zero result, 0 otherwise.
int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kValueCount = 128;
    float values[ kValueCount ];

    RandomSeed seed(gRandomSeed);
    generate_random_data( kFloat, kValueCount, seed, values );

    return ( test_explicit_s2v_function_set( deviceID, context, queue, kFloat, kValueCount, values ) != 0 ) ? -1 : 0;
}
// Scalar-to-vector explicit cast test for double sources.
// If the device does not report the cl_khr_fp64 extension the test is skipped
// and 0 is returned. Otherwise 128 random kDouble values are run through the
// shared test set; returns -1 if it reports a non-zero result, 0 otherwise.
int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kValueCount = 128;
    double values[ kValueCount ];
    RandomSeed seed(gRandomSeed);

    if( is_extension_available( deviceID, "cl_khr_fp64" ) )
    {
        generate_random_data( kDouble, kValueCount, seed, values );
        return ( test_explicit_s2v_function_set( deviceID, context, queue, kDouble, kValueCount, values ) != 0 ) ? -1 : 0;
    }

    log_info("Extension cl_khr_fp64 not supported. Skipping test.\n");
    return 0;
}

View File

@@ -0,0 +1,160 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the kernel built by test_float2int(): each work-item
// reads src[get_global_id(0)], casts it to int with a C-style cast and stores
// the result at the same index in dst.
const char *float2int_kernel_code =
"__kernel void test_float2int(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n";
// Compares each device result in outptr with the host cast (int)inptr[k].
// Logs a failure and returns -1 at the first mismatch; otherwise logs a pass
// and returns 0 once all n elements have been checked.
int
verify_float2int(cl_float *inptr, cl_int *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        const int expected = (int)inptr[idx];
        if (!(outptr[idx] == expected))
        {
            log_error("FLOAT2INT test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FLOAT2INT test passed\n");
    return 0;
}
// Runs the float-to-int cast kernel over num_elements random floats and checks
// the device output against the host cast via verify_float2int().
//
// Returns 0 when the verification passes and -1 on any failure (allocation,
// OpenCL API error, or result mismatch). All host buffers and OpenCL objects
// created here are released on every exit path.
int
test_float2int(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[2] = { NULL, NULL };
    cl_float *input_ptr = NULL;
    cl_int *output_ptr = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    size_t lengths[1];
    size_t threads[1];
    int err;
    int result = -1;   // pessimistic default; only the verification step can clear it
    int i;
    MTdata d;

    input_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
    output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
    if (!input_ptr || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    // Random inputs drawn from [-2^31, 2^31] using the harness seed.
    d = init_genrand( gRandomSeed );
    for (i=0; i<num_elements; i++)
        input_ptr[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*num_elements, (void *)input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    lengths[0] = strlen(float2int_kernel_code);
    program = clCreateProgramWithSource(context, 1, &float2int_kernel_code, lengths, NULL);
    if (!program)
    {
        log_error("clCreateProgramWithSource failed\n");
        goto cleanup;
    }

    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clBuildProgram failed\n");
        goto cleanup;
    }

    kernel = clCreateKernel(program, "test_float2int", NULL);
    if (!kernel)
    {
        log_error("clCreateKernel failed\n");
        goto cleanup;
    }

    // Accumulate both results so a failure on argument 0 is not masked by
    // a success on argument 1.
    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (size_t)num_elements;
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    // Blocking read: the output is complete when the call returns.
    err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_float2int(input_ptr, output_ptr, num_elements);

cleanup:
    // Release only what was actually created; free(NULL) is a no-op.
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return result;
}

View File

@@ -0,0 +1,270 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "../../test_common/harness/rounding_mode.h"
#include "procs.h"
// OpenCL C source for the scalar float addition kernel:
// dst[tid] = srcA[tid] + srcB[tid] for each work-item tid.
const char *fpadd_kernel_code =
"__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// OpenCL C source for the scalar float subtraction kernel:
// dst[tid] = srcA[tid] - srcB[tid] for each work-item tid.
const char *fpsub_kernel_code =
"__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// OpenCL C source for the scalar float multiplication kernel:
// dst[tid] = srcA[tid] * srcB[tid] for each work-item tid.
const char *fpmul_kernel_code =
"__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// NOTE(review): MAX_ERR is not referenced by the verify_fp* helpers or the
// test function visible in this file (they compare results exactly) — confirm
// whether it is still needed.
static const float MAX_ERR = 1e-5f;
// Checks the scalar float add kernel output: for every element the host sum
// inptrA[k] + inptrB[k], stored in a float, must compare equal to outptr[k].
// Logs and returns -1 at the first mismatch; logs a pass and returns 0 otherwise.
int
verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        // Keep the reference in a float temporary, exactly as the comparison expects.
        const float expected = inptrA[idx] + inptrB[idx];
        if (!(expected == outptr[idx]))
        {
            log_error("FP_ADD float test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FP_ADD float test passed\n");
    return 0;
}
// Checks the scalar float subtract kernel output: for every element the host
// difference inptrA[k] - inptrB[k], stored in a float, must compare equal to
// outptr[k]. Logs and returns -1 at the first mismatch; logs a pass and
// returns 0 otherwise.
int
verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        // Keep the reference in a float temporary, exactly as the comparison expects.
        const float expected = inptrA[idx] - inptrB[idx];
        if (!(expected == outptr[idx]))
        {
            log_error("FP_SUB float test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FP_SUB float test passed\n");
    return 0;
}
// Checks the scalar float multiply kernel output: for every element the host
// product inptrA[k] * inptrB[k], stored in a float, must compare equal to
// outptr[k]. Logs and returns -1 at the first mismatch; logs a pass and
// returns 0 otherwise.
int
verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        // Keep the reference in a float temporary, exactly as the comparison expects.
        const float expected = inptrA[idx] * inptrB[idx];
        if (!(expected == outptr[idx]))
        {
            log_error("FP_MUL float test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FP_MUL float test passed\n");
    return 0;
}
int
test_fpmath_float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem streams[4];
cl_program program[3];
cl_kernel kernel[3];
float *input_ptr[3], *output_ptr, *p;
size_t threads[1];
int err, i;
MTdata d = init_genrand( gRandomSeed );
size_t length = sizeof(cl_float) * num_elements;
int isRTZ = 0;
RoundingMode oldMode = kDefaultRoundingMode;
// check for floating point capabilities
cl_device_fp_config single_config = 0;
err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
if (err) {
log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
test_finish();
return -1;
}
//If we only support rtz mode
if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
{
//Check to make sure we are an embedded device
char profile[32];
err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
if( err )
{
log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
test_finish();
return -1;
}
if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
{
log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
test_finish();
return -1;
}
isRTZ = 1;
oldMode = get_round();
}
input_ptr[0] = (cl_float*)malloc(length);
input_ptr[1] = (cl_float*)malloc(length);
input_ptr[2] = (cl_float*)malloc(length);
output_ptr = (cl_float*)malloc(length);
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
p = input_ptr[0];
for (i=0; i<num_elements; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
p = input_ptr[1];
for (i=0; i<num_elements; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
p = input_ptr[2];
for (i=0; i<num_elements; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
test_error( err, "clEnqueueWriteBuffer failed.");
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
test_error( err, "clEnqueueWriteBuffer failed.");
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
test_error( err, "clEnqueueWriteBuffer failed.");
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd_kernel_code, "test_fpadd");
test_error( err, "create_single_kernel_helper failed");
err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub_kernel_code, "test_fpsub");
test_error( err, "create_single_kernel_helper failed");
err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul_kernel_code, "test_fpmul");
test_error( err, "create_single_kernel_helper failed");
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
threads[0] = (unsigned int)num_elements;
for (i=0; i<3; i++)
{
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
test_error( err, "clEnqueueNDRangeKernel failed.");
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
test_error( err, "clEnqueueReadBuffer failed.");
if( isRTZ )
set_round( kRoundTowardZero, kfloat );
switch (i)
{
case 0:
err = verify_fpadd(input_ptr[0], input_ptr[1], output_ptr, num_elements);
break;
case 1:
err = verify_fpsub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
break;
case 2:
err = verify_fpmul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
break;
}
if( isRTZ )
set_round( oldMode, kfloat );
if (err)
break;
}
// cleanup
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[1]);
clReleaseMemObject(streams[2]);
clReleaseMemObject(streams[3]);
for (i=0; i<3; i++)
{
clReleaseKernel(kernel[i]);
clReleaseProgram(program[i]);
}
free(input_ptr[0]);
free(input_ptr[1]);
free(input_ptr[2]);
free(output_ptr);
free_mtdata( d );
return err;
}

View File

@@ -0,0 +1,268 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "../../test_common/harness/rounding_mode.h"
#include "procs.h"
// OpenCL C source for the float2 addition kernel:
// dst[tid] = srcA[tid] + srcB[tid] for each work-item tid.
const char *fpadd2_kernel_code =
"__kernel void test_fpadd2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// OpenCL C source for the float2 subtraction kernel:
// dst[tid] = srcA[tid] - srcB[tid] for each work-item tid.
const char *fpsub2_kernel_code =
"__kernel void test_fpsub2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// OpenCL C source for the float2 multiplication kernel:
// dst[tid] = srcA[tid] * srcB[tid] for each work-item tid.
const char *fpmul2_kernel_code =
"__kernel void test_fpmul2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// Checks the float2 add kernel output component by component: the host sum
// inptrA[k] + inptrB[k], stored in a float, must compare equal to outptr[k]
// for all n floats. Logs and returns -1 at the first mismatch; logs a pass
// and returns 0 otherwise.
int
verify_fpadd2(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        // Keep the reference in a float temporary, exactly as the comparison expects.
        const float expected = inptrA[idx] + inptrB[idx];
        if (!(expected == outptr[idx]))
        {
            log_error("FP_ADD float2 test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FP_ADD float2 test passed\n");
    return 0;
}
// Checks the float2 subtract kernel output component by component: the host
// difference inptrA[k] - inptrB[k], stored in a float, must compare equal to
// outptr[k] for all n floats. Logs and returns -1 at the first mismatch; logs
// a pass and returns 0 otherwise.
int
verify_fpsub2(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        // Keep the reference in a float temporary, exactly as the comparison expects.
        const float expected = inptrA[idx] - inptrB[idx];
        if (!(expected == outptr[idx]))
        {
            log_error("FP_SUB float2 test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FP_SUB float2 test passed\n");
    return 0;
}
// Checks the float2 multiply kernel output component by component: the host
// product inptrA[k] * inptrB[k], stored in a float, must compare equal to
// outptr[k] for all n floats. Logs and returns -1 at the first mismatch; logs
// a pass and returns 0 otherwise.
int
verify_fpmul2(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        // Keep the reference in a float temporary, exactly as the comparison expects.
        const float expected = inptrA[idx] * inptrB[idx];
        if (!(expected == outptr[idx]))
        {
            log_error("FP_MUL float2 test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FP_MUL float2 test passed\n");
    return 0;
}
int
test_fpmath_float2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem streams[4];
cl_program program[3];
cl_kernel kernel[3];
cl_float *input_ptr[3], *output_ptr, *p;
size_t threads[1];
int err, i;
MTdata d = init_genrand( gRandomSeed );
size_t length = sizeof(cl_float) * 2 * num_elements;
int isRTZ = 0;
RoundingMode oldMode = kDefaultRoundingMode;
// check for floating point capabilities
cl_device_fp_config single_config = 0;
err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
if (err) {
log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
test_finish();
return -1;
}
//If we only support rtz mode
if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
{
//Check to make sure we are an embedded device
char profile[32];
err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
if( err )
{
log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
test_finish();
return -1;
}
if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
{
log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
test_finish();
return -1;
}
isRTZ = 1;
oldMode = get_round();
}
input_ptr[0] = (cl_float*)malloc(length);
input_ptr[1] = (cl_float*)malloc(length);
input_ptr[2] = (cl_float*)malloc(length);
output_ptr = (cl_float*)malloc(length);
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
p = input_ptr[0];
for (i=0; i<num_elements*2; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
p = input_ptr[1];
for (i=0; i<num_elements*2; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
p = input_ptr[2];
for (i=0; i<num_elements*2; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd2_kernel_code, "test_fpadd2");
test_error( err, "create_single_kernel_helper failed");
err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub2_kernel_code, "test_fpsub2");
test_error( err, "create_single_kernel_helper failed");
err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul2_kernel_code, "test_fpmul2");
test_error( err, "create_single_kernel_helper failed");
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
free_mtdata(d);
d = NULL;
threads[0] = (unsigned int)num_elements;
for (i=0; i<3; i++)
{
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
test_error( err, "clEnqueueNDRangeKernel failed.");
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
test_error( err, "clEnqueueReadBuffer failed.");
if( isRTZ )
set_round( kRoundTowardZero, kfloat );
switch (i)
{
case 0:
err = verify_fpadd2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
break;
case 1:
err = verify_fpsub2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
break;
case 2:
err = verify_fpmul2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
break;
}
if( isRTZ )
set_round( oldMode, kfloat );
if (err)
break;
}
// cleanup
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[1]);
clReleaseMemObject(streams[2]);
clReleaseMemObject(streams[3]);
for (i=0; i<3; i++)
{
clReleaseKernel(kernel[i]);
clReleaseProgram(program[i]);
}
free(input_ptr[0]);
free(input_ptr[1]);
free(input_ptr[2]);
free(output_ptr);
return err;
}

View File

@@ -0,0 +1,269 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/rounding_mode.h"
// OpenCL C source for the float4 addition kernel:
// dst[tid] = srcA[tid] + srcB[tid] for each work-item tid.
const char *fpadd4_kernel_code =
"__kernel void test_fpadd4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// OpenCL C source for the float4 subtraction kernel:
// dst[tid] = srcA[tid] - srcB[tid] for each work-item tid.
const char *fpsub4_kernel_code =
"__kernel void test_fpsub4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// OpenCL C source for the float4 multiplication kernel:
// dst[tid] = srcA[tid] * srcB[tid] for each work-item tid.
const char *fpmul4_kernel_code =
"__kernel void test_fpmul4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// Checks the float4 add kernel output component by component: the host sum
// inptrA[k] + inptrB[k], stored in a float, must compare equal to outptr[k]
// for all n floats. Logs and returns -1 at the first mismatch; logs a pass
// and returns 0 otherwise.
int
verify_fpadd4(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        // Keep the reference in a float temporary, exactly as the comparison expects.
        const float expected = inptrA[idx] + inptrB[idx];
        if (!(expected == outptr[idx]))
        {
            log_error("FP_ADD float4 test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FP_ADD float4 test passed\n");
    return 0;
}
// Checks the float4 subtract kernel output component by component: the host
// difference inptrA[k] - inptrB[k], stored in a float, must compare equal to
// outptr[k] for all n floats. Logs and returns -1 at the first mismatch; logs
// a pass and returns 0 otherwise.
int
verify_fpsub4(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        // Keep the reference in a float temporary, exactly as the comparison expects.
        const float expected = inptrA[idx] - inptrB[idx];
        if (!(expected == outptr[idx]))
        {
            log_error("FP_SUB float4 test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FP_SUB float4 test passed\n");
    return 0;
}
// Checks the float4 multiply kernel output component by component: the host
// product inptrA[k] * inptrB[k], stored in a float, must compare equal to
// outptr[k] for all n floats. Logs and returns -1 at the first mismatch; logs
// a pass and returns 0 otherwise.
int
verify_fpmul4(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        // Keep the reference in a float temporary, exactly as the comparison expects.
        const float expected = inptrA[idx] * inptrB[idx];
        if (!(expected == outptr[idx]))
        {
            log_error("FP_MUL float4 test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FP_MUL float4 test passed\n");
    return 0;
}
int
test_fpmath_float4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem streams[4];
cl_program program[3];
cl_kernel kernel[3];
cl_float *input_ptr[3], *output_ptr, *p;
size_t threads[1];
int err, i;
MTdata d = init_genrand( gRandomSeed );
size_t length = sizeof(cl_float) * 4 * num_elements;
int isRTZ = 0;
RoundingMode oldMode = kDefaultRoundingMode;
// check for floating point capabilities
cl_device_fp_config single_config = 0;
err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
if (err) {
log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
test_finish();
return -1;
}
//If we only support rtz mode
if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
{
//Check to make sure we are an embedded device
char profile[32];
err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
if( err )
{
log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
test_finish();
return -1;
}
if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
{
log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
test_finish();
return -1;
}
isRTZ = 1;
oldMode = get_round();
}
input_ptr[0] = (cl_float*)malloc(length);
input_ptr[1] = (cl_float*)malloc(length);
input_ptr[2] = (cl_float*)malloc(length);
output_ptr = (cl_float*)malloc(length);
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
p = input_ptr[0];
for (i=0; i<num_elements*4; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
p = input_ptr[1];
for (i=0; i<num_elements*4; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
p = input_ptr[2];
for (i=0; i<num_elements*4; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
free_mtdata(d);
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd4_kernel_code, "test_fpadd4");
test_error( err, "create_single_kernel_helper failed");
err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub4_kernel_code, "test_fpsub4");
test_error( err, "create_single_kernel_helper failed");
err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul4_kernel_code, "test_fpmul4");
test_error( err, "create_single_kernel_helper failed");
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
threads[0] = (unsigned int)num_elements;
for (i=0; i<3; i++)
{
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
test_error( err, "clEnqueueNDRangeKernel failed.");
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
test_error( err, "clEnqueueReadBuffer failed.");
if( isRTZ )
set_round( kRoundTowardZero, kfloat );
switch (i)
{
case 0:
err = verify_fpadd4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
break;
case 1:
err = verify_fpsub4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
break;
case 2:
err = verify_fpmul4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
break;
}
if( isRTZ )
set_round( oldMode, kfloat );
if (err)
break;
}
// cleanup
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[1]);
clReleaseMemObject(streams[2]);
clReleaseMemObject(streams[3]);
for (i=0; i<3; i++)
{
clReleaseKernel(kernel[i]);
clReleaseProgram(program[i]);
}
free(input_ptr[0]);
free(input_ptr[1]);
free(input_ptr[2]);
free(output_ptr);
return err;
}

View File

@@ -0,0 +1,284 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"
#include <ctype.h>
// OpenCL C source for the global-work-offset test kernel. Each work-item
// rebuilds its zero-based position from local id, group id and local size,
// linearizes it with the global sizes, and stores get_global_id(0..2) at that
// linear index in outputID_A/B/C.
const char *work_offset_test[] = {
"__kernel void test( __global int * outputID_A, \n"
" __global int * outputID_B, __global int * outputID_C )\n"
"{\n"
" size_t id0 = get_local_id( 0 ) + get_group_id( 0 ) * get_local_size( 0 );\n"
" size_t id1 = get_local_id( 1 ) + get_group_id( 1 ) * get_local_size( 1 );\n"
" size_t id2 = get_local_id( 2 ) + get_group_id( 2 ) * get_local_size( 2 );\n"
" size_t id = ( id2 * get_global_size( 0 ) * get_global_size( 1 ) ) + ( id1 * get_global_size( 0 ) ) + id0;\n"
"\n"
" outputID_A[ id ] = get_global_id( 0 );\n"
" outputID_B[ id ] = get_global_id( 1 );\n"
" outputID_C[ id ] = get_global_id( 2 );\n"
"}\n"
};

// Parenthesized so the product stays a single operand wherever the macro is
// expanded (e.g. on the right-hand side of '/' or '%').
#define MAX_TEST_ITEMS ( 16 * 16 * 16 )
#define NUM_TESTS 16
#define MAX_OFFSET 256

// Range check used inside check_results(): if v is negative or >= m, logs an
// error and returns -1 from the ENCLOSING function. It reads a variable named
// `i` from the expansion site for the message. The expansion is a braced `if`
// statement, so it can be used with or without a trailing semicolon.
// size_t values are cast to unsigned long so they match the %lu conversions on
// platforms where size_t is not unsigned long.
#define CHECK_RANGE( v, m, c ) \
if( ( (v) >= (cl_int)(m) ) || ( (v) < 0 ) ) \
{ \
    log_error( "ERROR: ouputID_%c[%lu]: %d is < 0 or >= %lu\n", (c), (unsigned long)i, (v), (unsigned long)(m) ); \
    return -1; \
}
int check_results( size_t threads[], size_t offsets[], cl_int outputA[], cl_int outputB[], cl_int outputC[] )
{
size_t offsettedSizes[ 3 ] = { threads[ 0 ] + offsets[ 0 ], threads[ 1 ] + offsets[ 1 ], threads[ 2 ] + offsets[ 2 ] };
size_t limit = threads[ 0 ] * threads[ 1 ] * threads[ 2 ];
static char counts[ MAX_OFFSET + 32 ][ MAX_OFFSET + 16 ][ MAX_OFFSET + 16 ];
memset( counts, 0, sizeof( counts ) );
for( size_t i = 0; i < limit; i++ )
{
// Check ranges first
CHECK_RANGE( outputA[ i ], offsettedSizes[ 0 ], 'A' )
CHECK_RANGE( outputB[ i ], offsettedSizes[ 1 ], 'B' )
CHECK_RANGE( outputC[ i ], offsettedSizes[ 2 ], 'C' )
// Now set the value in the map
counts[ outputA[ i ] ][ outputB[ i ] ][ outputC[ i ] ]++;
}
// Now check the map
int missed = 0, multiple = 0, errored = 0, corrected = 0;
for( size_t x = 0; x < offsettedSizes[ 0 ]; x++ )
{
for( size_t y = 0; y < offsettedSizes[ 1 ]; y++ )
{
for( size_t z = 0; z < offsettedSizes[ 2 ]; z++ )
{
const char * limitMsg = " (further errors of this type suppressed)";
if( ( x >= offsets[ 0 ] ) && ( y >= offsets[ 1 ] ) && ( z >= offsets[ 2 ] ) )
{
if( counts[ x ][ y ][ z ] < 1 )
{
if( missed < 3 )
log_error( "ERROR: Map value (%ld,%ld,%ld) was missed%s\n", x, y, z, ( missed == 2 ) ? limitMsg : "" );
missed++;
}
else if( counts[ x ][ y ][ z ] > 1 )
{
if( multiple < 3 )
log_error( "ERROR: Map value (%ld,%ld,%ld) was returned multiple times%s\n", x, y, z, ( multiple == 2 ) ? limitMsg : "" );
multiple++;
}
}
else
{
if( counts[ x ][ y ][ z ] > 0 )
{
if( errored < 3 )
log_error( "ERROR: Map value (%ld,%ld,%ld) was erroneously returned%s\n", x, y, z, ( errored == 2 ) ? limitMsg : "" );
errored++;
}
}
}
}
}
if( missed || multiple || errored )
{
size_t diffs[3] = { ( offsets[ 0 ] > threads[ 0 ] ? 0 : threads[ 0 ] - offsets[ 0 ] ),
( offsets[ 1 ] > threads[ 1 ] ? 0 : threads[ 1 ] - offsets[ 1 ] ),
( offsets[ 2 ] > threads[ 2 ] ? 0 : threads[ 2 ] - offsets[ 2 ] ) };
int diff = (int)( ( threads[ 0 ] - diffs[ 0 ] ) * ( threads[ 1 ] - diffs[ 1 ] ) * ( threads[ 2 ] - diffs[ 2 ] ) );
if( ( multiple == 0 ) && ( missed == diff ) && ( errored == diff ) )
log_error( "ERROR: Global work offset values are not being respected by get_global_id()\n" );
else
log_error( "ERROR: Global work offset values did not function as expected (%d missed, %d reported multiple times, %d erroneously hit)\n",
missed, multiple, errored );
}
return ( missed | multiple | errored | corrected );
}
// Launches the work-offset kernel NUM_TESTS times with random global sizes
// and random global work offsets, reads the three ID buffers back and checks
// them with check_results(). Returns 0 when every launch validates, non-zero
// on the first failure.
int test_global_work_offsets(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    // Releases the random generator state on every exit path, so the early
    // returns below no longer leak it.
    struct SeedOwner
    {
        MTdata state;
        explicit SeedOwner( MTdata s ) : state( s ) {}
        ~SeedOwner() { free_mtdata( state ); }
    };

    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 3 ];   // one output buffer per kernel argument
    int error;
    size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
    cl_int outputA[ MAX_TEST_ITEMS ], outputB[ MAX_TEST_ITEMS ], outputC[ MAX_TEST_ITEMS ];

    // Create the kernel
    if( create_single_kernel_helper( context, &program, &kernel, 1, work_offset_test, "test" ) != 0 )
    {
        return -1;
    }

    //// Create some output streams
    // Use just one output array to init them all (no need to init every single stack storage here)
    memset( outputA, 0xff, sizeof( outputA ) );
    for( int i = 0; i < 3; i++ )
    {
        streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof(outputA), outputA, &error );
        test_error( error, "Unable to create output array" );
    }

    // Run a few different times
    SeedOwner seedOwner( init_genrand( gRandomSeed ) );
    MTdata seed = seedOwner.state;
    for( int test = 0; test < NUM_TESTS; test++ )
    {
        // Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS
        threads[ 0 ] = random_in_range( 1, 32, seed );
        threads[ 1 ] = random_in_range( 1, 16, seed );
        threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );

        // Make sure we get the local thread count right
        error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
        test_error( error, "Unable to determine local work group sizes" );

        // Randomize some offsets
        for( int j = 0; j < 3; j++ )
            offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );

        log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
                  threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
                  offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );

        // Now set up and run
        for( int i = 0; i < 3; i++ )
        {
            error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] );
            test_error( error, "Unable to set indexed kernel arguments" );
        }

        error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
        test_error( error, "Kernel execution failed" );

        // Read our results back now
        cl_int * resultBuffers[] = { outputA, outputB, outputC };
        for( int i = 0; i < 3; i++ )
        {
            error = clEnqueueReadBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( outputA ), resultBuffers[ i ], 0, NULL, NULL );
            test_error( error, "Unable to get result data" );
        }

        // Now we need to check the results. The outputs should have one entry for each possible ID,
        // but they won't be in order, so we need to construct a count map to determine what we got
        if( check_results( threads, offsets, outputA, outputB, outputC ) )
        {
            log_error( "\t(Test failed for global dim %ld,%ld,%ld, local dim %ld,%ld,%ld, offsets %ld,%ld,%ld)\n",
                       threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
                       offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
            return -1;
        }
    }

    // All done! (seedOwner frees the generator state)
    return 0;
}
// OpenCL C source for the get_global_offset() kernel: every work-item stores
// get_global_offset(0), (1) and (2), cast to int, into outOffsets[0..2].
const char *get_offset_test[] = {
"__kernel void test( __global int * outOffsets )\n"
"{\n"
" // We use local ID here so we don't have to worry about offsets\n"
" // Also note that these should be the same for ALL threads, so we won't worry about contention\n"
" outOffsets[ 0 ] = (int)get_global_offset( 0 );\n"
" outOffsets[ 1 ] = (int)get_global_offset( 1 );\n"
" outOffsets[ 2 ] = (int)get_global_offset( 2 );\n"
"}\n"
};
// Launches the get_global_offset() kernel NUM_TESTS times with random global
// sizes and offsets and checks that the three values written by the kernel
// equal the offsets passed to clEnqueueNDRangeKernel. Returns 0 on success;
// on a mismatch returns the number of mismatching dimensions.
int test_get_global_offset(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    // Releases the random generator state on every exit path, so the early
    // returns below no longer leak it.
    struct SeedOwner
    {
        MTdata state;
        explicit SeedOwner( MTdata s ) : state( s ) {}
        ~SeedOwner() { free_mtdata( state ); }
    };

    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 1 ];
    int error;
    size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
    cl_int outOffsets[ 3 ];

    // Create the kernel
    if( create_single_kernel_helper( context, &program, &kernel, 1, get_offset_test, "test" ) != 0 )
    {
        return -1;
    }

    // Create some output streams, and storage for a single control ID
    memset( outOffsets, 0xff, sizeof( outOffsets ) );
    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof( outOffsets ), outOffsets, &error );
    test_error( error, "Unable to create control ID buffer" );

    // Run a few different times
    SeedOwner seedOwner( init_genrand( gRandomSeed ) );
    MTdata seed = seedOwner.state;
    for( int test = 0; test < NUM_TESTS; test++ )
    {
        // Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS
        threads[ 0 ] = random_in_range( 1, 32, seed );
        threads[ 1 ] = random_in_range( 1, 16, seed );
        threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );

        // Make sure we get the local thread count right
        error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
        test_error( error, "Unable to determine local work group sizes" );

        // Randomize some offsets
        for( int j = 0; j < 3; j++ )
            offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );

        log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
                  threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
                  offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );

        // Now set up and run
        error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
        test_error( error, "Unable to set indexed kernel arguments" );

        error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
        test_error( error, "Kernel execution failed" );

        // Read our results back now
        error = clEnqueueReadBuffer( queue, streams[ 0 ], CL_TRUE, 0, sizeof( outOffsets ), outOffsets, 0, NULL, NULL );
        test_error( error, "Unable to get result data" );

        // And check!
        int errors = 0;
        for( int j = 0; j < 3; j++ )
        {
            if( outOffsets[ j ] != (cl_int)offsets[ j ] )
            {
                log_error( "ERROR: get_global_offset( %d ) did not return expected value (expected %ld, got %d)\n", j, offsets[ j ], outOffsets[ j ] );
                errors++;
            }
        }
        if( errors > 0 )
            return errors;
    }

    // All done! (seedOwner frees the generator state)
    return 0;
}

View File

@@ -0,0 +1,421 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Source-lane selectors for the .hi/.lo/.even/.odd vector operators: given a
// result lane `index` and the width of the input vector, each returns the
// input lane that should appear at that position.

// .hi -> upper half of the input vector.
int hi_offset( int index, int vectorSize)
{
    const int half = vectorSize / 2;
    return index + half;
}

// .lo -> lower half of the input vector (width is not needed).
int lo_offset( int index, int vectorSize)
{
    (void)vectorSize;
    return index;
}

// .even -> lanes 0, 2, 4, ...
int even_offset( int index, int vectorSize )
{
    (void)vectorSize;
    return index * 2;
}

// .odd -> lanes 1, 3, 5, ...
int odd_offset( int index, int vectorSize )
{
    (void)vectorSize;
    return index * 2 + 1;
}
// Signature shared by the lane-selector helpers (hi/lo/even/odd).
typedef int (*OffsetFunc)( int index, int vectorSize );
// Lane selectors, in the same order as operatorToUse_names below; both tables
// are indexed by the same operator index.
static const OffsetFunc offsetFuncs[4] = { hi_offset, lo_offset, even_offset, odd_offset };
// Verifier signature; not referenced by the code visible in this part of the file.
typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName );
// Operator suffixes appended to the kernel expression (".hi", ".lo", ...).
static const char *operatorToUse_names[] = { "hi", "lo", "even", "odd" };
// Element type names under test; kSizes (below) holds the matching byte sizes.
static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "float", "double" };
// Number of elements in each vector width under test.
static const unsigned int vector_sizes[] = { 1, 2, 3, 4, 8, 16};
// Element stride used when stepping through host buffers for each width
// (the 3-element entry uses a stride of 4).
static const unsigned int vector_aligns[] = { 1, 2, 4, 4, 8, 16};
// For input width index i, the index (into these same tables) of the result width.
static const unsigned int out_vector_idx[] = { 0, 0, 1, 1, 3, 4};
// if input is size vector_sizes[i], output is size
// vector_sizes[out_vector_idx[i]]
// input type name is strcat(gentype, vector_size_names[i]);
// and output type name is
// strcat(gentype, vector_size_names[out_vector_idx[i]]);
// Maps an element count (array index) to its table index, -1 where the count
// is not a tested width; not referenced by the code visible here.
static const int size_to_idx[] = {-1,0,1,2,3,-1,-1,-1,4,
-1,-1,-1,-1,-1,-1,-1,5};
// Suffix appended to a type name to form the vector type ("" for scalars).
static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"};
// Size in bytes of each entry of test_str_names, in the same order.
static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
// Host-side verifier, defined after test_hiloeo.
static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse );
// Tests the .hi/.lo/.even/.odd vector operators for every element type and
// vector width. For each combination a kernel is generated that applies the
// operator either directly to srcA[tid] or to a vector rebuilt component by
// component, and the device output is compared against CheckResults().
//
// n_elems sizes the host/device buffers (4 cl_ints per element).
// Returns 0 when every combination passes and -1 on the first failure. All
// host and OpenCL resources are released on every exit path.
int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    cl_int *input_ptr = NULL, *output_ptr = NULL, *p;
    cl_mem streams[2] = { NULL, NULL };
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    int err;
    int result = -1;
    cl_uint i;
    int hasDouble = is_extension_available( device, "cl_khr_fp64" );
    cl_uint vectorSize, operatorToUse;
    cl_uint type;
    MTdata d;
    int expressionMode;
    int numExpressionModes = 2;
    size_t length = sizeof(cl_int) * 4 * n_elems;

    input_ptr = (cl_int*)malloc(length);
    output_ptr = (cl_int*)malloc(length);
    if( !input_ptr || !output_ptr )
    {
        log_error("Unable to allocate host buffers\n");
        goto cleanup;
    }

    p = input_ptr;
    d = init_genrand( gRandomSeed );
    for (i=0; i<4 * (cl_uint) n_elems; i++)
        p[i] = genrand_int32(d);
    free_mtdata(d); d = NULL;

    for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
    {
        // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
        size_t elementCount = length / kSizes[type];

        // skip double if unavailable
        if( !hasDouble && ( 0 == strcmp( test_str_names[type], "double" )))
            continue;

        // skip the 64-bit integer types if unavailable (either name matches)
        if( !gHasLong &&
            (( 0 == strcmp( test_str_names[type], "long" )) ||
             ( 0 == strcmp( test_str_names[type], "ulong" ))))
            continue;

        log_info( "%s", test_str_names[type] );
        fflush( stdout );

        // Set up data streams for the type
        streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[0])
        {
            log_error("clCreateBuffer failed\n");
            goto cleanup;
        }
        streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[1])
        {
            log_error("clCreateBuffer failed\n");
            goto cleanup;
        }

        err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            goto cleanup;
        }

        for( operatorToUse = 0; operatorToUse < sizeof( operatorToUse_names ) / sizeof( operatorToUse_names[0] ); operatorToUse++ )
        {
            log_info( " %s", operatorToUse_names[ operatorToUse ] );
            fflush( stdout );
            for( vectorSize = 1; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) {
                for(expressionMode = 0; expressionMode < numExpressionModes; ++expressionMode) {

                    cl_uint outVectorSize = out_vector_idx[vectorSize];
                    char expression[1024];

                    // The array holds a pointer to `expression`; its text is filled in below
                    const char *source[] = {
                        "", // optional pragma string
                        "__kernel void test_", operatorToUse_names[ operatorToUse ], "_", test_str_names[type], vector_size_names[vectorSize],
                        "(__global ", test_str_names[type], vector_size_names[vectorSize],
                        " *srcA, __global ", test_str_names[type], vector_size_names[outVectorSize],
                        " *dst)\n"
                        "{\n"
                        " int tid = get_global_id(0);\n"
                        "\n"
                        " ", test_str_names[type],
                        vector_size_names[out_vector_idx[vectorSize]],
                        " tmp = ", expression, ".", operatorToUse_names[ operatorToUse ], ";\n"
                        " dst[tid] = tmp;\n"
                        "}\n"
                    };

                    if(expressionMode == 0) {
                        sprintf(expression, "srcA[tid]");
                    } else if(expressionMode == 1) {
                        // Rebuild the vector from its components so the operator
                        // is applied to an r-value expression
                        switch(vector_sizes[vectorSize]) {
                            case 16:
                                sprintf(expression,
                                        "((%s16)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7, srcA[tid].s8, srcA[tid].s9, srcA[tid].sA, srcA[tid].sB, srcA[tid].sC, srcA[tid].sD, srcA[tid].sE, srcA[tid].sf))",
                                        test_str_names[type]
                                        );
                                break;
                            case 8:
                                sprintf(expression,
                                        "((%s8)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7))",
                                        test_str_names[type]
                                        );
                                break;
                            case 4:
                                sprintf(expression,
                                        "((%s4)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3))",
                                        test_str_names[type]
                                        );
                                break;
                            case 3:
                                sprintf(expression,
                                        "((%s3)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2))",
                                        test_str_names[type]
                                        );
                                break;
                            case 2:
                                sprintf(expression,
                                        "((%s2)(srcA[tid].s0, srcA[tid].s1))",
                                        test_str_names[type]
                                        );
                                break;
                            default :
                                sprintf(expression, "srcA[tid]");
                                log_info("Default\n");
                        }
                    } else {
                        sprintf(expression, "srcA[tid]");
                    }

                    if (0 == strcmp( test_str_names[type], "double" ))
                        source[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";

                    char kernelName[128];
                    snprintf( kernelName, sizeof( kernelName ), "test_%s_%s%s", operatorToUse_names[ operatorToUse ], test_str_names[type], vector_size_names[vectorSize] );
                    err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
                    if (err)
                        goto cleanup;

                    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
                    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
                    if (err != CL_SUCCESS)
                    {
                        log_error("clSetKernelArgs failed\n");
                        goto cleanup;
                    }

                    //Wipe the output buffer clean
                    uint32_t pattern = 0xdeadbeef;
                    memset_pattern4( output_ptr, &pattern, length );
                    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
                    if (err != CL_SUCCESS)
                    {
                        log_error("clEnqueueWriteBuffer failed\n");
                        goto cleanup;
                    }

                    size_t size = elementCount / (vector_aligns[vectorSize]);
                    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
                    if (err != CL_SUCCESS)
                    {
                        log_error("clEnqueueNDRangeKernel failed\n");
                        goto cleanup;
                    }

                    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
                    if (err != CL_SUCCESS)
                    {
                        log_error("clEnqueueReadBuffer failed\n");
                        goto cleanup;
                    }

                    // Results start at the beginning of the output buffer (the
                    // historical start adjustment always evaluated to zero).
                    char *inP = (char *)input_ptr;
                    char *outP = (char *)output_ptr;

                    for( size_t e = 0; e < size; e++ )
                    {
                        if( CheckResults( inP, outP, 1, type, vectorSize, operatorToUse ) ) {
                            log_info("e is %d\n", (int)e);
                            fflush(stdout);
                            goto cleanup;
                        }
                        inP += kSizes[type] * ( vector_aligns[vectorSize] );
                        outP += kSizes[type] * ( vector_aligns[outVectorSize] );
                    }

                    clReleaseKernel( kernel );
                    kernel = NULL;
                    clReleaseProgram( program );
                    program = NULL;
                    log_info( "." );
                    fflush( stdout );
                }
            }
        }

        clReleaseMemObject( streams[0] );
        streams[0] = NULL;
        clReleaseMemObject( streams[1] );
        streams[1] = NULL;
        log_info( "done\n" );
    }

    log_info("HiLoEO test passed\n");
    result = 0;

cleanup:
    // Shared exit: releases whatever is still held, on success or failure
    if( kernel )
        clReleaseKernel( kernel );
    if( program )
        clReleaseProgram( program );
    if( streams[0] )
        clReleaseMemObject( streams[0] );
    if( streams[1] )
        clReleaseMemObject( streams[1] );
    free(input_ptr);
    free(output_ptr);
    return result;
}
static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse )
{
cl_ulong array[8];
void *p = array;
size_t halfVectorSize = vector_sizes[out_vector_idx[vectorSize]];
size_t cmpVectorSize = vector_sizes[out_vector_idx[vectorSize]];
// was 1 << (vectorSize-1);
OffsetFunc f = offsetFuncs[ operatorToUse ];
size_t elementSize = kSizes[type];
if(vector_size_names[vectorSize][0] == '3') {
if(operatorToUse_names[operatorToUse][0] == 'h' ||
operatorToUse_names[operatorToUse][0] == 'o') // hi or odd
{
cmpVectorSize = 1; // special case for vec3 ignored values
}
}
switch( elementSize )
{
case 1:
{
char *i = (char*)in;
char *o = (char*)out;
size_t j;
cl_uint k;
OffsetFunc f = offsetFuncs[ operatorToUse ];
for( k = 0; k < elementCount; k++ )
{
char *o2 = (char*)p;
for( j = 0; j < halfVectorSize; j++ )
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
if( memcmp( o, o2, elementSize * cmpVectorSize ) )
{
log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
for( j = 1; j < halfVectorSize * 2; j++ )
log_info( ", %d", i[j] );
log_info( " } --> { %d", o[0] );
for( j = 1; j < halfVectorSize; j++ )
log_info( ", %d", o[j] );
log_info( " }\n" );
return -1;
}
i += 2 * halfVectorSize;
o += halfVectorSize;
}
}
break;
case 2:
{
short *i = (short*)in;
short *o = (short*)out;
size_t j;
cl_uint k;
for( k = 0; k < elementCount; k++ )
{
short *o2 = (short*)p;
for( j = 0; j < halfVectorSize; j++ )
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
if( memcmp( o, o2, elementSize * cmpVectorSize ) )
{
log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
for( j = 1; j < halfVectorSize * 2; j++ )
log_info( ", %d", i[j] );
log_info( " } --> { %d", o[0] );
for( j = 1; j < halfVectorSize; j++ )
log_info( ", %d", o[j] );
log_info( " }\n" );
return -1;
}
i += 2 * halfVectorSize;
o += halfVectorSize;
}
}
break;
case 4:
{
int *i = (int*)in;
int *o = (int*)out;
size_t j;
cl_uint k;
for( k = 0; k < elementCount; k++ )
{
int *o2 = (int *)p;
for( j = 0; j < halfVectorSize; j++ )
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
for( j = 0; j < cmpVectorSize; j++ )
{
/* Allow float nans to be binary different */
if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j])))
{
log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
for( j = 1; j < halfVectorSize * 2; j++ )
log_info( ", 0x%8.8x", i[j] );
log_info( " } --> { 0x%8.8x", o[0] );
for( j = 1; j < halfVectorSize; j++ )
log_info( ", 0x%8.8x", o[j] );
log_info( " }\n" );
return -1;
}
}
i += 2 * halfVectorSize;
o += halfVectorSize;
}
}
break;
case 8:
{
cl_ulong *i = (cl_ulong*)in;
cl_ulong *o = (cl_ulong*)out;
size_t j;
cl_uint k;
for( k = 0; k < elementCount; k++ )
{
cl_ulong *o2 = (cl_ulong*)p;
for( j = 0; j < halfVectorSize; j++ )
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
if( memcmp( o, o2, elementSize * cmpVectorSize ) )
{
log_info( "\n%d) Failure for %s%s.%s { 0x%16.16llx", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
for( j = 1; j < halfVectorSize * 2; j++ )
log_info( ", 0x%16.16llx", i[j] );
log_info( " } --> { 0x%16.16llx", o[0] );
for( j = 1; j < halfVectorSize; j++ )
log_info( ", 0x%16.16llx", o[j] );
log_info( " }\n" );
return -1;
}
i += 2 * halfVectorSize;
o += halfVectorSize;
}
}
break;
default:
log_info( "Internal error. Unknown data type\n" );
return -2;
}
return 0;
}

View File

@@ -0,0 +1,276 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the host-pointer test kernel: element-wise float add,
// dst[tid] = srcA[tid] + srcB[tid].
const char *hostptr_kernel_code =
"__kernel void test_hostptr(__global float *srcA, __global float *srcB, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// Error tolerance constant; not referenced by the code visible in this part
// of the file (verify_hostptr below compares for exact equality).
static const float MAX_ERR = 1e-5f;
// Compares each of the n device results in outptr against the host sum
// inptrA[k] + inptrB[k] using exact equality.
// Returns 0 when all n entries match, -1 at the first mismatch.
static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n)
{
    for (int idx = 0; idx < n; ++idx)
    {
        cl_float expected;
        expected = inptrA[idx] + inptrB[idx];
        if (outptr[idx] != expected)
            return -1;
    }
    return 0;
}
// Fills ptr[0..count) with values drawn from get_random_float() over the
// range [-0x1.0p32, 0x1.0p32], consuming generator d in index order.
static void make_random_data(unsigned count, float *ptr, MTdata d)
{
    float *const end = ptr + count;
    for (float *cur = ptr; cur != end; ++cur)
    {
        *cur = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32), MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32), d);
    }
}
// Allocates a w x h image with 4 bytes per pixel and fills every byte from
// generator d. The caller owns the returned buffer and must free() it.
// Returns NULL when a dimension is negative or the allocation fails, instead
// of writing through a null pointer.
static unsigned char *
generate_rgba8_image(int w, int h, MTdata d)
{
    if (w < 0 || h < 0)
        return NULL;

    // Computed in size_t so large dimensions do not overflow int arithmetic
    const size_t byteCount = (size_t)w * (size_t)h * 4;
    unsigned char *ptr = (unsigned char*)malloc(byteCount);
    if (ptr == NULL)
        return NULL;

    for (size_t i = 0; i < byteCount; i++)
        ptr[i] = (unsigned char)genrand_int32(d);

    return ptr;
}
// Overwrites the w*h*4 bytes at ptr with fresh values from generator d and
// returns ptr.
static unsigned char *
randomize_rgba8_image(unsigned char *ptr, int w, int h, MTdata d)
{
    const int byteCount = w*h*4;
    int pos = 0;
    while (pos < byteCount)
    {
        ptr[pos] = (unsigned char)genrand_int32(d);
        ++pos;
    }
    return ptr;
}
// Byte-wise comparison of two w x h images with 4 bytes per pixel.
// Returns 0 when all w*h*4 bytes are equal, -1 at the first difference.
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    const int byteCount = w*h*4;
    int pos = 0;
    while (pos < byteCount)
    {
        if (image[pos] != outptr[pos])
            return -1;
        ++pos;
    }
    return 0;
}
// Exercises buffers and 2D images created with CL_MEM_USE_HOST_PTR and
// CL_MEM_COPY_HOST_PTR:
//   1. runs an add kernel over host-pointer/copied inputs into a host-pointer
//      output buffer and verifies the mapped result on the host;
//   2. copies a host-pointer image to another host-pointer image and verifies
//      it through clEnqueueMapImage and through clEnqueueReadImage.
// Returns 0 on success and non-zero on failure. A mismatch in step 1 is
// remembered and reported through the return value after the image checks.
int
test_hostptr(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_float *input_ptr[2], *output_ptr;
    cl_program program;
    cl_kernel kernel;
    size_t threads[3]={0,0,0};
    cl_image_format img_format;
    cl_uchar *rgba8_inptr, *rgba8_outptr;
    void *lock_buffer;
    int img_width = 512;
    int img_height = 512;
    cl_int err;
    MTdata d;
    RoundingMode oldRoundMode;
    int isRTZ = 0;
    int bufferMismatch = 0;   // set when the add-kernel result fails verification

    // Block to mark deletion of streams before deletion of host_ptr
    {
        clMemWrapper streams[7];

        PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

        // Alloc buffers
        input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
        input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
        output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);

        d = init_genrand( gRandomSeed );
        rgba8_inptr = (cl_uchar *)generate_rgba8_image(img_width, img_height, d);
        rgba8_outptr = (cl_uchar *)malloc(sizeof(cl_uchar) * 4 * img_width * img_height);

        // Random data
        make_random_data(num_elements, input_ptr[0], d);
        make_random_data(num_elements, input_ptr[1], d);

        // Create host-side input
        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[0], &err);
        test_error(err, "clCreateBuffer 0 failed");

        // Create a copied input
        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[1], &err);
        test_error(err, "clCreateBuffer 1 failed");

        // Create a host-side output
        streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, output_ptr, &err);
        test_error(err, "clCreateBuffer 2 failed");

        // Create a host-side input
        img_format.image_channel_order = CL_RGBA;
        img_format.image_channel_data_type = CL_UNORM_INT8;
        streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
        test_error(err, "create_image_2d 3 failed");

        // Create a copied input
        img_format.image_channel_order = CL_RGBA;
        img_format.image_channel_data_type = CL_UNORM_INT8;
        streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
        test_error(err, "create_image_2d 4 failed");

        // Create a host-side output
        img_format.image_channel_order = CL_RGBA;
        img_format.image_channel_data_type = CL_UNORM_INT8;
        streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
        test_error(err, "create_image_2d 5 failed");

        // Create a copied output
        img_format.image_channel_order = CL_RGBA;
        img_format.image_channel_data_type = CL_UNORM_INT8;
        streams[6] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
        test_error(err, "create_image_2d 6 failed");

        err = create_single_kernel_helper(context, &program, &kernel,1, &hostptr_kernel_code, "test_hostptr" );
        test_error(err, "create_single_kernel_helper failed");

        // Execute kernel
        err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
        err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
        test_error(err, "clSetKernelArg failed");

        threads[0] = (size_t)num_elements;
        err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
        test_error(err, "clEnqueueNDRangeKernel failed");

        cl_float *data = (cl_float*) clEnqueueMapBuffer( queue, streams[2], CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float) * num_elements, 0, NULL, NULL, &err );
        test_error( err, "clEnqueueMapBuffer failed" );

        //If we only support rtz mode
        // Switch the host rounding mode exactly once so the mode saved in
        // oldRoundMode is the one that was active before the test.
        if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
        {
            oldRoundMode = set_round(kRoundTowardZero, kfloat);
            isRTZ = 1;
        }

        // Verify that we got the expected results back on the host side
        err = verify_hostptr(input_ptr[0], input_ptr[1], data, num_elements);
        if (err)
        {
            log_error("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
                      "and a CL_MEM_USE_HOST_PTR output did not return the expected results.\n");
            // err is reused below, so keep the failure separately
            bufferMismatch = 1;
        } else {
            log_info("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
                     "and a CL_MEM_USE_HOST_PTR output returned the expected results.\n");
        }

        if (isRTZ)
            set_round(oldRoundMode, kfloat);

        err = clEnqueueUnmapMemObject( queue, streams[2], data, 0, NULL, NULL );
        test_error( err, "clEnqueueUnmapMemObject failed" );

        size_t origin[3]={0,0,0}, region[3]={(size_t)img_width, (size_t)img_height, 1};
        randomize_rgba8_image(rgba8_outptr, img_width, img_height, d);
        free_mtdata(d); d = NULL;

        // Copy from host-side to host-side
        log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR...\n");
        err = clEnqueueCopyImage(queue, streams[3], streams[5],
                                 origin, origin, region, 0, NULL, NULL);
        test_error(err, "clEnqueueCopyImage failed");
        log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR image passed.\n");

        // test the lock buffer interface
        log_info("Mapping the CL_MEM_USE_HOST_PTR image with clEnqueueMapImage...\n");
        size_t row_pitch;
        lock_buffer = clEnqueueMapImage(queue, streams[5], CL_TRUE,
                                        CL_MAP_READ, origin, region,
                                        &row_pitch, NULL,
                                        0, NULL, NULL, &err);
        test_error(err, "clEnqueueMapImage failed");

        // NOTE(review): the comparison treats the mapping as tightly packed
        // and does not consult row_pitch - confirm that is intended.
        err = verify_rgba8_image(rgba8_inptr, (unsigned char*)lock_buffer, img_width, img_height);
        if (err != CL_SUCCESS)
        {
            log_error("verify_rgba8_image FAILED after clEnqueueMapImage\n");
            return -1;
        }
        log_info("verify_rgba8_image passed after clEnqueueMapImage\n");

        err = clEnqueueUnmapMemObject(queue, streams[5], lock_buffer, 0, NULL, NULL);
        test_error(err, "clEnqueueUnmapMemObject failed");

        // Copy host-side to device-side and read back
        // NOTE(review): the log text names a CL_MEM_COPY_HOST_PTR destination
        // but the copy targets streams[5] (CL_MEM_USE_HOST_PTR); streams[6]
        // may have been intended - confirm before changing what is tested.
        log_info("clEnqueueCopyImage CL_MEM_USE_HOST_PTR to CL_MEM_COPY_HOST_PTR...\n");
        err = clEnqueueCopyImage(queue, streams[3], streams[5],
                                 origin, origin, region,
                                 0, NULL, NULL);
        test_error(err, "clEnqueueCopyImage failed");

        err = clEnqueueReadImage(queue, streams[5], CL_TRUE, origin, region, 4*img_width, 0, rgba8_outptr, 0, NULL, NULL);
        test_error(err, "clEnqueueReadImage failed");

        err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
        if (err != CL_SUCCESS)
        {
            log_error("verify_rgba8_image FAILED after clEnqueueCopyImage, clEnqueueReadImage\n");
            return -1;
        }
        log_info("verify_rgba8_image passed after clEnqueueCopyImage, clEnqueueReadImage\n");
    }

    // cleanup
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(output_ptr);
    free(rgba8_inptr);
    free(rgba8_outptr);

    return bufferMismatch ? -1 : err;
}

View File

@@ -0,0 +1,165 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the if/else-if chain kernel: src values 0..7 each select
// a distinct constant for dst; any other value selects 0x7FFFFFFF.
const char *conditional_kernel_code =
"__kernel void test_if(__global int *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" if (src[tid] == 0)\n"
" dst[tid] = 0x12345678;\n"
" else if (src[tid] == 1)\n"
" dst[tid] = 0x23456781;\n"
" else if (src[tid] == 2)\n"
" dst[tid] = 0x34567812;\n"
" else if (src[tid] == 3)\n"
" dst[tid] = 0x45678123;\n"
" else if (src[tid] == 4)\n"
" dst[tid] = 0x56781234;\n"
" else if (src[tid] == 5)\n"
" dst[tid] = 0x67812345;\n"
" else if (src[tid] == 6)\n"
" dst[tid] = 0x78123456;\n"
" else if (src[tid] == 7)\n"
" dst[tid] = 0x81234567;\n"
" else\n"
" dst[tid] = 0x7FFFFFFF;\n"
"\n"
"}\n";
// Host-side copy of the constants the kernel writes for src values 0..7,
// indexed by the src value.
const int results[] = {
0x12345678,
0x23456781,
0x34567812,
0x45678123,
0x56781234,
0x67812345,
0x78123456,
0x81234567,
};
// Checks the kernel output against the host reference: inputs 0..7 map to
// results[input], every other input (including negative values, which take
// the kernel's final else branch) maps to 0x7FFFFFFF.
// Returns 0 when all n entries match, -1 at the first mismatch.
int
verify_if(int *inptr, int *outptr, int n)
{
    int r, i;

    for (i=0; i<n; i++)
    {
        // Guard the lower bound too so a negative input never indexes results[]
        if (inptr[i] >= 0 && inptr[i] <= 7)
            r = results[inptr[i]];
        else
            r = 0x7FFFFFFF;

        if (r != outptr[i])
        {
            log_error("IF test failed\n");
            return -1;
        }
    }

    log_info("IF test passed\n");
    return 0;
}
// Runs the if/else-if chain kernel over num_elements random inputs produced
// by (int)get_random_float(0, 32, d) and verifies the output with verify_if().
// Returns 0 on success and -1 on any failure. Host buffers, the random
// generator state and all OpenCL objects are released on every exit path.
int test_if(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[2] = { NULL, NULL };
    cl_int *input_ptr = NULL, *output_ptr = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    size_t threads[1];
    int err, i;
    int result = -1;
    MTdata d = init_genrand( gRandomSeed );
    size_t length = sizeof(cl_int) * num_elements;

    input_ptr = (cl_int*)malloc(length);
    output_ptr = (cl_int*)malloc(length);
    if (!input_ptr || !output_ptr)
    {
        log_error("Unable to allocate host buffers\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    for (i=0; i<num_elements; i++)
        input_ptr[i] = (int)get_random_float(0, 32, d);

    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &conditional_kernel_code, "test_if" );
    if (err)
        goto cleanup;

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (unsigned int)num_elements;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_if(input_ptr, output_ptr, num_elements);

cleanup:
    // Shared exit: releases whatever is still held, on success or failure
    if (d)
        free_mtdata(d);
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return result;
}

View File

@@ -0,0 +1,643 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source: each work-item reads the pixel at its 2D global id from
// srcimg through the sampler, addressing it with integer coordinates, and
// writes that colour to the same position in dstimg.
static const char *image_to_image_kernel_integer_coord_code =
"\n"
"__kernel void image_to_image_copy(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" float4 color;\n"
"\n"
" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n";
// Same copy kernel as above, but the read uses float coordinates obtained by
// casting the global id; the write still uses integer coordinates.
static const char *image_to_image_kernel_float_coord_code =
"\n"
"__kernel void image_to_image_copy(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" float4 color;\n"
"\n"
" color = read_imagef(srcimg, sampler, (float2)((float)tid_x, (float)tid_y));\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n";
// OpenCL C source: each work-item reads the pixel at its 2D global id from
// both source images (integer coordinates) and writes the sum of the two
// colours to the same position in dstimg.
static const char *image_sum_kernel_integer_coord_code =
"\n"
"__kernel void image_sum(read_only image2d_t srcimg0, read_only image2d_t srcimg1, write_only image2d_t dstimg, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" float4 color0;\n"
" float4 color1;\n"
"\n"
" color0 = read_imagef(srcimg0, sampler, (int2)(tid_x, tid_y));\n"
" color1 = read_imagef(srcimg1, sampler, (int2)(tid_x, tid_y));\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color0 + color1);\n"
"\n"
"}\n";
// Same sum kernel as above, but both reads use float coordinates obtained by
// casting the global id; the write still uses integer coordinates.
static const char *image_sum_kernel_float_coord_code =
"\n"
"__kernel void image_sum(read_only image2d_t srcimg0, read_only image2d_t srcimg1, write_only image2d_t dstimg, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" float4 color0;\n"
" float4 color1;\n"
"\n"
" color0 = read_imagef(srcimg0, sampler, (float2)((float)tid_x, (float)tid_y));\n"
" color1 = read_imagef(srcimg1, sampler, (float2)((float)tid_x, (float)tid_y));\n"
" write_imagef(dstimg,(int2)(tid_x, tid_y), color0 + color1);\n"
"\n"
"}\n";
// Allocates a buffer of w * h * num_elements bytes and sets every byte to
// `value`.
//
// Returns the buffer, which the caller must free(), or NULL when the
// allocation fails (the previous version wrote through the unchecked pointer).
static unsigned char *
generate_initial_byte_image(int w, int h, int num_elements, unsigned char value)
{
    unsigned char *ptr = (unsigned char*)malloc(w * h * num_elements);
    int i;

    if (ptr == NULL)
        return NULL;

    for (i = 0; i < w*h*num_elements; i++)
        ptr[i] = value;
    return ptr;
}
// Builds the host-side reference for the multipass test: byte i of the result
// is the sum of byte i of each of the num_inputs buffers in input_data. The
// sum is kept in an unsigned char, so it wraps modulo 256.
//
// Every input buffer must hold at least w * h * num_elements bytes.
// Returns the new buffer, which the caller must free(), or NULL when the
// allocation fails (the previous version wrote through the unchecked pointer).
static unsigned char *
generate_expected_byte_image(unsigned char **input_data, int num_inputs, int w, int h, int num_elements)
{
    unsigned char *ptr = (unsigned char*)malloc(w * h * num_elements);
    int i;

    if (ptr == NULL)
        return NULL;

    for (i = 0; i < w*h*num_elements; i++)
    {
        unsigned char sum = 0;
        int j;
        for (j = 0; j < num_inputs; j++)
            sum += input_data[j][i];
        ptr[i] = sum;
    }
    return ptr;
}
// Allocates a buffer of w * h * num_elements bytes and fills it with values
// drawn from the generator d, each masked to the range 0..31.
//
// Returns the buffer, which the caller must free(), or NULL when the
// allocation fails; in that case no values are drawn from d. The previous
// version wrote through the unchecked pointer.
static unsigned char *
generate_byte_image(int w, int h, int num_elements, MTdata d)
{
    unsigned char *ptr = (unsigned char*)malloc(w * h * num_elements);
    int i;

    if (ptr == NULL)
        return NULL;

    for (i = 0; i < w*h*num_elements; i++)
        ptr[i] = (unsigned char)genrand_int32(d) & 31;
    return ptr;
}
// Compares the first w * h * num_elements bytes of `image` and `outptr`.
// Returns 0 when they are all equal and -1 at the first difference.
static int
verify_byte_image(unsigned char *image, unsigned char *outptr, int w, int h, int num_elements)
{
    const int total = w * h * num_elements;
    int idx = 0;

    while (idx < total)
    {
        if (image[idx] != outptr[idx])
            return -1;
        ++idx;
    }
    return 0;
}
// Shared implementation of the two image multipass tests.
//
// Eight random 512x512 CL_RGBA / CL_UNORM_INT8 images are accumulated by
// ping-ponging between two accumulator images: the first input is copied into
// accumulator 0, each further input is added to the previous accumulator and
// written to the other one, and the final accumulator is copied once more
// before being read back and compared with a sum computed on the host.
//
//   copy_kernel_source / sum_kernel_source
//       OpenCL C sources providing the "image_to_image_copy" and "image_sum"
//       kernels respectively.
//   first_kernel_id
//       Number printed in the "Failed to create kernel" message for the copy
//       kernel; the sum kernel reports first_kernel_id + 1.
//
// Returns 0 when the images match and a non-zero value otherwise.
//
// The earlier per-test copies of this code returned from the middle of the
// function on every error, leaking host buffers, the random generator, images,
// kernels and the sampler; the integer variant additionally allocated
// expected_output twice. Everything acquired after the sampler is now released
// through the single cleanup path at the end.
static int
run_image_multipass(cl_device_id device, cl_context context, cl_command_queue queue,
                    const char **copy_kernel_source, const char **sum_kernel_source,
                    int first_kernel_id)
{
    // All locals are declared (and where needed initialised) before the first
    // goto so that jumping to cleanup never crosses an initialisation.
    const int img_width = 512;
    const int img_height = 512;
    const int num_input_streams = 8;
    cl_image_format img_format;
    cl_mem_flags flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
    cl_mem accum_streams[2] = {NULL, NULL};
    cl_mem *input_streams = NULL;
    cl_mem accum_input;
    cl_mem accum_output;
    cl_program program[2] = {NULL, NULL};
    cl_kernel kernel[2] = {NULL, NULL};
    cl_sampler sampler = NULL;
    unsigned char **input_data = NULL;
    unsigned char *initial_data = NULL;
    unsigned char *expected_output = NULL;
    unsigned char *output_ptr = NULL;
    MTdata d = NULL;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {(size_t)img_width, (size_t)img_height, 1};
    size_t threads[3] = {(size_t)img_width, (size_t)img_height, 0};
    int result = -1;
    int err;
    int i;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // The sampler is created before anything else so that test_error can bail
    // out while there is still nothing to release.
    sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
    test_error(err, "clCreateSampler failed");

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT8;

    output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
    initial_data = generate_initial_byte_image(img_width, img_height, 4, 0xF0);
    if (!output_ptr || !initial_data)
    {
        log_error("Failed to allocate host image buffers\n");
        goto cleanup;
    }

    // Create the accum images with initial data.
    for (i = 0; i < 2; i++)
    {
        accum_streams[i] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
        if (!accum_streams[i])
        {
            log_error("create_image_2d failed\n");
            goto cleanup;
        }
        err = clEnqueueWriteImage(queue, accum_streams[i], CL_TRUE,
                                  origin, region, 0, 0,
                                  initial_data, 0, NULL, NULL);
        if (err)
        {
            log_error("clWriteImage failed: %d\n", err);
            goto cleanup;
        }
    }
    free(initial_data);
    initial_data = NULL;

    // Set up the input data.
    input_data = (unsigned char **)malloc(sizeof(unsigned char*) * num_input_streams);
    input_streams = (cl_mem*)malloc(sizeof(cl_mem) * num_input_streams);
    if (!input_data || !input_streams)
    {
        log_error("Failed to allocate input stream tables\n");
        goto cleanup;
    }
    // Clear both tables so cleanup can tell which entries were actually filled.
    for (i = 0; i < num_input_streams; i++)
    {
        input_data[i] = NULL;
        input_streams[i] = NULL;
    }

    d = init_genrand( gRandomSeed );
    for (i = 0; i < num_input_streams; i++)
    {
        input_data[i] = generate_byte_image(img_width, img_height, 4, d);
        input_streams[i] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
        if (!input_streams[i])
        {
            log_error("create_image_2d failed\n");
            goto cleanup;
        }
        err = clEnqueueWriteImage(queue, input_streams[i], CL_TRUE,
                                  origin, region, 0, 0,
                                  input_data[i], 0, NULL, NULL);
        if (err)
        {
            log_error("clWriteImage failed: %d\n", err);
            goto cleanup;
        }
    }
    free_mtdata(d); d = NULL;

    expected_output = generate_expected_byte_image(input_data, num_input_streams, img_width, img_height, 4);
    if (!expected_output)
    {
        log_error("Failed to allocate expected output image\n");
        goto cleanup;
    }

    // The host copies of the inputs are only needed for the reference sum.
    for (i = 0; i < num_input_streams; i++)
    {
        free(input_data[i]);
    }
    free(input_data);
    input_data = NULL;

    // Set up the kernels.
    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, copy_kernel_source, "image_to_image_copy");
    if (err)
    {
        log_error("Failed to create kernel %d: %d\n", first_kernel_id, err);
        // What the helper leaves in its out-parameters after a failure is not
        // visible from here, so do not try to release them.
        program[0] = NULL;
        kernel[0] = NULL;
        goto cleanup;
    }
    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, sum_kernel_source, "image_sum");
    if (err)
    {
        log_error("Failed to create kernel %d: %d\n", first_kernel_id + 1, err);
        program[1] = NULL;
        kernel[1] = NULL;
        goto cleanup;
    }
    // The kernels keep working after their programs are released here, exactly
    // as in the original code.
    clReleaseProgram(program[0]);
    clReleaseProgram(program[1]);
    program[0] = NULL;
    program[1] = NULL;

    // First pass: copy input 0 into accumulator 0.
    err = clSetKernelArg(kernel[0], 0, sizeof input_streams[0], &input_streams[0]);
    err |= clSetKernelArg(kernel[0], 1, sizeof accum_streams[0], &accum_streams[0]);
    err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }
    err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    // Remaining passes: add input i to the previous accumulator and write the
    // result into the other accumulator.
    for (i = 1; i < num_input_streams; i++)
    {
        accum_input = accum_streams[(i-1)%2];
        accum_output = accum_streams[i%2];
        err = clSetKernelArg(kernel[1], 0, sizeof accum_input, &accum_input);
        err |= clSetKernelArg(kernel[1], 1, sizeof input_streams[i], &input_streams[i]);
        err |= clSetKernelArg(kernel[1], 2, sizeof accum_output, &accum_output);
        err |= clSetKernelArg(kernel[1], 3, sizeof sampler, &sampler);
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            goto cleanup;
        }
        err = clEnqueueNDRangeKernel( queue, kernel[1], 2, NULL, threads, NULL, 0, NULL, NULL );
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            goto cleanup;
        }
    }

    // Copy the last accum into the other one. Argument 2 of the copy kernel
    // (the sampler) is still set from the first pass.
    accum_input = accum_streams[(i-1)%2];
    accum_output = accum_streams[i%2];
    err = clSetKernelArg(kernel[0], 0, sizeof accum_input, &accum_input);
    err |= clSetKernelArg(kernel[0], 1, sizeof accum_output, &accum_output);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }
    err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadImage(queue, accum_output, CL_TRUE,
                             origin, region, 0, 0,
                             (void *)output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clReadImage failed\n");
        goto cleanup;
    }

    err = verify_byte_image(expected_output, output_ptr, img_width, img_height, 4);
    if (err)
    {
        log_error("IMAGE_MULTIPASS test failed.\n");
    }
    else
    {
        log_info("IMAGE_MULTIPASS test passed\n");
    }
    result = err;

cleanup:
    if (d)
        free_mtdata(d);
    if (input_data)
    {
        for (i = 0; i < num_input_streams; i++)
            free(input_data[i]);
        free(input_data);
    }
    free(initial_data);
    for (i = 0; i < 2; i++)
    {
        if (program[i])
            clReleaseProgram(program[i]);
        if (kernel[i])
            clReleaseKernel(kernel[i]);
        if (accum_streams[i])
            clReleaseMemObject(accum_streams[i]);
    }
    if (input_streams)
    {
        for (i = 0; i < num_input_streams; i++)
        {
            if (input_streams[i])
                clReleaseMemObject(input_streams[i]);
        }
        free(input_streams);
    }
    if (sampler)
        clReleaseSampler(sampler);
    free(expected_output);
    free(output_ptr);
    return result;
}

// Multipass image accumulation test whose kernels read their source images
// with integer coordinates. num_elements is not used.
// Returns 0 on success and a non-zero value on failure.
int
test_image_multipass_integer_coord(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    return run_image_multipass(device, context, queue,
                               &image_to_image_kernel_integer_coord_code,
                               &image_sum_kernel_integer_coord_code,
                               0);
}

// Multipass image accumulation test whose kernels read their source images
// with float coordinates. num_elements is not used.
// Returns 0 on success and a non-zero value on failure.
int
test_image_multipass_float_coord(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    return run_image_multipass(device, context, queue,
                               &image_to_image_kernel_float_coord_code,
                               &image_sum_kernel_float_coord_code,
                               2);
}

View File

@@ -0,0 +1,251 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/imageHelpers.h"
#include "../../test_common/harness/conversions.h"
// OpenCL C source: each work-item reads the pixel at its 2D global id from
// srcimg with read_imagef and stores the float4 result in `results` at index
// tid_y * image width + tid_x.
static const char *param_kernel[] = {
"__kernel void test_fn(read_only image2d_t srcimg, sampler_t sampler, __global float4 *results )\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" results[ tid_y * get_image_width( srcimg ) + tid_x ] = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
"\n"
"}\n" };
int validate_results( size_t width, size_t height, cl_image_format &format, char *inputData, cl_float *actualResults )
{
for( size_t i = 0; i < width * height; i++ )
{
cl_float expected[ 4 ], tolerance;
switch( format.image_channel_data_type )
{
case CL_UNORM_INT8:
{
cl_uchar *p = (cl_uchar *)inputData;
expected[ 0 ] = p[ 0 ] / 255.f;
expected[ 1 ] = p[ 1 ] / 255.f;
expected[ 2 ] = p[ 2 ] / 255.f;
expected[ 3 ] = p[ 3 ] / 255.f;
tolerance = 1.f / 255.f;
break;
}
case CL_SNORM_INT8:
{
cl_char *p = (cl_char *)inputData;
expected[ 0 ] = fmaxf( p[ 0 ] / 127.f, -1.f );
expected[ 1 ] = fmaxf( p[ 1 ] / 127.f, -1.f );
expected[ 2 ] = fmaxf( p[ 2 ] / 127.f, -1.f );
expected[ 3 ] = fmaxf( p[ 3 ] / 127.f, -1.f );
tolerance = 1.f / 127.f;
break;
}
case CL_UNSIGNED_INT8:
{
cl_uchar *p = (cl_uchar *)inputData;
expected[ 0 ] = p[ 0 ];
expected[ 1 ] = p[ 1 ];
expected[ 2 ] = p[ 2 ];
expected[ 3 ] = p[ 3 ];
tolerance = 1.f / 127.f;
break;
}
case CL_SIGNED_INT8:
{
cl_short *p = (cl_short *)inputData;
expected[ 0 ] = p[ 0 ];
expected[ 1 ] = p[ 1 ];
expected[ 2 ] = p[ 2 ];
expected[ 3 ] = p[ 3 ];
tolerance = 1.f / 127.f;
break;
}
case CL_UNORM_INT16:
{
cl_ushort *p = (cl_ushort *)inputData;
expected[ 0 ] = p[ 0 ] / 65535.f;
expected[ 1 ] = p[ 1 ] / 65535.f;
expected[ 2 ] = p[ 2 ] / 65535.f;
expected[ 3 ] = p[ 3 ] / 65535.f;
tolerance = 1.f / 65535.f;
break;
}
case CL_UNSIGNED_INT32:
{
cl_uint *p = (cl_uint *)inputData;
expected[ 0 ] = p[ 0 ];
expected[ 1 ] = p[ 1 ];
expected[ 2 ] = p[ 2 ];
expected[ 3 ] = p[ 3 ];
tolerance = 0.0001f;
break;
}
case CL_FLOAT:
{
cl_float *p = (cl_float *)inputData;
expected[ 0 ] = p[ 0 ];
expected[ 1 ] = p[ 1 ];
expected[ 2 ] = p[ 2 ];
expected[ 3 ] = p[ 3 ];
tolerance = 0.0001f;
break;
}
default:
// Should never get here
break;
}
if( format.image_channel_order == CL_BGRA )
{
cl_float tmp = expected[ 0 ];
expected[ 0 ] = expected[ 2 ];
expected[ 2 ] = tmp;
}
// Within an error tolerance, make sure the results match
cl_float error1 = fabsf( expected[ 0 ] - actualResults[ 0 ] );
cl_float error2 = fabsf( expected[ 1 ] - actualResults[ 1 ] );
cl_float error3 = fabsf( expected[ 2 ] - actualResults[ 2 ] );
cl_float error4 = fabsf( expected[ 3 ] - actualResults[ 3 ] );
if( error1 > tolerance || error2 > tolerance || error3 > tolerance || error4 > tolerance )
{
log_error( "ERROR: Sample %d did not validate against expected results for %d x %d %s:%s image\n", (int)i, (int)width, (int)height,
GetChannelOrderName( format.image_channel_order ), GetChannelTypeName( format.image_channel_data_type ) );
log_error( " Expected: %f %f %f %f\n", (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ] );
log_error( " Actual: %f %f %f %f\n", (float)actualResults[ 0 ], (float)actualResults[ 1 ], (float)actualResults[ 2 ], (float)actualResults[ 3 ] );
// Check real quick a special case error here
cl_float error1 = fabsf( expected[ 3 ] - actualResults[ 0 ] );
cl_float error2 = fabsf( expected[ 2 ] - actualResults[ 1 ] );
cl_float error3 = fabsf( expected[ 1 ] - actualResults[ 2 ] );
cl_float error4 = fabsf( expected[ 0 ] - actualResults[ 3 ] );
if( error1 <= tolerance && error2 <= tolerance && error3 <= tolerance && error4 <= tolerance )
{
log_error( "\t(Kernel did not respect change in channel order)\n" );
}
return -1;
}
// Increment and go
actualResults += 4;
inputData += get_format_type_size( &format ) * 4;
}
return 0;
}
// Runs one kernel against many 2D images of different sizes and formats and
// checks that each run returns the pixels of the image it was given.
//
// For every size/format combination an input image is created from random
// host data together with a float output buffer. All kernel launches are
// enqueued first; the results are then read back and validated one by one.
// num_elements is not used.
//
// Returns 0 on success, -1 when validation fails, or the error value returned
// through test_error when an OpenCL call fails.
//
// Changes from the previous version: all random input data is generated (and
// the generator freed) before any OpenCL object is created, so an early
// return through test_error can no longer leak the generator, and the
// image-creation error text is only formatted when creation actually failed.
int test_image_param(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    size_t sizes[] = { 64, 100, 128, 250, 512 };
    cl_image_format formats[] = { { CL_RGBA, CL_UNORM_INT8 }, { CL_RGBA, CL_UNORM_INT16 }, { CL_RGBA, CL_FLOAT }, { CL_BGRA, CL_UNORM_INT8 } };
    // Host element type used to generate data for the format at the same index.
    ExplicitType types[] = { kUChar, kUShort, kFloat, kUChar };
    int error;
    size_t i, j, idx;
    size_t threads[ 2 ];
    MTdata d;
    const size_t numSizes = sizeof( sizes ) / sizeof( sizes[ 0 ] );
    const size_t numFormats = sizeof( formats ) / sizeof( formats[ 0 ] );
    const size_t numAttempts = numSizes * numFormats;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ numAttempts ][ 2 ];
    BufferOwningPtr<char> inputs[ numAttempts ];

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // Generate the host data for every attempt, in the same order as before so
    // the random sequence is unchanged.
    d = init_genrand( gRandomSeed );
    for( i = 0, idx = 0; i < numSizes; i++ )
    {
        for( j = 0; j < numFormats; j++, idx++ )
        {
            inputs[ idx ].reset(create_random_data( types[ j ], d, sizes[ i ] * sizes[ i ] * 4 ));
        }
    }
    free_mtdata(d); d = NULL;

    for( i = 0, idx = 0; i < numSizes; i++ )
    {
        for( j = 0; j < numFormats; j++, idx++ )
        {
            // For each attempt, we create a pair: an input image, whose parameters keep changing, and an output buffer
            // that we can read values from. The output buffer will remain consistent to ensure that any changes we
            // witness are due to the image changes
            streams[ idx ][ 0 ] = create_image_2d( context, CL_MEM_COPY_HOST_PTR, &formats[ j ], sizes[ i ], sizes[ i ], 0, inputs[ idx ], &error );
            if( error != CL_SUCCESS )
            {
                char err_str[256];
                sprintf(err_str, "Unable to create input image for format %s order %s" ,
                        GetChannelOrderName( formats[j].image_channel_order ),
                        GetChannelTypeName( formats[j].image_channel_data_type ));
                test_error( error, err_str);
            }

            streams[ idx ][ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizes[ i ] * sizes[ i ] * 4 * sizeof( cl_float ), NULL, &error );
            test_error( error, "Unable to create output buffer" );
        }
    }

    // Create a single kernel to use for all the tests
    error = create_single_kernel_helper( context, &program, &kernel, 1, param_kernel, "test_fn" );
    test_error( error, "Unable to create testing kernel" );

    // Also create a sampler to use for all the runs
    clSamplerWrapper sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &error );
    test_error( error, "clCreateSampler failed" );

    // Set up the arguments for each and queue
    for( i = 0, idx = 0; i < numSizes; i++ )
    {
        for( j = 0; j < numFormats; j++, idx++ )
        {
            error = clSetKernelArg( kernel, 0, sizeof( streams[ idx ][ 0 ] ), &streams[ idx ][ 0 ] );
            error |= clSetKernelArg( kernel, 1, sizeof( sampler ), &sampler );
            error |= clSetKernelArg( kernel, 2, sizeof( streams[ idx ][ 1 ] ), &streams[ idx ][ 1 ]);
            test_error( error, "Unable to set kernel arguments" );

            threads[ 0 ] = threads[ 1 ] = (size_t)sizes[ i ];
            error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
            test_error( error, "clEnqueueNDRangeKernel failed" );
        }
    }

    // Now go through each combo and validate the results
    for( i = 0, idx = 0; i < numSizes; i++ )
    {
        for( j = 0; j < numFormats; j++, idx++ )
        {
            BufferOwningPtr<cl_float> output(malloc(sizeof(cl_float) * sizes[ i ] * sizes[ i ] * 4 ));

            error = clEnqueueReadBuffer( queue, streams[ idx ][ 1 ], CL_TRUE, 0, sizes[ i ] * sizes[ i ] * 4 * sizeof( cl_float ), output, 0, NULL, NULL );
            test_error( error, "Unable to read results" );

            error = validate_results( sizes[ i ], sizes[ i ], formats[ j ], inputs[ idx ], output );
            if( error )
                return -1;
        }
    }
    return 0;
}

View File

@@ -0,0 +1,176 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source: each work-item reads the pixel at its 2D global id from
// srcimg with read_imageui and stores the x component, cast to unsigned char,
// in dst at index tid_y * image width + tid_x.
static const char *r_uint8_kernel_code =
"__kernel void test_r_uint8(read_only image2d_t srcimg, __global unsigned char *dst, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
" uint4 color;\n"
"\n"
" color = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n"
" dst[indx] = (unsigned char)(color.x);\n"
"\n"
"}\n";
// Allocates a w * h byte buffer and fills it with values drawn from the
// generator d, each truncated to unsigned char.
//
// Returns the buffer, which the caller must free(), or NULL when the
// allocation fails; in that case no values are drawn from d. The previous
// version wrote through the unchecked pointer.
static unsigned char *
generate_8bit_image(int w, int h, MTdata d)
{
    unsigned char *ptr = (unsigned char*)malloc(w * h * sizeof(unsigned char));
    int i;

    if (ptr == NULL)
        return NULL;

    for (i=0; i<w*h; i++)
        ptr[i] = (unsigned char)genrand_int32(d);
    return ptr;
}
// Compares the first w * h bytes of `image` and `outptr` and logs the outcome.
// Returns 0 when they are all equal and -1 when any byte differs.
static int
verify_8bit_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    const int pixel_count = w * h;
    int mismatch = 0;
    int idx = 0;

    // Stop scanning at the first differing byte.
    while (idx < pixel_count && !mismatch)
    {
        mismatch = (outptr[idx] != image[idx]);
        idx++;
    }

    if (mismatch)
    {
        log_error("READ_IMAGE_R_UNSIGNED_INT8 test failed\n");
        return -1;
    }

    log_info("READ_IMAGE_R_UNSIGNED_INT8 test passed\n");
    return 0;
}
// Writes random bytes into a 512x512 CL_R / CL_UNSIGNED_INT8 image, has a
// kernel copy the image into a byte buffer with read_imageui, and checks that
// the buffer matches the original data. num_elements is not used.
//
// Returns 0 on success or when the image format is not supported (the test is
// skipped), and a non-zero value on failure.
//
// Changes from the previous version: the sampler is now released (it used to
// leak even on success), and every error after the sampler is created goes
// through one cleanup path instead of returning with the host buffers and CL
// objects still held.
int
test_image_r8(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // All locals are declared before the first goto so that jumping to
    // cleanup never crosses an initialisation.
    cl_mem streams[2] = {NULL, NULL};
    cl_image_format img_format;
    cl_uchar *input_ptr = NULL, *output_ptr = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_sampler sampler = NULL;
    size_t threads[3];
    int img_width = 512;
    int img_height = 512;
    size_t origin[3] = {0,0,0};
    size_t region[3] = {(size_t)img_width, (size_t)img_height, 1};
    int result = -1;
    int err;
    MTdata d;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    img_format.image_channel_order = CL_R;
    img_format.image_channel_data_type = CL_UNSIGNED_INT8;

    // early out if this image type is not supported
    if( ! is_image_format_supported( context, (cl_mem_flags)(CL_MEM_READ_ONLY), CL_MEM_OBJECT_IMAGE2D, &img_format ) ) {
        log_info("WARNING: Image type not supported; skipping test.\n");
        return 0;
    }

    // The sampler is created before anything else so that test_error can bail
    // out while there is still nothing to release.
    sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
    test_error(err, "clCreateSampler failed");

    d = init_genrand( gRandomSeed );
    input_ptr = generate_8bit_image(img_width, img_height, d);
    free_mtdata(d); d = NULL;

    output_ptr = (cl_uchar*)malloc(sizeof(cl_uchar) * img_width * img_height);
    if (!input_ptr || !output_ptr)
    {
        log_error("Failed to allocate host image buffers\n");
        goto cleanup;
    }

    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_ONLY), &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[0])
    {
        log_error("create_image_2d failed\n");
        goto cleanup;
    }

    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uchar) * img_width*img_height, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    err = clEnqueueWriteImage(queue, streams[0], CL_TRUE,
                              origin, region, 0, 0,
                              input_ptr,
                              0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clWriteImage failed: %d\n", err);
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &r_uint8_kernel_code, "test_r_uint8" );
    if (err) {
        log_error("Failed to create kernel and program: %d\n", err);
        // What the helper leaves in its out-parameters after a failure is not
        // visible from here, so do not try to release them.
        program = NULL;
        kernel = NULL;
        goto cleanup;
    }

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed: %d\n", err);
        goto cleanup;
    }

    // One work-item per pixel; only the first two entries are used.
    threads[0] = (size_t)img_width;
    threads[1] = (size_t)img_height;
    err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_uchar)*img_width*img_height, (void *)output_ptr, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_8bit_image(input_ptr, output_ptr, img_width, img_height);

cleanup:
    if (sampler)
        clReleaseSampler(sampler);
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);
    return result;
}

View File

@@ -0,0 +1,146 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Copies a 512x512 image of the given format into a buffer with
// clEnqueueCopyImageToBuffer and checks that the buffer holds exactly the
// bytes that were written to the image.
//
// Returns 0 on success, -1 when the data does not match or host memory cannot
// be allocated, or the OpenCL error code when a CL call fails.
//
// Changes from the previous version:
//  - the host buffers and the copy event are released before every early
//    return (they used to leak whenever test_error returned);
//  - a failed clEnqueueWriteImage is no longer reported as
//    "clEnqueueWriteBuffer failed";
//  - the mismatch dump labels the bytes read back from the buffer as "actual"
//    and the bytes written to the image as "expected" (they were swapped).
//
// test_error is relied on to log and return when err is not CL_SUCCESS, as
// the original code already did; the explicit returns after it only guard
// against continuing with freed buffers should that ever not hold.
int test_imagearraycopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
{
    cl_uchar *imgptr = NULL, *bufptr = NULL;
    clMemWrapper image, buffer;
    int img_width = 512;
    int img_height = 512;
    size_t elem_size;
    size_t buffer_size;
    int i;
    cl_int err;
    MTdata d;
    cl_event copyevent;

    log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));

    image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err);
    test_error(err, "create_image_2d failed");

    err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
    test_error(err, "clGetImageInfo failed");

    buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;

    buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err);
    test_error(err, "clCreateBuffer failed");

    // Both host buffers are allocated up front so that a single pair of
    // free() calls covers every exit below.
    imgptr = (cl_uchar*)malloc(buffer_size);
    bufptr = (cl_uchar*)malloc(buffer_size);
    if (!imgptr || !bufptr)
    {
        log_error("Unable to allocate host buffers\n");
        free(imgptr);
        free(bufptr);
        return -1;
    }

    d = init_genrand( gRandomSeed );
    for (i=0; i<(int)buffer_size; i++) {
        imgptr[i] = (cl_uchar)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    size_t origin[3]={0,0,0}, region[3]={(size_t)img_width,(size_t)img_height,1};
    err = clEnqueueWriteImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        free(imgptr);
        free(bufptr);
        test_error(err, "clEnqueueWriteImage failed");
        return err;
    }

    err = clEnqueueCopyImageToBuffer( queue, image, buffer, origin, region, 0, 0, NULL, &copyevent );
    if (err != CL_SUCCESS)
    {
        free(imgptr);
        free(bufptr);
        test_error(err, "clEnqueueCopyImageToBuffer failed");
        return err;
    }

    // The blocking read waits on the copy through its event.
    err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 1, &copyevent, NULL);
    if (err != CL_SUCCESS)
    {
        clReleaseEvent(copyevent);
        free(imgptr);
        free(bufptr);
        test_error(err, "clEnqueueReadBuffer failed");
        return err;
    }

    err = clReleaseEvent(copyevent);
    if (err != CL_SUCCESS)
    {
        free(imgptr);
        free(bufptr);
        test_error(err, "clReleaseEvent failed");
        return err;
    }

    if (memcmp(imgptr, bufptr, buffer_size) != 0) {
        log_error( "ERROR: Results did not validate!\n" );
        unsigned char * expectedBytes = (unsigned char*)imgptr;
        unsigned char * actualBytes = (unsigned char*)bufptr;
        int failuresPrinted = 0;
        int offset;
        // Walk the data one image element at a time and dump the first few
        // elements that differ.
        for (offset=0; offset< (int)buffer_size; offset+=(int)elem_size) {
            int failed = 0;
            int j;
            for (j=0; j<(int)elem_size; j++)
                if (expectedBytes[offset+j] != actualBytes[offset+j])
                    failed = 1;
            char values[4096];
            values[0] = 0;
            if (failed) {
                sprintf(values + strlen(values), "%d(0x%x) -> actual [", offset, offset);
                for (j=0; j<(int)elem_size; j++)
                    sprintf(values + strlen( values), "0x%02x ", actualBytes[offset+j]);
                sprintf(values + strlen(values), "] != expected [");
                for (j=0; j<(int)elem_size; j++)
                    sprintf(values + strlen( values), "0x%02x ", expectedBytes[offset+j]);
                sprintf(values + strlen(values), "]");
                log_error("%s\n", values);
                failuresPrinted++;
            }
            if (failuresPrinted > 5) {
                log_error("Not printing further failures...\n");
                break;
            }
        }
        err = -1;
    }

    free(imgptr);
    free(bufptr);

    if (err)
        log_error("IMAGE to ARRAY copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
                  (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
    return err;
}
// Runs test_imagearraycopy_single_format once for every image format that
// clGetSupportedImageFormats reports for CL_MEM_READ_WRITE 2D images and
// ORs the per-format results together.
//
// device, context, queue - forwarded unchanged to the per-format test.
// num_elements           - not used by this function.
//
// Returns the OR-ed result of all per-format runs (0 when every one passed).
int test_imagearraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int status;
    cl_uint format_count;
    cl_uint idx;
    cl_image_format *format_list;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // First call only asks how many formats there are.
    status = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &format_count);
    test_error(status, "clGetSupportedImageFormats failed");

    // Second call fills the list that was sized from the first answer.
    format_list = (cl_image_format *)malloc(format_count * sizeof(cl_image_format));
    status = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, format_count, format_list, NULL);
    test_error(status, "clGetSupportedImageFormats failed");

    idx = 0;
    while (idx < format_count) {
        status |= test_imagearraycopy_single_format(device, context, queue, format_list + idx);
        idx++;
    }

    free(format_list);

    if (status) {
        log_error("IMAGE to ARRAY copy test failed\n");
    } else {
        log_info("IMAGE to ARRAY copy test passed\n");
    }

    return status;
}

View File

@@ -0,0 +1,144 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Writes random bytes into a 128x128x32 3D image of the given format, copies
// the image into a buffer with clEnqueueCopyImageToBuffer, reads the buffer
// back and checks that it is byte-identical to the data that was written.
//
// device  - not used by this function.
// context - context in which the image and the buffer are created.
// queue   - command queue that receives every enqueued command.
// format  - image format under test.
//
// Returns 0 when the data matches and -1 when it does not. An OpenCL failure
// is handed to test_error (defined outside this file; it is used here as if
// it logs and returns from the function on a non-zero code).
int test_imagearraycopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
{
    cl_uchar *imgptr, *bufptr;
    clMemWrapper image, buffer;
    int img_width = 128;
    int img_height = 128;
    int img_depth = 32;
    size_t elem_size;
    size_t buffer_size;
    int i;
    cl_int err;
    MTdata d;
    cl_event copyevent;

    log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));

    image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err);
    test_error(err, "create_image_3d failed");

    // The element size depends on the format, so ask the image for it.
    err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
    test_error(err, "clGetImageInfo failed");

    buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;

    buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err);
    test_error(err, "clCreateBuffer failed");

    // Allocate the host source data exactly once; a second malloc into the
    // same pointer would leak the first block.
    d = init_genrand( gRandomSeed );
    imgptr = (cl_uchar*)malloc(buffer_size);
    for (i=0; i<(int)buffer_size; i++) {
        imgptr[i] = (cl_uchar)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    size_t origin[3]={0,0,0}, region[3]={(size_t)img_width,(size_t)img_height,(size_t)img_depth};
    err = clEnqueueWriteImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 0, NULL, NULL );
    test_error(err, "clEnqueueWriteImage failed");

    err = clEnqueueCopyImageToBuffer( queue, image, buffer, origin, region, 0, 0, NULL, &copyevent );
    test_error(err, "clEnqueueCopyImageToBuffer failed");

    // The blocking read waits on the copy event before reading the buffer.
    bufptr = (cl_uchar*)malloc(buffer_size);
    err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 1, &copyevent, NULL);
    test_error(err, "clEnqueueReadBuffer failed");

    // Release the event returned by the copy, as the 2D variant of this test does.
    err = clReleaseEvent(copyevent);
    test_error(err, "clReleaseEvent failed");

    if (memcmp(imgptr, bufptr, buffer_size) != 0) {
        log_error( "ERROR: Results did not validate!\n" );
        unsigned char * inchar = (unsigned char*)imgptr;
        unsigned char * outchar = (unsigned char*)bufptr;
        int failuresPrinted = 0;
        int offset;
        // Walk the data one image element at a time and print the first few
        // elements that differ.
        // NOTE(review): "actual" prints the bytes that were written and
        // "expected" the bytes read back; the labels look swapped - confirm.
        for (offset=0; offset< (int)buffer_size; offset+=(int)elem_size) {
            int failed = 0;
            int j;
            for (j=0; j<(int)elem_size; j++)
                if (inchar[offset+j] != outchar[offset+j])
                    failed = 1;
            char values[4096];
            values[0] = 0;
            if (failed) {
                sprintf(values + strlen(values), "%d(0x%x) -> actual [", offset, offset);
                for (j=0; j<(int)elem_size; j++)
                    sprintf(values + strlen( values), "0x%02x ", inchar[offset+j]);
                sprintf(values + strlen(values), "] != expected [");
                for (j=0; j<(int)elem_size; j++)
                    sprintf(values + strlen( values), "0x%02x ", outchar[offset+j]);
                sprintf(values + strlen(values), "]");
                log_error("%s\n", values);
                failuresPrinted++;
            }
            if (failuresPrinted > 5) {
                log_error("Not printing further failures...\n");
                break;
            }
        }
        err = -1;
    }

    free(imgptr);
    free(bufptr);

    if (err)
        log_error("IMAGE3D to ARRAY copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
                  (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);

    return err;
}
// Runs test_imagearraycopy3d_single_format once for every image format that
// clGetSupportedImageFormats reports for CL_MEM_READ_WRITE 3D images and
// ORs the per-format results together.
//
// device, context, queue - forwarded unchanged to the per-format test.
// num_elements           - not used by this function.
//
// Returns the OR-ed result of all per-format runs (0 when every one passed).
int test_imagearraycopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int err;
    cl_image_format *formats;
    cl_uint num_formats;
    cl_uint i;

    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )

    // First call only asks how many formats there are.
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
    test_error(err, "clGetSupportedImageFormats failed");

    formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));

    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL);
    test_error(err, "clGetSupportedImageFormats failed");

    for (i = 0; i < num_formats; i++) {
        err |= test_imagearraycopy3d_single_format(device, context, queue, &formats[i]);
    }

    // The format list is no longer needed; free it as the 2D variant does.
    free(formats);

    if (err)
        log_error("IMAGE3D to ARRAY copy test failed\n");
    else
        log_info("IMAGE3D to ARRAY copy test passed\n");

    return err;
}

View File

@@ -0,0 +1,234 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Allocates a w x h image with four unsigned char channels per pixel and
// fills every channel with a value drawn from generator d.
// The caller owns the returned malloc'ed block.
static unsigned char *
generate_rgba8_image(int w, int h, MTdata d)
{
    const int channel_count = w * h * 4;
    unsigned char *pixels = (unsigned char*)malloc(channel_count);
    int idx = 0;

    while (idx < channel_count)
    {
        pixels[idx] = (unsigned char)genrand_int32(d);
        ++idx;
    }

    return pixels;
}
// Compares the w*h*4 unsigned char channels of two images.
// Returns 0 when every channel matches and -1 at the first difference.
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    const int channel_count = w * h * 4;
    int idx = 0;

    while (idx < channel_count)
    {
        if (image[idx] != outptr[idx])
            return -1;
        ++idx;
    }

    return 0;
}
// Allocates a w x h image with four unsigned short channels per pixel and
// fills every channel with a value drawn from generator d.
// The caller owns the returned malloc'ed block.
static unsigned short *
generate_rgba16_image(int w, int h, MTdata d)
{
    unsigned short *pixels = (unsigned short *)malloc(w * h * 4 * sizeof(unsigned short));
    const int channel_count = w * h * 4;
    int idx = 0;

    while (idx < channel_count)
    {
        pixels[idx] = (unsigned short)genrand_int32(d);
        ++idx;
    }

    return pixels;
}
// Compares the w*h*4 unsigned short channels of two images.
// Returns 0 when every channel matches and -1 at the first difference.
static int
verify_rgba16_image(unsigned short *image, unsigned short *outptr, int w, int h)
{
    const int channel_count = w * h * 4;
    int idx = 0;

    while (idx < channel_count)
    {
        if (image[idx] != outptr[idx])
            return -1;
        ++idx;
    }

    return 0;
}
// Allocates a w x h image with four float channels per pixel and fills every
// channel with get_random_float(-0x40000000, 0x40000000, d).
// The caller owns the returned malloc'ed block.
static float *
generate_rgbafp_image(int w, int h, MTdata d)
{
    float *pixels = (float*)malloc(w * h * 4 * sizeof(float));
    const int channel_count = w * h * 4;
    int idx = 0;

    while (idx < channel_count)
    {
        pixels[idx] = get_random_float(-0x40000000, 0x40000000, d);
        ++idx;
    }

    return pixels;
}
// Compares the w*h*4 float channels of two images with the != operator.
// Returns 0 when no channel compares unequal and -1 at the first one that does.
static int
verify_rgbafp_image(float *image, float *outptr, int w, int h)
{
    const int channel_count = w * h * 4;
    int idx = 0;

    while (idx < channel_count)
    {
        if (outptr[idx] != image[idx])
            return -1;
        ++idx;
    }

    return 0;
}
// Exercises clEnqueueCopyImage on 512x512 CL_RGBA 2D images for three channel
// types (CL_UNORM_INT8, CL_UNORM_INT16, CL_FLOAT). For each type a source
// image is filled with random data, copied tile by tile (64x32 tiles) into a
// second image, read back and compared against the data that was written.
//
// device       - checked for image support by PASSIVE_REQUIRE_IMAGE_SUPPORT.
// context      - context in which the six images are created.
// queue        - command queue that receives every enqueued command.
// num_elements - not used by this function.
//
// Returns 0 when all three types verify and -1 from the verify helper on the
// first mismatch. An OpenCL failure is handed to test_error (defined outside
// this file; it is used here as if it logs and returns on a non-zero code).
// NOTE(review): if test_error does return early, the host buffers allocated
// below are not freed on those paths.
int
test_imagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_image_format img_format;
    unsigned char *rgba8_inptr, *rgba8_outptr;
    unsigned short *rgba16_inptr, *rgba16_outptr;
    float *rgbafp_inptr, *rgbafp_outptr;
    clMemWrapper streams[6];
    int img_width = 512;
    int img_height = 512;
    int i, err;
    MTdata d;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // Generate the three host source images from one seeded generator.
    d = init_genrand( gRandomSeed );
    rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
    rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
    rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
    free_mtdata(d); d = NULL;

    rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
    rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
    rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);

    // streams[2*k] is the copy source and streams[2*k+1] the destination for type k.
    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT8;
    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
    test_error(err, "create_image_2d failed");
    streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
    test_error(err, "create_image_2d failed");

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT16;
    streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
    test_error(err, "create_image_2d failed");
    streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
    test_error(err, "create_image_2d failed");

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_FLOAT;
    streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
    test_error(err, "create_image_2d failed");
    streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
    test_error(err, "create_image_2d failed");

    for (i=0; i<3; i++)
    {
        // Start from NULL so the pointers always hold a defined value.
        void *p = NULL, *outp = NULL;
        int x, y, delta_w = img_width/8, delta_h = img_height/16;

        switch (i)
        {
            case 0:
                p = (void *)rgba8_inptr;
                outp = (void *)rgba8_outptr;
                log_info("Testing CL_RGBA CL_UNORM_INT8\n");
                break;
            case 1:
                p = (void *)rgba16_inptr;
                outp = (void *)rgba16_outptr;
                log_info("Testing CL_RGBA CL_UNORM_INT16\n");
                break;
            case 2:
                p = (void *)rgbafp_inptr;
                outp = (void *)rgbafp_outptr;
                log_info("Testing CL_RGBA CL_FLOAT\n");
                break;
        }

        size_t origin[3] = {0,0,0}, region[3] = {(size_t)img_width, (size_t)img_height, 1};
        err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
        // Report the call that actually failed (previously said create_image_2d).
        test_error(err, "clEnqueueWriteImage failed");

        // Copy the whole image as a grid of delta_w x delta_h tiles, each to
        // the same position in the destination image.
        int copy_number = 0;
        for (y=0; y<img_height; y+=delta_h)
        {
            for (x=0; x<img_width; x+=delta_w)
            {
                copy_number++;
                size_t copy_origin[3] = {(size_t)x,(size_t)y,0}, copy_region[3]={(size_t)delta_w, (size_t)delta_h, 1};
                err = clEnqueueCopyImage(queue, streams[i*2], streams[i*2+1],
                                         copy_origin, copy_origin, copy_region,
                                         0, NULL, NULL);
                if (err) {
                    log_error("Copy %d (origin [%d, %d], size [%d, %d], image size [%d x %d]) Failed\n", copy_number, x, y, delta_w, delta_h, img_width, img_height);
                }
                test_error(err, "clEnqueueCopyImage failed");
            }
        }

        err = clEnqueueReadImage(queue, streams[i*2+1], CL_TRUE, origin, region, 0, 0, outp, 0, NULL, NULL);
        test_error(err, "clEnqueueReadImage failed");

        switch (i)
        {
            case 0:
                err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
                break;
            case 1:
                err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, img_width, img_height);
                break;
            case 2:
                err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, img_width, img_height);
                break;
        }

        // Stop at the first channel type that fails verification.
        if (err)
            break;
    }

    free(rgba8_inptr);
    free(rgba16_inptr);
    free(rgbafp_inptr);
    free(rgba8_outptr);
    free(rgba16_outptr);
    free(rgbafp_outptr);

    if (err)
        log_error("IMAGE copy test failed\n");
    else
        log_info("IMAGE copy test passed\n");

    return err;
}

View File

@@ -0,0 +1,237 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Allocates num_elements unsigned chars and fills each one with a value
// drawn from generator d. The caller owns the returned malloc'ed block.
static unsigned char *
generate_uint8_image(unsigned num_elements, MTdata d)
{
    unsigned char *data = (unsigned char*)malloc(num_elements);
    unsigned idx = 0;

    while (idx < num_elements)
    {
        data[idx] = (unsigned char)genrand_int32(d);
        ++idx;
    }

    return data;
}
// Compares num_elements unsigned chars of two arrays.
// Returns 0 when every element matches and -1 at the first difference.
static int
verify_uint8_image(unsigned char *image, unsigned char *outptr, unsigned num_elements)
{
    unsigned idx = 0;

    while (idx < num_elements)
    {
        if (image[idx] != outptr[idx])
            return -1;
        ++idx;
    }

    return 0;
}
// Allocates num_elements unsigned shorts and fills each one with a value
// drawn from generator d. The caller owns the returned malloc'ed block.
static unsigned short *
generate_uint16_image(unsigned num_elements, MTdata d)
{
    unsigned short *data = (unsigned short *)malloc(num_elements * sizeof(unsigned short));
    unsigned idx = 0;

    while (idx < num_elements)
    {
        data[idx] = (unsigned short)genrand_int32(d);
        ++idx;
    }

    return data;
}
// Compares num_elements unsigned shorts of two arrays.
// Returns 0 when every element matches and -1 at the first difference.
static int
verify_uint16_image(unsigned short *image, unsigned short *outptr, unsigned num_elements)
{
    unsigned idx = 0;

    while (idx < num_elements)
    {
        if (image[idx] != outptr[idx])
            return -1;
        ++idx;
    }

    return 0;
}
// Allocates num_elements floats and fills each one with
// get_random_float(-0x40000000, 0x40000000, d).
// The caller owns the returned malloc'ed block.
static float *
generate_float_image(unsigned num_elements, MTdata d)
{
    float *data = (float*)malloc(num_elements * sizeof(float));
    unsigned idx = 0;

    while (idx < num_elements)
    {
        data[idx] = get_random_float(-0x40000000, 0x40000000, d);
        ++idx;
    }

    return data;
}
// Compares num_elements floats of two arrays with the != operator.
// Returns 0 when no element compares unequal and -1 at the first one that does.
static int
verify_float_image(float *image, float *outptr, unsigned num_elements)
{
    unsigned idx = 0;

    while (idx < num_elements)
    {
        if (outptr[idx] != image[idx])
            return -1;
        ++idx;
    }

    return 0;
}
// Exercises clEnqueueCopyImage on 128x128x64 CL_RGBA 3D images for three
// channel types (CL_UNORM_INT8, CL_UNORM_INT16, CL_FLOAT). For each type a
// source image is filled with random data, copied block by block (16x8x16
// blocks) into a second image, read back and compared with what was written.
//
// device               - checked by PASSIVE_REQUIRE_3D_IMAGE_SUPPORT.
// context              - context in which the six images are created.
// queue                - command queue that receives every enqueued command.
// num_elements_ignored - not used.
//
// Returns 0 when all three types verify and -1 from the verify helper on the
// first mismatch. OpenCL failures are handed to test_error.
int
test_imagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements_ignored)
{
    cl_image_format img_format;
    unsigned char *src8, *dst8;
    unsigned short *src16, *dst16;
    float *srcfp, *dstfp;
    clMemWrapper streams[6];
    int img_width = 128;
    int img_height = 128;
    int img_depth = 64;
    int pass;
    cl_int err;
    unsigned num_elements = img_width * img_height * img_depth * 4;
    MTdata d;

    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )

    // Host source data for all three channel types comes from one generator.
    d = init_genrand( gRandomSeed );
    src8 = (unsigned char *)generate_uint8_image(num_elements, d);
    src16 = (unsigned short *)generate_uint16_image(num_elements, d);
    srcfp = (float *)generate_float_image(num_elements, d);
    free_mtdata(d); d = NULL;

    dst8 = (unsigned char*)malloc(sizeof(unsigned char) * num_elements);
    dst16 = (unsigned short*)malloc(sizeof(unsigned short) * num_elements);
    dstfp = (float*)malloc(sizeof(float) * num_elements);

    // streams[2*k] is the copy source and streams[2*k+1] the destination for
    // channel type k; images are created in index order 0..5.
    const cl_channel_type channel_types[3] = { CL_UNORM_INT8, CL_UNORM_INT16, CL_FLOAT };
    for (pass = 0; pass < 3; pass++)
    {
        img_format.image_channel_order = CL_RGBA;
        img_format.image_channel_data_type = channel_types[pass];
        streams[pass*2] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
        test_error(err, "create_image_3d failed");
        streams[pass*2+1] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
        test_error(err, "create_image_3d failed");
    }

    void *host_src[3] = { (void *)src8, (void *)src16, (void *)srcfp };
    void *host_dst[3] = { (void *)dst8, (void *)dst16, (void *)dstfp };

    for (pass = 0; pass < 3; pass++)
    {
        const int delta_w = img_width/8, delta_h = img_height/16, delta_d = img_depth/4;
        size_t full_origin[3] = {0,0,0};
        size_t full_region[3] = {(size_t)img_width, (size_t)img_height, (size_t)img_depth};
        int x, y, z;

        err = clEnqueueWriteImage(queue, streams[pass*2], CL_TRUE, full_origin, full_region, 0, 0, host_src[pass], 0, NULL, NULL);
        test_error(err, "clEnqueueWriteImage failed");

        // Copy the volume one block at a time, each block to the same
        // position in the destination image.
        for (z = 0; z < img_depth; z += delta_d)
        {
            for (y = 0; y < img_height; y += delta_h)
            {
                for (x = 0; x < img_width; x += delta_w)
                {
                    size_t block_origin[3] = {(size_t)x, (size_t)y, (size_t)z};
                    size_t block_region[3] = {(size_t)delta_w, (size_t)delta_h, (size_t)delta_d};
                    err = clEnqueueCopyImage(queue, streams[pass*2], streams[pass*2+1], block_origin, block_origin, block_region, 0, NULL, NULL);
                    test_error(err, "clEnqueueCopyImage failed");
                }
            }
        }

        err = clEnqueueReadImage(queue, streams[pass*2+1], CL_TRUE, full_origin, full_region, 0, 0, host_dst[pass], 0, NULL, NULL);
        test_error(err, "clEnqueueReadImage failed");

        if (pass == 0)
        {
            err = verify_uint8_image(src8, dst8, num_elements);
            if (err) log_error("Failed uint8\n");
        }
        else if (pass == 1)
        {
            err = verify_uint16_image(src16, dst16, num_elements);
            if (err) log_error("Failed uint16\n");
        }
        else
        {
            err = verify_float_image(srcfp, dstfp, num_elements);
            if (err) log_error("Failed float\n");
        }

        // Stop at the first channel type that fails verification.
        if (err)
            break;
    }

    free(src8);
    free(src16);
    free(srcfp);
    free(dst8);
    free(dst16);
    free(dstfp);

    if (err) {
        log_error("IMAGE3D copy test failed\n");
    } else {
        log_info("IMAGE3D copy test passed\n");
    }

    return err;
}

View File

@@ -0,0 +1,505 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source of the "test_image_dim" kernel used by the image dimension
// tests below. Each work-item reads one float4 texel from srcimg at
// (get_global_id(0), get_global_id(1)) through the supplied sampler and
// writes it to the same coordinate of dstimg. The kernel does no bounds check.
static const char *image_dim_kernel_code =
"\n"
"__kernel void test_image_dim(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" float4 color;\n"
"\n"
" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n";
// Allocates a w x h image with four bytes per pixel and fills every byte
// with a value drawn from generator d.
// The caller owns the returned malloc'ed block.
static unsigned char *
generate_8888_image(int w, int h, MTdata d)
{
    const int byte_count = w * h * 4;
    unsigned char *pixels = (unsigned char*)malloc(byte_count);
    int idx = 0;

    while (idx < byte_count)
    {
        pixels[idx] = (unsigned char)genrand_int32(d);
        ++idx;
    }

    return pixels;
}
// Compares two w x h images that hold four bytes per pixel.
//
// image  - reference data (what was written to the device).
// outptr - data read back from the device.
// w, h   - image dimensions in pixels.
//
// Returns 0 when all w*h*4 bytes match and -1 at the first difference.
static int
verify_8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    int i;

    // Every pixel occupies 4 bytes, so the comparison must span w*h*4 bytes;
    // stopping at w*h would leave three quarters of the image unchecked.
    for (i=0; i<w*h*4; i++)
    {
        if (outptr[i] != image[i])
            return -1;
    }

    return 0;
}
// Runs the test_image_dim kernel on every power-of-two width x height image
// size from 1x1 up to a maximum derived from the device limits, with a NULL
// local work size, and checks that the destination image equals the source.
//
// The maximum dimension is the smaller of the device's 2D image limits and the
// largest power of two whose square, at 4 bytes per pixel, fits in one quarter
// of CL_DEVICE_GLOBAL_MEM_SIZE.
//
// device  - device whose limits are queried.
// context - context in which the kernel, sampler and images are created.
// queue   - command queue that receives every enqueued command.
// n_elems - not used by this function.
//
// Returns the number of image sizes whose contents failed verification, or -1
// when a setup step or an OpenCL call fails.
// NOTE(review): the early -1 returns inside the loop release the images and
// host buffers but not the sampler, kernel or program.
int
test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    cl_mem streams[2];
    cl_image_format img_format;
    unsigned char *input_ptr, *output_ptr;
    cl_program program;
    cl_kernel kernel;
    size_t threads[2];
    cl_ulong max_mem_size;
    int img_width, max_img_width;
    int img_height, max_img_height;
    int max_img_dim;
    int i, j, i2, j2, err=0;
    size_t max_image2d_width, max_image2d_height;
    int total_errors = 0;
    MTdata d;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    err = create_single_kernel_helper( context, &program, &kernel, 1, &image_dim_kernel_code, "test_image_dim" );
    if (err)
    {
        log_error("create_program_and_kernel_with_sources failed\n");
        return -1;
    }

    err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,sizeof(max_mem_size), &max_mem_size, NULL);
    if (err)
    {
        log_error("clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed (%d)\n", err);
        return -1;
    }
    err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_image2d_width), &max_image2d_width, NULL);
    if (err)
    {
        log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_WIDTH failed (%d)\n", err);
        return -1;
    }
    // Pass the size of the variable that actually receives the value.
    err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_image2d_height), &max_image2d_height, NULL);
    if (err)
    {
        log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_HEIGHT failed (%d)\n", err);
        return -1;
    }
    // %lu expects unsigned long; size_t may be a different width, so cast.
    log_info("Device reported max image sizes of %lu x %lu, and max mem size of %gMB.\n",
             (unsigned long)max_image2d_width, (unsigned long)max_image2d_height, max_mem_size/(1024.0*1024.0));

    cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
    test_error(err, "clCreateSampler failed");

    max_img_width = (int)max_image2d_width;
    max_img_height = (int)max_image2d_height;

    // determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel,
    // and we want to consume 1/4 of global memory (this is the minimum required to be
    // supported by the spec)
    max_mem_size /= 4; // use 1/4
    max_mem_size /= 4; // 4 bytes per pixel
    max_img_dim = (int)sqrt((double)max_mem_size);
    // convert to a power of 2
    {
        unsigned int n = (unsigned int)max_img_dim;
        unsigned int m = 0x80000000;

        // round-down to the nearest power of 2
        while (m > n)
            m >>= 1;

        max_img_dim = (int)m;
    }

    if (max_img_width > max_img_dim)
        max_img_width = max_img_dim;
    if (max_img_height > max_img_dim)
        max_img_height = max_img_dim;

    // Compute the byte count in double so large dimensions cannot overflow int.
    log_info("Adjusted maximum image size to test is %d x %d, which is a max mem size of %gMB.\n",
             max_img_width, max_img_height, ((double)max_img_width*(double)max_img_height*4.0)/(1024.0*1024.0));

    // One host source/destination pair sized for the largest image is reused
    // for every smaller size.
    d = init_genrand( gRandomSeed );
    input_ptr = generate_8888_image(max_img_width, max_img_height, d);
    output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height);

    // test every power-of-two height and width from 1 up to the adjusted maximum
    for (i=1,i2=0; i<=max_img_height; i<<=1,i2++)
    {
        img_height = (1 << i2);
        for (j=1,j2=0; j<=max_img_width; j<<=1,j2++)
        {
            img_width = (1 << j2);

            img_format.image_channel_order = CL_RGBA;
            img_format.image_channel_data_type = CL_UNORM_INT8;
            streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL);
            if (!streams[0])
            {
                log_error("create_image_2d failed. width = %d, height = %d\n", img_width, img_height);
                free(input_ptr);
                free(output_ptr);
                free_mtdata(d);
                return -1;
            }
            img_format.image_channel_order = CL_RGBA;
            img_format.image_channel_data_type = CL_UNORM_INT8;
            streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL);
            if (!streams[1])
            {
                log_error("create_image_2d failed. width = %d, height = %d\n", img_width, img_height);
                clReleaseMemObject(streams[0]);
                free(input_ptr);
                free(output_ptr);
                free_mtdata(d);
                return -1;
            }

            size_t origin[3] = {0,0,0};
            size_t region[3] = {(size_t)img_width, (size_t)img_height, 1};
            err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clWriteImage failed\n");
                clReleaseMemObject(streams[0]);
                clReleaseMemObject(streams[1]);
                free(input_ptr);
                free(output_ptr);
                free_mtdata(d);
                return -1;
            }

            err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
            err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
            err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
            if (err != CL_SUCCESS)
            {
                log_error("clSetKernelArgs failed\n");
                clReleaseMemObject(streams[0]);
                clReleaseMemObject(streams[1]);
                free(input_ptr);
                free(output_ptr);
                free_mtdata(d);
                return -1;
            }

            // One work-item per pixel; the local size is left NULL.
            threads[0] = (size_t)img_width;
            threads[1] = (size_t)img_height;
            log_info("Testing image dimensions %d x %d with local threads NULL.\n", img_width, img_height);
            err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
            if (err != CL_SUCCESS)
            {
                log_error("clEnqueueNDRangeKernel failed\n");
                log_error("Image Dimension test failed. image width = %d, image height = %d, local NULL\n",
                          img_width, img_height);
                clReleaseMemObject(streams[0]);
                clReleaseMemObject(streams[1]);
                free(input_ptr);
                free(output_ptr);
                free_mtdata(d);
                return -1;
            }
            err = clEnqueueReadImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clReadImage failed\n");
                log_error("Image Dimension test failed. image width = %d, image height = %d, local NULL\n",
                          img_width, img_height);
                clReleaseMemObject(streams[0]);
                clReleaseMemObject(streams[1]);
                free(input_ptr);
                free(output_ptr);
                free_mtdata(d);
                return -1;
            }
            // A content mismatch is counted but does not stop the sweep.
            err = verify_8888_image(input_ptr, output_ptr, img_width, img_height);
            if (err)
            {
                total_errors++;
                log_error("Image Dimension test failed. image width = %d, image height = %d\n", img_width, img_height);
            }

            clReleaseMemObject(streams[0]);
            clReleaseMemObject(streams[1]);
        }
    }

    // cleanup
    free(input_ptr);
    free(output_ptr);
    free_mtdata(d);
    clReleaseSampler(sampler);
    clReleaseKernel(kernel);
    clReleaseProgram(program);

    return total_errors;
}
// Runs the test_image_dim kernel on image sizes that are one less than a
// power of two in height, in width, or in both, and checks that the
// destination image equals the source.
//
// Three passes are made over all power-of-two (height, width) pairs from 2 up
// to the adjusted device maximum:
//   pass 0 - height is decremented; local size {N, 1} with N dividing the width
//   pass 1 - width is decremented;  local size {1, N} with N dividing the height
//   pass 2 - both are decremented;  local size {1, 1}
// N starts at the smaller of the kernel work-group size and the device's
// per-dimension work-item limit and is lowered until it divides evenly.
//
// device  - device whose limits are queried.
// context - context in which the kernel, sampler and images are created.
// queue   - command queue that receives every enqueued command.
// n_elems - not used by this function.
//
// Returns the number of image sizes whose contents failed verification, or -1
// when a setup step or an OpenCL call fails.
// NOTE(review): the early -1 returns inside the loop release the images and
// host buffers but not the sampler, kernel or program.
int
test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    cl_mem streams[2];
    cl_image_format img_format;
    unsigned char *input_ptr, *output_ptr;
    cl_program program;
    cl_kernel kernel;
    size_t threads[2], local_threads[2];
    cl_ulong max_mem_size;
    int img_width, max_img_width;
    int img_height, max_img_height;
    int max_img_dim;
    int i, j, i2, j2, err=0;
    size_t max_image2d_width, max_image2d_height;
    int total_errors = 0;
    size_t max_local_workgroup_size[3];
    MTdata d;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    err = create_single_kernel_helper( context, &program, &kernel, 1, &image_dim_kernel_code, "test_image_dim" );
    if (err)
    {
        log_error("create_program_and_kernel_with_sources failed\n");
        return -1;
    }

    size_t work_group_size = 0;
    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(work_group_size), &work_group_size, NULL);
    test_error(err, "clGetKerenlWorkgroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE");

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");

    err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,sizeof(max_mem_size), &max_mem_size, NULL);
    if (err)
    {
        log_error("clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed (%d)\n", err);
        return -1;
    }
    err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_image2d_width), &max_image2d_width, NULL);
    if (err)
    {
        log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_WIDTH failed (%d)\n", err);
        return -1;
    }
    // Pass the size of the variable that actually receives the value.
    err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_image2d_height), &max_image2d_height, NULL);
    if (err)
    {
        log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_HEIGHT failed (%d)\n", err);
        return -1;
    }
    // %lu expects unsigned long; size_t may be a different width, so cast.
    log_info("Device reported max image sizes of %lu x %lu, and max mem size of %gMB.\n",
             (unsigned long)max_image2d_width, (unsigned long)max_image2d_height, max_mem_size/(1024.0*1024.0));

    cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
    test_error(err, "clCreateSampler failed");

    max_img_width = (int)max_image2d_width;
    max_img_height = (int)max_image2d_height;

    // determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel,
    // and we want to consume 1/4 of global memory (this is the minimum required to be
    // supported by the spec)
    max_mem_size /= 4; // use 1/4
    max_mem_size /= 4; // 4 bytes per pixel
    max_img_dim = (int)sqrt((double)max_mem_size);
    // convert to a power of 2
    {
        unsigned int n = (unsigned int)max_img_dim;
        unsigned int m = 0x80000000;

        // round-down to the nearest power of 2
        while (m > n)
            m >>= 1;

        max_img_dim = (int)m;
    }

    if (max_img_width > max_img_dim)
        max_img_width = max_img_dim;
    if (max_img_height > max_img_dim)
        max_img_height = max_img_dim;

    // Compute the byte count in double so large dimensions cannot overflow int.
    log_info("Adjusted maximum image size to test is %d x %d, which is a max mem size of %gMB.\n",
             max_img_width, max_img_height, ((double)max_img_width*(double)max_img_height*4.0)/(1024.0*1024.0));

    d = init_genrand( gRandomSeed );

    int plus_minus;
    for (plus_minus=0; plus_minus < 3; plus_minus++)
    {
        // walk the power-of-two heights and widths from 2 up to the adjusted
        // maximum; the tested size is derived from them by subtracting one
        for (i=2,i2=1; i<=max_img_height; i<<=1,i2++)
        {
            img_height = (1 << i2);
            for (j=2,j2=1; j<=max_img_width; j<<=1,j2++)
            {
                img_width = (1 << j2);

                int effective_img_height = img_height;
                int effective_img_width = img_width;

                local_threads[0] = 1;
                local_threads[1] = 1;

                switch (plus_minus) {
                    case 0:
                        // Height is one less than a power of two; the width is
                        // untouched, so pick a local width that divides it.
                        effective_img_height--;
                        local_threads[0] = work_group_size > max_local_workgroup_size[0] ? max_local_workgroup_size[0] : work_group_size;
                        while (img_width%local_threads[0] != 0)
                            local_threads[0]--;
                        break;
                    case 1:
                        // Width is one less than a power of two; the height is
                        // untouched, so pick a local height that divides it.
                        effective_img_width--;
                        local_threads[1] = work_group_size > max_local_workgroup_size[1] ? max_local_workgroup_size[1] : work_group_size;
                        while (img_height%local_threads[1] != 0)
                            local_threads[1]--;
                        break;
                    case 2:
                        // Both dimensions are decremented; local size stays 1x1.
                        effective_img_width--;
                        effective_img_height--;
                        break;
                    default:
                        break;
                }

                // Host buffers are sized for this exact image and freed at the
                // end of the iteration.
                input_ptr = generate_8888_image(effective_img_width, effective_img_height, d);
                output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * effective_img_width * effective_img_height);

                img_format.image_channel_order = CL_RGBA;
                img_format.image_channel_data_type = CL_UNORM_INT8;
                streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, effective_img_width, effective_img_height, 0, NULL, NULL);
                if (!streams[0])
                {
                    log_error("create_image_2d failed. width = %d, height = %d\n", effective_img_width, effective_img_height);
                    free(input_ptr);
                    free(output_ptr);
                    free_mtdata(d);
                    return -1;
                }
                img_format.image_channel_order = CL_RGBA;
                img_format.image_channel_data_type = CL_UNORM_INT8;
                streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, effective_img_width, effective_img_height, 0, NULL, NULL);
                if (!streams[1])
                {
                    log_error("create_image_2d failed. width = %d, height = %d\n", effective_img_width, effective_img_height);
                    clReleaseMemObject(streams[0]);
                    free(input_ptr);
                    free(output_ptr);
                    free_mtdata(d);
                    return -1;
                }

                size_t origin[3] = {0,0,0};
                size_t region[3] = {(size_t)effective_img_width, (size_t)effective_img_height, 1};
                err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
                if (err != CL_SUCCESS)
                {
                    log_error("clWriteImage failed\n");
                    clReleaseMemObject(streams[0]);
                    clReleaseMemObject(streams[1]);
                    free(input_ptr);
                    free(output_ptr);
                    free_mtdata(d);
                    return -1;
                }

                err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
                err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
                err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
                if (err != CL_SUCCESS)
                {
                    log_error("clSetKernelArgs failed\n");
                    clReleaseMemObject(streams[0]);
                    clReleaseMemObject(streams[1]);
                    free(input_ptr);
                    free(output_ptr);
                    free_mtdata(d);
                    return -1;
                }

                // One work-item per pixel of the effective image.
                threads[0] = (size_t)effective_img_width;
                threads[1] = (size_t)effective_img_height;
                log_info("Testing image dimensions %d x %d with local threads %d x %d.\n",
                         effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]);
                err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, local_threads, 0, NULL, NULL );
                if (err != CL_SUCCESS)
                {
                    log_error("clEnqueueNDRangeKernel failed\n");
                    log_error("Image Dimension test failed. image width = %d, image height = %d, local %d x %d\n",
                              effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]);
                    clReleaseMemObject(streams[0]);
                    clReleaseMemObject(streams[1]);
                    free(input_ptr);
                    free(output_ptr);
                    free_mtdata(d);
                    return -1;
                }
                err = clEnqueueReadImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
                if (err != CL_SUCCESS)
                {
                    log_error("clReadImage failed\n");
                    log_error("Image Dimension test failed. image width = %d, image height = %d, local %d x %d\n",
                              effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]);
                    clReleaseMemObject(streams[0]);
                    clReleaseMemObject(streams[1]);
                    free(input_ptr);
                    free(output_ptr);
                    free_mtdata(d);
                    return -1;
                }
                // A content mismatch is counted but does not stop the sweep.
                err = verify_8888_image(input_ptr, output_ptr, effective_img_width, effective_img_height);
                if (err)
                {
                    total_errors++;
                    log_error("Image Dimension test failed. image width = %d, image height = %d\n", effective_img_width, effective_img_height);
                }

                clReleaseMemObject(streams[0]);
                clReleaseMemObject(streams[1]);
                free(input_ptr);
                free(output_ptr);
            }
        }
    }

    // cleanup
    free_mtdata(d);
    clReleaseSampler(sampler);
    clReleaseKernel(kernel);
    clReleaseProgram(program);

    return total_errors;
}

View File

@@ -0,0 +1,220 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/*
 * OpenCL C source for the "test_rgba8888" kernel built by test_imagenpot.
 * Each work-item reads one texel from srcimg at its (x, y) global id and
 * writes it to the same coordinate of dstimg. Work-items whose id falls
 * outside dstimg return early, which lets the host round the global size
 * up past the image width.
 */
static const char *rgba8888_kernel_code =
"\n"
"__kernel void test_rgba8888(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" float4 color;\n"
"\n"
" if ( (tid_x >= get_image_width(dstimg)) || (tid_y >= get_image_height(dstimg)) )\n"
" return;\n"
" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n";
/*
 * Allocates a w x h image with four 8-bit channels per texel and fills every
 * byte from the supplied Mersenne Twister state.
 *
 * Returns the malloc'ed buffer (the caller frees it), or NULL when the
 * allocation fails. No random numbers are consumed in the failure case.
 */
static unsigned char *
generate_8888_image(int w, int h, MTdata d)
{
    unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
    int i;

    /* Do not write through a failed allocation. */
    if (ptr == NULL)
        return NULL;

    for (i=0; i<w*h*4; i++)
        ptr[i] = (unsigned char)genrand_int32(d);
    return ptr;
}
/*
 * Byte-wise comparison of two w x h RGBA8 images.
 * Logs the outcome and returns 0 when all w*h*4 bytes match, -1 otherwise.
 */
static int
verify_rgba8888_image(unsigned char *src, unsigned char *dst, int w, int h)
{
    const int total = w * h * 4;
    int pos = 0;

    /* Advance until the first differing byte or the end of the image. */
    while (pos < total && dst[pos] == src[pos])
        ++pos;

    if (pos < total)
    {
        log_error("NPOT_IMAGE_RGBA_UNORM_INT8 test for width = %d, height = %d failed\n", w, h);
        return -1;
    }

    log_info("NPOT_IMAGE_RGBA_UNORM_INT8 test for width = %d, height = %d passed\n", w, h);
    return 0;
}
/*
 * Non-power-of-two image sizes exercised by test_imagenpot. The two arrays
 * are read with the same index, so entry m of each forms one width/height
 * pair; they must stay the same length.
 */
int img_width_selection[] = { 97, 111, 322, 479 };
int img_height_selection[] = { 149, 222, 754, 385 };
int
test_imagenpot(cl_device_id device_id, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem streams[2];
cl_image_format img_format;
unsigned char *input_ptr, *output_ptr;
cl_program program;
cl_kernel kernel;
size_t global_threads[3], local_threads[3];
size_t local_workgroup_size;
int img_width;
int img_height;
int err;
cl_uint m;
size_t max_local_workgroup_size[3];
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( device_id )
cl_device_type device_type;
err = clGetDeviceInfo(device_id, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
if (err) {
log_error("Failed to get device type: %d\n",err);
return -1;
}
d = init_genrand( gRandomSeed );
for (m=0; m<sizeof(img_width_selection)/sizeof(int); m++)
{
img_width = img_width_selection[m];
img_height = img_height_selection[m];
input_ptr = generate_8888_image(img_width, img_height, d);
output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format,
img_width, img_height, 0, NULL, NULL);
if (!streams[0])
{
log_error("create_image_2d failed\n");
free_mtdata(d);
return -1;
}
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format,
img_width, img_height, 0, NULL, NULL);
if (!streams[1])
{
log_error("create_image_2d failed\n");
free_mtdata(d);
return -1;
}
size_t origin[3] = {0,0,0}, region[3] = {img_width, img_height, 1};
err = clEnqueueWriteImage(queue, streams[0], CL_TRUE,
origin, region, 0, 0,
input_ptr,
0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clWriteImage failed\n");
free_mtdata(d);
return -1;
}
err = create_single_kernel_helper(context, &program, &kernel, 1, &rgba8888_kernel_code, "test_rgba8888" );
if (err)
{
log_error("Failed to create kernel and program: %d\n", err);
free_mtdata(d);
return -1;
}
cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
test_error(err, "clCreateSampler failed");
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
free_mtdata(d);
return -1;
}
err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local_workgroup_size), &local_workgroup_size, NULL);
test_error(err, "clGetKernelWorkGroupInfo for CL_KERNEL_WORK_GROUP_SIZE failed");
err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
// Pick the minimum of the device and the kernel
if (local_workgroup_size > max_local_workgroup_size[0])
local_workgroup_size = max_local_workgroup_size[0];
global_threads[0] = ((img_width + local_workgroup_size - 1) / local_workgroup_size) * local_workgroup_size;
global_threads[1] = img_height;
local_threads[0] = local_workgroup_size;
local_threads[1] = 1;
err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, global_threads, local_threads, 0, NULL, NULL );
if (err != CL_SUCCESS)
{
log_error("%s clEnqueueNDRangeKernel failed\n", __FUNCTION__);
free_mtdata(d);
return -1;
}
err = clEnqueueReadImage(queue, streams[1], CL_TRUE,
origin, region, 0, 0,
(void *)output_ptr,
0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueReadBuffer failed\n");
return -1;
}
err = verify_rgba8888_image(input_ptr, output_ptr, img_width, img_height);
// cleanup
clReleaseSampler(sampler);
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[1]);
clReleaseKernel(kernel);
clReleaseProgram(program);
free(input_ptr);
free(output_ptr);
if (err)
break;
}
free_mtdata(d);
return err;
}

View File

@@ -0,0 +1,269 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/*
 * Allocates a w x h RGBA image with 8-bit channels and fills every byte from
 * the supplied Mersenne Twister state.
 *
 * Returns the malloc'ed buffer (the caller frees it), or NULL when the
 * allocation fails.
 */
static unsigned char *
generate_rgba8_image(int w, int h, MTdata d)
{
    unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
    int i;

    /* Do not write through a failed allocation. */
    if (ptr == NULL)
        return NULL;

    for (i=0; i<w*h*4; i++)
        ptr[i] = (unsigned char)genrand_int32(d);
    return ptr;
}
/*
 * Compares the w x h rectangle whose top-left texel is (x, y) in two RGBA8
 * images that are both img_width texels wide. Texels outside the rectangle
 * are ignored.
 *
 * Returns 0 when every channel inside the rectangle matches, -1 otherwise.
 */
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int x, int y, int w, int h, int img_width)
{
    const int first_chan = x * 4;
    const int end_chan = (x + w) * 4;
    int row;

    for (row = y; row < (y + h); ++row)
    {
        const int row_base = row * img_width * 4;
        int chan = first_chan;

        while (chan < end_chan)
        {
            if (outptr[row_base + chan] != image[row_base + chan])
                return -1;
            ++chan;
        }
    }
    return 0;
}
/*
 * Allocates a w x h RGBA image with 16-bit channels and fills every channel
 * from the supplied Mersenne Twister state.
 *
 * Returns the malloc'ed buffer (the caller frees it), or NULL when the
 * allocation fails.
 */
static unsigned short *
generate_rgba16_image(int w, int h, MTdata d)
{
    unsigned short *ptr = (unsigned short*)malloc(w * h * 4 * sizeof(unsigned short));
    int i;

    /* Do not write through a failed allocation. */
    if (ptr == NULL)
        return NULL;

    for (i=0; i<w*h*4; i++)
        ptr[i] = (unsigned short)genrand_int32(d);
    return ptr;
}
/*
 * Compares the w x h rectangle whose top-left texel is (x, y) in two RGBA16
 * images that are both img_width texels wide. Texels outside the rectangle
 * are ignored.
 *
 * Returns 0 when every channel inside the rectangle matches, -1 otherwise.
 */
static int
verify_rgba16_image(unsigned short *image, unsigned short *outptr, int x, int y, int w, int h, int img_width)
{
    const int first_chan = x * 4;
    const int end_chan = (x + w) * 4;
    int row;

    for (row = y; row < (y + h); ++row)
    {
        const int row_base = row * img_width * 4;
        int chan = first_chan;

        while (chan < end_chan)
        {
            if (outptr[row_base + chan] != image[row_base + chan])
                return -1;
            ++chan;
        }
    }
    return 0;
}
/*
 * Allocates a w x h RGBA image with float channels and fills every channel
 * with a value drawn by get_random_float(-0x40000000, 0x40000000, d).
 *
 * Returns the malloc'ed buffer (the caller frees it), or NULL when the
 * allocation fails.
 */
static float *
generate_rgbafp_image(int w, int h, MTdata d)
{
    float *ptr = (float*)malloc(w * h * 4 * sizeof(float));
    int i;

    /* Do not write through a failed allocation. */
    if (ptr == NULL)
        return NULL;

    for (i=0; i<w*h*4; i++)
        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
    return ptr;
}
/*
 * Compares the w x h rectangle whose top-left texel is (x, y) in two RGBA
 * float images that are both img_width texels wide. Channels are compared
 * with !=, so the check is exact. Texels outside the rectangle are ignored.
 *
 * Returns 0 when every channel inside the rectangle matches, -1 otherwise.
 */
static int
verify_rgbafp_image(float *image, float *outptr, int x, int y, int w, int h, int img_width)
{
    const int first_chan = x * 4;
    const int end_chan = (x + w) * 4;
    int row;

    for (row = y; row < (y + h); ++row)
    {
        const int row_base = row * img_width * 4;
        int chan = first_chan;

        while (chan < end_chan)
        {
            if (outptr[row_base + chan] != image[row_base + chan])
                return -1;
            ++chan;
        }
    }
    return 0;
}
/* Number of random sub-rectangle copies test_imagerandomcopy performs per image format. */
#define NUM_COPIES 10
/* Log labels for the three formats, indexed by the format loop counter in test_imagerandomcopy. */
static const char *test_str_names[] = { "CL_RGBA CL_UNORM_INT8", "CL_RGBA CL_UNORM_INT16", "CL_RGBA CL_FLOAT" };
int
test_imagerandomcopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_image_format img_format;
unsigned char *rgba8_inptr, *rgba8_outptr;
unsigned short *rgba16_inptr, *rgba16_outptr;
float *rgbafp_inptr, *rgbafp_outptr;
clMemWrapper streams[6];
int img_width = 512;
int img_height = 512;
int i, j;
cl_int err;
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
log_info("Testing with image %d x %d.\n", img_width, img_height);
d = init_genrand( gRandomSeed );
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT16;
streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_FLOAT;
streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
for (i=0; i<3; i++)
{
void *p, *outp;
unsigned int x[2], y[2], delta_w, delta_h ;
switch (i)
{
case 0:
p = (void *)rgba8_inptr;
outp = (void *)rgba8_outptr;
break;
case 1:
p = (void *)rgba16_inptr;
outp = (void *)rgba16_outptr;
break;
case 2:
p = (void *)rgbafp_inptr;
outp = (void *)rgbafp_outptr;
break;
}
size_t origin[3]={0,0,0}, region[3]={img_width, img_height,1};
err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
// err = clWriteImage(context, streams[i*2], false, 0, 0, 0, img_width, img_height, 0, NULL, 0, 0, p, NULL);
test_error(err, "clEnqueueWriteImage failed");
for (j=0; j<NUM_COPIES; j++)
{
x[0] = (int)get_random_float(0, img_width, d);
do
{
x[1] = (int)get_random_float(0, img_width, d);
} while (x[1] <= x[0]);
y[0] = (int)get_random_float(0, img_height, d);
do
{
y[1] = (int)get_random_float(0, img_height, d);
} while (y[1] <= y[0]);
delta_w = x[1] - x[0];
delta_h = y[1] - y[0];
log_info("Testing clCopyImage for %s: x = %d, y = %d, w = %d, h = %d\n", test_str_names[i], x[0], y[0], delta_w, delta_h);
origin[0] = x[0];
origin[1] = y[0];
origin[2] = 0;
region[0] = delta_w;
region[1] = delta_h;
region[2] = 1;
err = clEnqueueCopyImage(queue, streams[i*2], streams[i*2+1], origin, origin, region, 0, NULL, NULL);
// err = clCopyImage(context, streams[i*2], streams[i*2+1],
// x[0], y[0], 0, x[0], y[0], 0, delta_w, delta_h, 0, NULL);
test_error(err, "clEnqueueCopyImage failed");
origin[0] = 0;
origin[1] = 0;
origin[2] = 0;
region[0] = img_width;
region[1] = img_height;
region[2] = 1;
err = clEnqueueReadImage(queue, streams[i*2+1], CL_TRUE, origin, region, 0, 0, outp, 0, NULL, NULL);
// err = clReadImage(context, streams[i*2+1], false, 0, 0, 0, img_width, img_height, 0, 0, 0, outp, NULL);
test_error(err, "clEnqueueReadImage failed");
switch (i)
{
case 0:
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, x[0], y[0], delta_w, delta_h, img_width);
break;
case 1:
err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, x[0], y[0], delta_w, delta_h, img_width);
break;
case 2:
err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, x[0], y[0], delta_w, delta_h, img_width);
break;
}
if (err)
break;
}
if (err)
break;
}
free_mtdata(d); d = NULL;
free(rgba8_inptr);
free(rgba16_inptr);
free(rgbafp_inptr);
free(rgba8_outptr);
free(rgba16_outptr);
free(rgbafp_outptr);
if (err)
log_error("IMAGE random copy test failed\n");
else
log_info("IMAGE random copy test passed\n");
return err;
}

View File

@@ -0,0 +1,417 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/*
 * Allocates a w x h RGBA image with 8-bit channels and fills every byte from
 * the supplied Mersenne Twister state.
 *
 * Returns the malloc'ed buffer (the caller frees it), or NULL when the
 * allocation fails.
 */
static unsigned char *
generate_rgba8_image(int w, int h, MTdata d)
{
    unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
    int i;

    /* Do not write through a failed allocation. */
    if (ptr == NULL)
        return NULL;

    for (i=0; i<w*h*4; i++)
        ptr[i] = (unsigned char)genrand_int32(d);
    return ptr;
}
/*
 * Overwrites the w x h rectangle at (x, y) of an RGBA8 image that is
 * img_width texels wide with fresh random bytes. Rows are visited top to
 * bottom, texels left to right and channels 0..3 in order, one
 * genrand_int32 draw per channel.
 */
static void
update_rgba8_image(unsigned char *p, int x, int y, int w, int h, int img_width, MTdata d)
{
    int row, col, chan;

    for (row = y; row < y + h; ++row)
    {
        int base = (row * img_width + x) * 4;

        for (col = 0; col < w; ++col)
        {
            for (chan = 0; chan < 4; ++chan)
                p[base + chan] = (unsigned char)genrand_int32(d);
            base += 4;
        }
    }
}
/*
 * Copies a tightly packed w x h image `in` into the rectangle at (x, y) of
 * the larger image `out`, which is img_width texels wide. elem_size is the
 * size of one texel in bytes. Source bytes are consumed sequentially.
 */
static void
update_image_from_image(void *out, void *in, int x, int y, int w, int h, int img_width, int elem_size)
{
    char *dst = (char *)out;
    char *src = (char *)in;
    int row, col, byte;
    int consumed = 0;

    for (row = y; row < y + h; ++row)
    {
        /* Byte offset of the first destination texel on this row. */
        int dst_off = (row * img_width + x) * elem_size;

        for (col = 0; col < w; ++col)
        {
            for (byte = 0; byte < elem_size; ++byte)
                dst[dst_off++] = src[consumed++];
        }
    }
}
/*
 * Compares two complete w x h RGBA8 images channel by channel.
 *
 * On the first mismatch it logs the failing channel index together with the
 * four channels of the texel that contains it, and returns -1. Returns 0
 * when the images are identical.
 */
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    int i;

    for (i=0; i<w*h*4; i++)
    {
        if (outptr[i] != image[i])
        {
            /* Start of the texel holding channel i. Printing from i itself
               would read up to three elements past the end of both buffers
               when the mismatch is in the last texel. */
            int px = i - (i % 4);

            log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i, image[px], image[px+1], image[px+2], image[px+3], outptr[px], outptr[px+1], outptr[px+2], outptr[px+3]);
            return -1;
        }
    }
    return 0;
}
/*
 * Allocates a w x h RGBA image with 16-bit channels and fills every channel
 * from the supplied Mersenne Twister state.
 *
 * Returns the malloc'ed buffer (the caller frees it), or NULL when the
 * allocation fails.
 */
static unsigned short *
generate_rgba16_image(int w, int h, MTdata d)
{
    unsigned short *ptr = (unsigned short*)malloc(w * h * 4 * sizeof(unsigned short));
    int i;

    /* Do not write through a failed allocation. */
    if (ptr == NULL)
        return NULL;

    for (i=0; i<w*h*4; i++)
        ptr[i] = (unsigned short)genrand_int32(d);
    return ptr;
}
/*
 * Overwrites the w x h rectangle at (x, y) of an RGBA16 image that is
 * img_width texels wide with fresh random values. Rows are visited top to
 * bottom, texels left to right and channels 0..3 in order, one
 * genrand_int32 draw per channel.
 */
static void
update_rgba16_image(unsigned short *p, int x, int y, int w, int h, int img_width, MTdata d)
{
    int row, col, chan;

    for (row = y; row < y + h; ++row)
    {
        int base = (row * img_width + x) * 4;

        for (col = 0; col < w; ++col)
        {
            for (chan = 0; chan < 4; ++chan)
                p[base + chan] = (unsigned short)genrand_int32(d);
            base += 4;
        }
    }
}
/*
 * Compares two complete w x h RGBA16 images channel by channel.
 *
 * On the first mismatch it logs the failing channel index together with the
 * four channels of the texel that contains it, and returns -1. Returns 0
 * when the images are identical.
 */
static int
verify_rgba16_image(unsigned short *image, unsigned short *outptr, int w, int h)
{
    int i;

    for (i=0; i<w*h*4; i++)
    {
        if (outptr[i] != image[i])
        {
            /* Start of the texel holding channel i. Printing from i itself
               would read up to three elements past the end of both buffers
               when the mismatch is in the last texel. */
            int px = i - (i % 4);

            log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i, image[px], image[px+1], image[px+2], image[px+3], outptr[px], outptr[px+1], outptr[px+2], outptr[px+3]);
            return -1;
        }
    }
    return 0;
}
/*
 * Allocates a w x h RGBA image with float channels and fills every channel
 * with a value drawn by get_random_float(-0x40000000, 0x40000000, d).
 *
 * Returns the malloc'ed buffer (the caller frees it), or NULL when the
 * allocation fails.
 */
static float *
generate_rgbafp_image(int w, int h, MTdata d)
{
    float *ptr = (float*)malloc(w * h * 4 * sizeof(float));
    int i;

    /* Do not write through a failed allocation. */
    if (ptr == NULL)
        return NULL;

    for (i=0; i<w*h*4; i++)
        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
    return ptr;
}
/*
 * Overwrites the w x h rectangle at (x, y) of an RGBA float image that is
 * img_width texels wide with fresh random values. Rows are visited top to
 * bottom, texels left to right and channels 0..3 in order, one
 * get_random_float draw per channel.
 */
static void
update_rgbafp_image(float *p, int x, int y, int w, int h, int img_width, MTdata d)
{
    int row, col, chan;

    for (row = y; row < y + h; ++row)
    {
        int base = (row * img_width + x) * 4;

        for (col = 0; col < w; ++col)
        {
            for (chan = 0; chan < 4; ++chan)
                p[base + chan] = get_random_float(-0x40000000, 0x40000000, d);
            base += 4;
        }
    }
}
/*
 * Compares two complete w x h RGBA float images channel by channel using an
 * exact != comparison.
 *
 * On the first mismatch it logs the failing channel index together with the
 * four channels of the texel that contains it, and returns -1. Returns 0
 * when the images are identical.
 */
static int
verify_rgbafp_image(float *image, float *outptr, int w, int h)
{
    int i;

    for (i=0; i<w*h*4; i++)
    {
        if (outptr[i] != image[i])
        {
            /* Start of the texel holding channel i. Printing from i itself
               would read up to three elements past the end of both buffers
               when the mismatch is in the last texel. */
            int px = i - (i % 4);

            log_error("i = %d. Expected (%f %f %f %f), got (%f %f %f %f)\n", i, image[px], image[px+1], image[px+2], image[px+3], outptr[px], outptr[px+1], outptr[px+2], outptr[px+3]);
            return -1;
        }
    }
    return 0;
}
int
test_imagereadwrite(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_image_format img_format;
unsigned char *rgba8_inptr, *rgba8_outptr;
unsigned short *rgba16_inptr, *rgba16_outptr;
float *rgbafp_inptr, *rgbafp_outptr;
clMemWrapper streams[3];
int img_width = 512;
int img_height = 512;
int num_tries = 200;
int i, j, err;
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
d = init_genrand( gRandomSeed );
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT16;
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_FLOAT;
streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
for (i=0; i<3; i++)
{
void *p;
if (i == 0)
p = (void *)rgba8_inptr;
else if (i == 1)
p = (void *)rgba16_inptr;
else
p = (void *)rgbafp_inptr;
size_t origin[3] = {0,0,0}, region[3] = {img_width, img_height, 1};
err = clEnqueueWriteImage(queue, streams[i], CL_TRUE,
origin, region, 0, 0,
p, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clWriteImage2D failed\n");
return -1;
}
}
for (i=0,j=0; i<num_tries*3; i++,j++)
{
int x = (int)get_random_float(0, img_width, d);
int y = (int)get_random_float(0, img_height, d);
int w = (int)get_random_float(1, (img_width - x), d);
int h = (int)get_random_float(1, (img_height - y), d);
size_t input_pitch;
int set_input_pitch = (int)(genrand_int32(d) & 0x01);
int packed_update = (int)(genrand_int32(d) & 0x01);
void *p, *outp;
int elem_size;
if (j == 3)
j = 0;
switch (j)
{
case 0:
//if ((w<=10) || (h<=10)) continue;
elem_size = 4;
if(packed_update)
{
p = generate_rgba8_image(w, h, d);
update_image_from_image(rgba8_inptr, p, x, y, w, h, img_width, elem_size);
}
else
{
update_rgba8_image(rgba8_inptr, x, y, w, h, img_width, d);
p = (void *)(rgba8_inptr + ((y * img_width + x) * 4));
}
outp = (void *)rgba8_outptr;
break;
case 1:
//if ((w<=8) || (h<=8)) continue;
elem_size = 2*4;
if(packed_update)
{
p = generate_rgba16_image(w, h, d);
update_image_from_image(rgba16_inptr, p, x, y, w, h, img_width, elem_size);
}
else
{
update_rgba16_image(rgba16_inptr, x, y, w, h, img_width, d);
p = (void *)(rgba16_inptr + ((y * img_width + x) * 4));
}
outp = (void *)rgba16_outptr;
break;
case 2:
//if ((w<=8) || (h<=8)) continue;
elem_size = 4*4;
if(packed_update)
{
p = generate_rgbafp_image(w, h, d);
update_image_from_image(rgbafp_inptr, p, x, y, w, h, img_width, elem_size);
}
else
{
update_rgbafp_image(rgbafp_inptr, x, y, w, h, img_width, d);
p = (void *)(rgbafp_inptr + ((y * img_width + x) * 4));
}
outp = (void *)rgbafp_outptr;
break;
}
const char* update_packed_pitch_name = "";
if(packed_update)
{
if(set_input_pitch)
{
// for packed updates the pitch does not need to be calculated here (but can be)
update_packed_pitch_name = "'packed with pitch'";
input_pitch = w*elem_size;
}
else
{
// for packed updates the pitch does not need to be calculated here
update_packed_pitch_name = "'packed without pitch'";
input_pitch = 0;
}
}
else
{
// for unpacked updates the pitch is required
update_packed_pitch_name = "'unpacked with pitch'";
input_pitch = img_width*elem_size;
}
size_t origin[3] = {x,y,0}, region[3] = {w, h, 1};
err = clEnqueueWriteImage(queue, streams[j], CL_TRUE,
origin, region, input_pitch, 0, p,
0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clWriteImage update failed for %s %s: %d\n",
(packed_update) ? "packed" : "unpacked",
(set_input_pitch) ? "set pitch" : "unset pitch", err);
free_mtdata(d);
return -1;
}
if(packed_update)
{
free(p);
p = NULL;
}
memset(outp, 0x7, img_width*img_height*elem_size);
origin[0]=0; origin[1]=0; origin[2]=0;
region[0]=img_width; region[1]=img_height; region[2]=1;
err = clEnqueueReadImage(queue, streams[j], CL_TRUE,
origin, region, 0,0,
outp, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clReadImage failed\n");
free_mtdata(d);
return -1;
}
switch (j)
{
case 0:
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
if (err)
{
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
}
break;
case 1:
err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, img_width, img_height);
if (err)
{
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
}
break;
case 2:
err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, img_width, img_height);
if (err)
{
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
}
break;
}
if (err) break;
}
free_mtdata(d);
free(rgba8_inptr);
free(rgba16_inptr);
free(rgbafp_inptr);
free(rgba8_outptr);
free(rgba16_outptr);
free(rgbafp_outptr);
if (!err)
log_info("IMAGE read, write test passed\n");
return err;
}

View File

@@ -0,0 +1,417 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/*
 * Allocates a w x h x d RGBA volume with 8-bit channels and fills every byte
 * from the supplied Mersenne Twister state.
 *
 * Returns the malloc'ed buffer (the caller frees it), or NULL when the
 * allocation fails.
 */
static unsigned char *
generate_rgba8_image(int w, int h, int d, MTdata mtData)
{
    unsigned char *ptr = (unsigned char*)malloc(w * h * d *4);
    int i;

    /* Do not write through a failed allocation. */
    if (ptr == NULL)
        return NULL;

    for (i=0; i<w*h*d*4; i++)
        ptr[i] = (unsigned char)genrand_int32(mtData);
    return ptr;
}
/*
 * Overwrites the w x h x d box at (x, y, z) of an RGBA8 volume with fresh
 * random bytes. The volume is img_width x img_height texels per slice;
 * img_depth is accepted but not used. Slices, rows, texels and channels are
 * visited in increasing order, one genrand_int32 draw per channel.
 */
static void
update_rgba8_image(unsigned char *p, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, MTdata mtData)
{
    const int slice_texels = img_width * img_height;
    int slice, row, col, chan;

    for (slice = z; slice < z + d; ++slice)
    {
        for (row = y; row < y + h; ++row)
        {
            int base = (slice * slice_texels + row * img_width + x) * 4;

            for (col = 0; col < w; ++col)
            {
                for (chan = 0; chan < 4; ++chan)
                    p[base + chan] = (unsigned char)genrand_int32(mtData);
                base += 4;
            }
        }
    }
}
/*
 * Copies a tightly packed w x h x d volume `in` into the box at (x, y, z) of
 * the larger volume `out`, which has img_width x img_height texels per
 * slice. elem_size is the size of one texel in bytes; img_depth is accepted
 * but not used. Source bytes are consumed sequentially.
 */
static void
update_image_from_image(void *out, void *in, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, int elem_size)
{
    const int slice_texels = img_width * img_height;
    char *dst = (char *)out;
    char *src = (char *)in;
    int slice, row, col, byte;
    int consumed = 0;

    for (slice = z; slice < z + d; ++slice)
    {
        for (row = y; row < y + h; ++row)
        {
            /* Byte offset of the first destination texel on this row. */
            int dst_off = (slice * slice_texels + row * img_width + x) * elem_size;

            for (col = 0; col < w; ++col)
            {
                for (byte = 0; byte < elem_size; ++byte)
                    dst[dst_off++] = src[consumed++];
            }
        }
    }
}
/*
 * Compares two complete w x h x d RGBA8 volumes channel by channel.
 *
 * On the first mismatch it logs the failing channel index together with the
 * four channels of the texel that contains it, and returns -1. Returns 0
 * when the volumes are identical.
 */
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h, int d)
{
    int i;

    for (i=0; i<w*h*d*4; i++)
    {
        if (outptr[i] != image[i])
        {
            /* Start of the texel holding channel i. Printing from i itself
               would read up to three elements past the end of both buffers
               when the mismatch is in the last texel. */
            int px = i - (i % 4);

            log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i, image[px], image[px+1], image[px+2], image[px+3], outptr[px], outptr[px+1], outptr[px+2], outptr[px+3]);
            return -1;
        }
    }
    return 0;
}
/*
 * Allocates a w x h x d RGBA volume with 16-bit channels and fills every
 * channel from the supplied Mersenne Twister state.
 *
 * Returns the malloc'ed buffer (the caller frees it), or NULL when the
 * allocation fails.
 */
static unsigned short *
generate_rgba16_image(int w, int h, int d, MTdata mtData)
{
    unsigned short *ptr = (unsigned short*)malloc(w * h * d * 4 * sizeof(unsigned short));
    int i;

    /* Do not write through a failed allocation. */
    if (ptr == NULL)
        return NULL;

    for (i=0; i<w*h*d*4; i++)
        ptr[i] = (unsigned short)genrand_int32(mtData);
    return ptr;
}
/*
 * Overwrites the w x h x d box at (x, y, z) of an RGBA16 volume with fresh
 * random values. The volume is img_width x img_height texels per slice;
 * img_depth is accepted but not used. Slices, rows, texels and channels are
 * visited in increasing order, one genrand_int32 draw per channel.
 */
static void
update_rgba16_image(unsigned short *p, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, MTdata mtData)
{
    const int slice_texels = img_width * img_height;
    int slice, row, col, chan;

    for (slice = z; slice < z + d; ++slice)
    {
        for (row = y; row < y + h; ++row)
        {
            int base = (slice * slice_texels + row * img_width + x) * 4;

            for (col = 0; col < w; ++col)
            {
                for (chan = 0; chan < 4; ++chan)
                    p[base + chan] = (unsigned short)genrand_int32(mtData);
                base += 4;
            }
        }
    }
}
/*
 * Compares two complete w x h x d RGBA16 volumes channel by channel.
 *
 * On the first mismatch it logs the failing channel index together with the
 * four channels of the texel that contains it, and returns -1. Returns 0
 * when the volumes are identical.
 */
static int
verify_rgba16_image(unsigned short *image, unsigned short *outptr, int w, int h, int d)
{
    int i;

    for (i=0; i<w*h*d*4; i++)
    {
        if (outptr[i] != image[i])
        {
            /* Start of the texel holding channel i. Printing from i itself
               would read up to three elements past the end of both buffers
               when the mismatch is in the last texel. */
            int px = i - (i % 4);

            log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i, image[px], image[px+1], image[px+2], image[px+3], outptr[px], outptr[px+1], outptr[px+2], outptr[px+3]);
            return -1;
        }
    }
    return 0;
}
/*
 * Allocates a w x h x d RGBA volume with float channels and fills every
 * channel with a value drawn by
 * get_random_float(-0x40000000, 0x40000000, mtData).
 *
 * Returns the malloc'ed buffer (the caller frees it), or NULL when the
 * allocation fails.
 */
static float *
generate_rgbafp_image(int w, int h, int d, MTdata mtData)
{
    float *ptr = (float*)malloc(w * h * d *4 * sizeof(float));
    int i;

    /* Do not write through a failed allocation. */
    if (ptr == NULL)
        return NULL;

    for (i=0; i<w*h*d*4; i++)
        ptr[i] = get_random_float(-0x40000000, 0x40000000, mtData);
    return ptr;
}
/*
 * Overwrites the w x h x d box at (x, y, z) of an RGBA float volume with
 * fresh random values. The volume is img_width x img_height texels per
 * slice; img_depth is accepted but not used. Slices, rows, texels and
 * channels are visited in increasing order, one get_random_float draw per
 * channel.
 */
static void
update_rgbafp_image(float *p, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, MTdata mtData)
{
    const int slice_texels = img_width * img_height;
    int slice, row, col, chan;

    for (slice = z; slice < z + d; ++slice)
    {
        for (row = y; row < y + h; ++row)
        {
            int base = (slice * slice_texels + row * img_width + x) * 4;

            for (col = 0; col < w; ++col)
            {
                for (chan = 0; chan < 4; ++chan)
                    p[base + chan] = get_random_float(-0x40000000, 0x40000000, mtData);
                base += 4;
            }
        }
    }
}
/*
 * Compares two complete w x h x d RGBA float volumes channel by channel
 * using an exact != comparison.
 *
 * On the first mismatch it logs the failing channel index together with the
 * four channels of the texel that contains it, and returns -1. Returns 0
 * when the volumes are identical.
 */
static int
verify_rgbafp_image(float *image, float *outptr, int w, int h, int d)
{
    int i;

    for (i=0; i<w*h*d*4; i++)
    {
        if (outptr[i] != image[i])
        {
            /* Start of the texel holding channel i. Printing from i itself
               would read up to three elements past the end of both buffers
               when the mismatch is in the last texel. */
            int px = i - (i % 4);

            log_error("i = %d. Expected (%f %f %f %f), got (%f %f %f %f)\n", i, image[px], image[px+1], image[px+2], image[px+3], outptr[px], outptr[px+1], outptr[px+2], outptr[px+3]);
            return -1;
        }
    }
    return 0;
}
int
test_imagereadwrite3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_image_format img_format;
unsigned char *rgba8_inptr, *rgba8_outptr;
unsigned short *rgba16_inptr, *rgba16_outptr;
float *rgbafp_inptr, *rgbafp_outptr;
clMemWrapper streams[3];
int img_width = 64;
int img_height = 64;
int img_depth = 32;
int img_slice = img_width * img_height;
int num_tries = 30;
int i, j, err;
MTdata mtData;
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
mtData = init_genrand( gRandomSeed );
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, img_depth, mtData);
rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, img_depth, mtData);
rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, img_depth, mtData);
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height * img_depth);
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height * img_depth);
rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height * img_depth);
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
test_error(err, "create_image_3d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT16;
streams[1] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
test_error(err, "create_image_3d failed");
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_FLOAT;
streams[2] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
test_error(err, "create_image_3d failed");
for (i=0; i<3; i++)
{
void *p;
if (i == 0)
p = (void *)rgba8_inptr;
else if (i == 1)
p = (void *)rgba16_inptr;
else
p = (void *)rgbafp_inptr;
size_t origin[3] = {0,0,0}, region[3] = {img_width, img_height, img_depth};
err = clEnqueueWriteImage(queue, streams[i], CL_TRUE,
origin, region, 0, 0,
p,
0, NULL, NULL);
test_error(err, "clEnqueueWriteImage failed");
}
for (i=0,j=0; i<num_tries*3; i++,j++)
{
int x = (int)get_random_float(0, (float)img_width - 1, mtData);
int y = (int)get_random_float(0, (float)img_height - 1, mtData);
int z = (int)get_random_float(0, (float)img_depth - 1, mtData);
int w = (int)get_random_float(1, (float)(img_width - x), mtData);
int h = (int)get_random_float(1, (float)(img_height - y), mtData);
int d = (int)get_random_float(1, (float)(img_depth - z), mtData);
size_t input_pitch, input_slice_pitch;
int set_input_pitch = (int)(genrand_int32(mtData) & 0x01);
int packed_update = (int)(genrand_int32(mtData) & 0x01);
void *p, *outp;
int elem_size;
if (j == 3)
j = 0;
// packed: the source image for the write is a whole image .
// unpacked: the source image for the write is a subset within a larger image
switch (j)
{
case 0:
elem_size = 4;
if(packed_update)
{
p = generate_rgba8_image(w, h, d, mtData);
update_image_from_image(rgba8_inptr, p, x, y, z, w, h, d, img_width, img_height, img_depth, elem_size);
}
else
{
update_rgba8_image(rgba8_inptr, x, y, z, w, h, d, img_width, img_height, img_depth, mtData);
p = (void *)(rgba8_inptr + ((z * img_slice + y * img_width + x) * 4));
}
outp = (void *)rgba8_outptr;
break;
case 1:
elem_size = 2*4;
if(packed_update)
{
p = generate_rgba16_image(w, h, d, mtData);
update_image_from_image(rgba16_inptr, p, x, y, z, w, h, d, img_width, img_height, img_depth, elem_size);
}
else
{
update_rgba16_image(rgba16_inptr, x, y, z, w, h, d, img_width, img_height, img_depth, mtData);
p = (void *)(rgba16_inptr + ((z * img_slice + y * img_width + x) * 4));
}
outp = (void *)rgba16_outptr;
break;
case 2:
elem_size = 4*4;
if(packed_update)
{
p = generate_rgbafp_image(w, h, d, mtData);
update_image_from_image(rgbafp_inptr, p, x, y, z, w, h, d, img_width, img_height, img_depth, elem_size);
}
else
{
update_rgbafp_image(rgbafp_inptr, x, y, z, w, h, d, img_width, img_height, img_depth, mtData);
p = (void *)(rgbafp_inptr + ((z * img_slice + y * img_width + x) * 4));
}
outp = (void *)rgbafp_outptr;
break;
}
const char* update_packed_pitch_name = "";
if(packed_update)
{
if(set_input_pitch)
{
// for packed updates the pitch does not need to be calculated here (but can be)
update_packed_pitch_name = "'packed with pitch'";
input_pitch = w*elem_size;
input_slice_pitch = w*h*elem_size;
}
else
{
// for packed updates the pitch does not need to be calculated here
update_packed_pitch_name = "'packed without pitch'";
input_pitch = 0;
input_slice_pitch = 0;
}
}
else
{
// for unpacked updates the pitch is required
update_packed_pitch_name = "'unpacked with pitch'";
input_pitch = img_width*elem_size;
input_slice_pitch = input_pitch*img_height;
}
size_t origin[3] = {x,y,z}, region[3] = {w, h, d};
err = clEnqueueWriteImage(queue, streams[j], CL_TRUE,
origin, region, input_pitch, input_slice_pitch,
p, 0, NULL, NULL);
test_error(err, "clEnqueueWriteImage failed");
if(packed_update)
{
free(p);
p = NULL;
}
memset(outp, 0x7, img_width*img_height*img_depth*elem_size);
origin[0]=0; origin[1]=0; origin[2]=0; region[0]=img_width; region[1]=img_height; region[2]=img_depth;
err = clEnqueueReadImage(queue, streams[j], CL_TRUE,
origin, region, 0, 0,
outp, 0, NULL, NULL);
test_error(err, "clEnqueueReadImage failed");
switch (j)
{
case 0:
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height, img_depth);
if (err)
{
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
}
break;
case 1:
err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, img_width, img_height, img_depth);
if (err)
{
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
}
break;
case 2:
err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, img_width, img_height, img_depth);
if (err)
{
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
}
break;
}
if (err)
break;
}
free_mtdata(mtData);
free(rgba8_inptr);
free(rgba16_inptr);
free(rgbafp_inptr);
free(rgba8_outptr);
free(rgba16_outptr);
free(rgbafp_outptr);
if (!err)
log_info("IMAGE read, write test passed\n");
return err;
}

View File

@@ -0,0 +1,159 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the kernel under test. Each work-item reads the int at
// its global id from src, casts it to float and stores it at the same index
// in dst.
const char *int2float_kernel_code =
"__kernel void test_int2float(__global int *src, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)src[tid];\n"
"\n"
"}\n";
// Checks the device conversion against the host one: every outptr element must
// equal the corresponding inptr element cast to float.
//
// inptr  - the n ints that were given to the kernel.
// outptr - the n floats read back from the device.
// n      - number of elements to compare.
//
// Logs the outcome and returns 0 when all elements match, -1 at the first
// mismatch.
int
verify_int2float(cl_int *inptr, cl_float *outptr, int n)
{
    int idx;

    for (idx = 0; idx < n; ++idx)
    {
        const float expected = (float)inptr[idx];

        if (outptr[idx] == expected)
            continue;

        log_error("INT2FLOAT test failed\n");
        return -1;
    }

    log_info("INT2FLOAT test passed\n");
    return 0;
}
// Runs the test_int2float kernel over num_elements random ints and checks the
// floats it produces with verify_int2float().
//
// device        - not referenced by this test.
// context/queue - used for every OpenCL object and command created here.
// num_elements  - number of ints converted (also the global work size).
//
// Returns 0 when the output verifies, -1 on any allocation, OpenCL or
// verification failure. All exits go through the cleanup label so that host
// buffers and OpenCL objects are released on the failure paths as well.
int
test_int2float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem      streams[2] = { NULL, NULL };
    cl_int      *input_ptr = NULL;
    cl_float    *output_ptr = NULL;
    cl_program  program = NULL;
    cl_kernel   kernel = NULL;
    size_t      lengths[1];
    size_t      threads[1];
    int         err = -1;
    int         i;
    MTdata      d = NULL;

    input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
    if (!input_ptr || !output_ptr)
    {
        log_error("malloc failed\n");
        err = -1;
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        err = -1;
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        err = -1;
        goto cleanup;
    }

    // NOTE(review): if get_random_float() can return the upper bound (2^31)
    // exactly, the cast below is out of range for cl_int -- confirm the
    // helper's output range.
    d = init_genrand( gRandomSeed );
    for (i=0; i<num_elements; i++)
        input_ptr[i] = (cl_int)get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        err = -1;
        goto cleanup;
    }

    lengths[0] = strlen(int2float_kernel_code);
    program = clCreateProgramWithSource(context, 1, &int2float_kernel_code, lengths, NULL);
    if (!program)
    {
        log_error("clCreateProgramWithSource failed\n");
        err = -1;
        goto cleanup;
    }

    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clBuildProgram failed\n");
        err = -1;
        goto cleanup;
    }

    kernel = clCreateKernel(program, "test_int2float", NULL);
    if (!kernel)
    {
        log_error("clCreateKernel failed\n");
        err = -1;
        goto cleanup;
    }

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        err = -1;
        goto cleanup;
    }

    threads[0] = (size_t)num_elements;
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        err = -1;
        goto cleanup;
    }

    // Blocking read, so output_ptr is complete when the call returns.
    err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        err = -1;
        goto cleanup;
    }

    err = verify_int2float(input_ptr, output_ptr, num_elements);

cleanup:
    // Only release what was actually created; handles start out as NULL.
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return err;
}

View File

@@ -0,0 +1,389 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C sources for the scalar int arithmetic kernels. In each kernel a
// work-item handles the single element at its global id.

// dst = srcA + srcB
const char *int_add_kernel_code =
"__kernel void test_int_add(__global int *srcA, __global int *srcB, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// dst = srcA - srcB
const char *int_sub_kernel_code =
"__kernel void test_int_sub(__global int *srcA, __global int *srcB, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// dst = srcA * srcB
const char *int_mul_kernel_code =
"__kernel void test_int_mul(__global int *srcA, __global int *srcB, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// dst = srcA * srcB + srcC
const char *int_mad_kernel_code =
"__kernel void test_int_mad(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
"}\n";
// NOTE(review): MAX_ERR does not appear to be referenced by the integer tests
// in this file -- confirm whether it is still needed.
static const float MAX_ERR = 1e-5f;
// Verifies the INT_ADD kernel output: outptr[i] must equal
// inptrA[i] + inptrB[i] for the first n elements.
//
// The sum is formed and compared in unsigned arithmetic. The operands may be
// any 32-bit value, and a signed overflow on the host would be undefined
// behaviour; unsigned arithmetic wraps and yields the same low 32 bits.
//
// Logs the outcome and returns 0 on success, -1 at the first mismatch.
int
verify_int_add(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        unsigned int r = (unsigned int)inptrA[i] + (unsigned int)inptrB[i];

        if (r != (unsigned int)outptr[i])
        {
            log_error("INT_ADD int test failed\n");
            return -1;
        }
    }

    log_info("INT_ADD int test passed\n");
    return 0;
}
// Verifies the INT_SUB kernel output: outptr[i] must equal
// inptrA[i] - inptrB[i] for the first n elements.
//
// The difference is formed and compared in unsigned arithmetic. The operands
// may be any 32-bit value, and a signed overflow on the host would be
// undefined behaviour; unsigned arithmetic wraps and yields the same low
// 32 bits.
//
// Logs the outcome and returns 0 on success, -1 at the first mismatch.
int
verify_int_sub(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        unsigned int r = (unsigned int)inptrA[i] - (unsigned int)inptrB[i];

        if (r != (unsigned int)outptr[i])
        {
            log_error("INT_SUB int test failed\n");
            return -1;
        }
    }

    log_info("INT_SUB int test passed\n");
    return 0;
}
// Verifies the INT_MUL kernel output: outptr[i] must equal
// inptrA[i] * inptrB[i] for the first n elements.
//
// The product is formed and compared in unsigned arithmetic. The operands may
// be any 32-bit value, and a signed overflow on the host would be undefined
// behaviour; unsigned arithmetic wraps and yields the same low 32 bits.
//
// Logs the outcome and returns 0 on success, -1 at the first mismatch.
int
verify_int_mul(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        unsigned int r = (unsigned int)inptrA[i] * (unsigned int)inptrB[i];

        if (r != (unsigned int)outptr[i])
        {
            log_error("INT_MUL int test failed\n");
            return -1;
        }
    }

    log_info("INT_MUL int test passed\n");
    return 0;
}
// Verifies the INT_MAD kernel output: outptr[i] must equal
// inptrA[i] * inptrB[i] + inptrC[i] for the first n elements.
//
// The expression is evaluated and compared in unsigned arithmetic. The
// operands may be any 32-bit value, and a signed overflow on the host would
// be undefined behaviour; unsigned arithmetic wraps and yields the same low
// 32 bits.
//
// Logs the outcome and returns 0 on success, -1 at the first mismatch.
int
verify_int_mad(int *inptrA, int *inptrB, int *inptrC, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        unsigned int r = (unsigned int)inptrA[i] * (unsigned int)inptrB[i]
                       + (unsigned int)inptrC[i];

        if (r != (unsigned int)outptr[i])
        {
            log_error("INT_MAD int test failed\n");
            return -1;
        }
    }

    log_info("INT_MAD int test passed\n");
    return 0;
}
// Builds the scalar int add/sub/mul/mad kernels, runs each one over random
// inputs and compares the device output with the host result.
//
// device        - not referenced by this test.
// context/queue - used for every OpenCL object and command created here.
// num_elements  - number of ints processed by each kernel (also the global
//                 work size).
//
// Returns 0 when all four kernels verify, -1 on any allocation, OpenCL or
// verification failure. All exits go through the cleanup label so that the
// random generator, host buffers and OpenCL objects are released on the
// failure paths as well.
int
test_intmath_int(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    static const char *kernel_names[4] = { "test_int_add", "test_int_sub", "test_int_mul", "test_int_mad" };
    const char  *kernel_sources[4];
    cl_mem      streams[4] = { NULL, NULL, NULL, NULL };
    cl_program  program[4] = { NULL, NULL, NULL, NULL };
    cl_kernel   kernel[4] = { NULL, NULL, NULL, NULL };
    cl_int      *input_ptr[3] = { NULL, NULL, NULL };
    cl_int      *output_ptr = NULL;
    size_t      threads[1];
    int         err = -1;
    int         i, j;
    MTdata      d = init_genrand( gRandomSeed );
    size_t      length = sizeof(cl_int) * num_elements;

    kernel_sources[0] = int_add_kernel_code;
    kernel_sources[1] = int_sub_kernel_code;
    kernel_sources[2] = int_mul_kernel_code;
    kernel_sources[3] = int_mad_kernel_code;

    for (j=0; j<3; j++)
        input_ptr[j] = (cl_int*)malloc(length);
    output_ptr = (cl_int*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        err = -1;
        goto cleanup;
    }

    // streams[0..2] hold the inputs, streams[3] receives every kernel's output.
    for (j=0; j<4; j++)
    {
        streams[j] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[j])
        {
            log_error("clCreateBuffer failed\n");
            err = -1;
            goto cleanup;
        }
    }

    // Fill the inputs one buffer after the other so the random sequence is
    // consumed in the same order as before.
    for (j=0; j<3; j++)
    {
        cl_int *p = input_ptr[j];
        for (i=0; i<num_elements; i++)
            p[i] = (int)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    for (j=0; j<3; j++)
    {
        err = clEnqueueWriteBuffer(queue, streams[j], CL_TRUE, 0, length, input_ptr[j], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            err = -1;
            goto cleanup;
        }
    }

    for (j=0; j<4; j++)
    {
        program[j] = clCreateProgramWithSource(context, 1, &kernel_sources[j], NULL, NULL);
        if (!program[j])
        {
            log_error("clCreateProgramWithSource failed\n");
            err = -1;
            goto cleanup;
        }

        err = clBuildProgram(program[j], 0, NULL, NULL, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clBuildProgram failed\n");
            err = -1;
            goto cleanup;
        }

        kernel[j] = clCreateKernel(program[j], kernel_names[j], NULL);
        if (!kernel[j])
        {
            log_error("clCreateKernel failed\n");
            err = -1;
            goto cleanup;
        }
    }

    for (j=0; j<4; j++)
    {
        err = clSetKernelArg(kernel[j], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[j], 1, sizeof streams[1], &streams[1]);
        if (j == 3)
        {
            // mad takes a third input before the destination.
            err |= clSetKernelArg(kernel[j], 2, sizeof streams[2], &streams[2]);
            err |= clSetKernelArg(kernel[j], 3, sizeof streams[3], &streams[3]);
        }
        else
        {
            err |= clSetKernelArg(kernel[j], 2, sizeof streams[3], &streams[3]);
        }
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            err = -1;
            goto cleanup;
        }
    }

    threads[0] = (unsigned int)num_elements;
    for (i=0; i<4; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            err = -1;
            goto cleanup;
        }

        // Blocking read, so output_ptr is complete when the call returns.
        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueReadBuffer failed\n");
            err = -1;
            goto cleanup;
        }

        switch (i)
        {
            case 0:
                err = verify_int_add(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            case 1:
                err = verify_int_sub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            case 2:
                err = verify_int_mul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            case 3:
                err = verify_int_mad(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
                break;
        }
        if (err)
            break;
    }

cleanup:
    // Only release what was actually created; handles start out as NULL.
    for (j=0; j<4; j++)
    {
        if (streams[j])
            clReleaseMemObject(streams[j]);
    }
    for (j=0; j<4; j++)
    {
        if (kernel[j])
            clReleaseKernel(kernel[j]);
        if (program[j])
            clReleaseProgram(program[j]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(input_ptr[2]);
    free(output_ptr);
    if (d != NULL)
        free_mtdata(d);

    return err;
}

View File

@@ -0,0 +1,388 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C sources for the int2 arithmetic kernels. In each kernel a
// work-item handles the single int2 element at its global id.

// dst = srcA + srcB
const char *int_add2_kernel_code =
"__kernel void test_int_add2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// dst = srcA - srcB
const char *int_sub2_kernel_code =
"__kernel void test_int_sub2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// dst = srcA * srcB
const char *int_mul2_kernel_code =
"__kernel void test_int_mul2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// dst = srcA * srcB + srcC
const char *int_mad2_kernel_code =
"__kernel void test_int_mad2(__global int2 *srcA, __global int2 *srcB, __global int2 *srcC, __global int2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
"}\n";
// Verifies the INT_ADD int2 kernel output: outptr[i] must equal
// inptrA[i] + inptrB[i] for the first n ints.
//
// n counts individual ints, not int2 vectors.
//
// The sum is formed and compared in unsigned arithmetic. The operands may be
// any 32-bit value, and a signed overflow on the host would be undefined
// behaviour; unsigned arithmetic wraps and yields the same low 32 bits.
//
// Logs the outcome and returns 0 on success, -1 at the first mismatch.
int
verify_int_add2(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        unsigned int r = (unsigned int)inptrA[i] + (unsigned int)inptrB[i];

        if (r != (unsigned int)outptr[i])
        {
            log_error("INT_ADD int2 test failed\n");
            return -1;
        }
    }

    log_info("INT_ADD int2 test passed\n");
    return 0;
}
// Verifies the INT_SUB int2 kernel output: outptr[i] must equal
// inptrA[i] - inptrB[i] for the first n ints.
//
// n counts individual ints, not int2 vectors.
//
// The difference is formed and compared in unsigned arithmetic. The operands
// may be any 32-bit value, and a signed overflow on the host would be
// undefined behaviour; unsigned arithmetic wraps and yields the same low
// 32 bits.
//
// Logs the outcome and returns 0 on success, -1 at the first mismatch.
int
verify_int_sub2(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        unsigned int r = (unsigned int)inptrA[i] - (unsigned int)inptrB[i];

        if (r != (unsigned int)outptr[i])
        {
            log_error("INT_SUB int2 test failed\n");
            return -1;
        }
    }

    log_info("INT_SUB int2 test passed\n");
    return 0;
}
// Verifies the INT_MUL int2 kernel output: outptr[i] must equal
// inptrA[i] * inptrB[i] for the first n ints.
//
// n counts individual ints, not int2 vectors.
//
// The product is formed and compared in unsigned arithmetic. The operands may
// be any 32-bit value, and a signed overflow on the host would be undefined
// behaviour; unsigned arithmetic wraps and yields the same low 32 bits.
//
// Logs the outcome and returns 0 on success, -1 at the first mismatch.
int
verify_int_mul2(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        unsigned int r = (unsigned int)inptrA[i] * (unsigned int)inptrB[i];

        if (r != (unsigned int)outptr[i])
        {
            log_error("INT_MUL int2 test failed\n");
            return -1;
        }
    }

    log_info("INT_MUL int2 test passed\n");
    return 0;
}
// Verifies the INT_MAD int2 kernel output: outptr[i] must equal
// inptrA[i] * inptrB[i] + inptrC[i] for the first n ints.
//
// n counts individual ints, not int2 vectors.
//
// The expression is evaluated and compared in unsigned arithmetic. The
// operands may be any 32-bit value, and a signed overflow on the host would
// be undefined behaviour; unsigned arithmetic wraps and yields the same low
// 32 bits.
//
// Logs the outcome and returns 0 on success, -1 at the first mismatch.
int
verify_int_mad2(int *inptrA, int *inptrB, int *inptrC, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        unsigned int r = (unsigned int)inptrA[i] * (unsigned int)inptrB[i]
                       + (unsigned int)inptrC[i];

        if (r != (unsigned int)outptr[i])
        {
            log_error("INT_MAD int2 test failed\n");
            return -1;
        }
    }

    log_info("INT_MAD int2 test passed\n");
    return 0;
}
// Builds the int2 add/sub/mul/mad kernels, runs each one over random inputs
// and compares the device output with the host result.
//
// device        - not referenced by this test.
// context/queue - used for every OpenCL object and command created here.
// num_elements  - number of int2 vectors processed by each kernel (also the
//                 global work size); every buffer holds 2 * num_elements ints.
//
// Returns 0 when all four kernels verify, -1 on any allocation, OpenCL or
// verification failure. All exits go through the cleanup label so that the
// random generator, host buffers and OpenCL objects are released on the
// failure paths as well.
int
test_intmath_int2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    static const char *kernel_names[4] = { "test_int_add2", "test_int_sub2", "test_int_mul2", "test_int_mad2" };
    const char  *kernel_sources[4];
    cl_mem      streams[4] = { NULL, NULL, NULL, NULL };
    cl_program  program[4] = { NULL, NULL, NULL, NULL };
    cl_kernel   kernel[4] = { NULL, NULL, NULL, NULL };
    cl_int      *input_ptr[3] = { NULL, NULL, NULL };
    cl_int      *output_ptr = NULL;
    size_t      threads[1];
    int         err = -1;
    int         i, j;
    MTdata      d = init_genrand( gRandomSeed );
    size_t      length = sizeof(cl_int) * 2 * num_elements;
    // Number of individual ints in each buffer (two per int2 vector).
    int         num_values = num_elements * 2;

    kernel_sources[0] = int_add2_kernel_code;
    kernel_sources[1] = int_sub2_kernel_code;
    kernel_sources[2] = int_mul2_kernel_code;
    kernel_sources[3] = int_mad2_kernel_code;

    for (j=0; j<3; j++)
        input_ptr[j] = (cl_int*)malloc(length);
    output_ptr = (cl_int*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        err = -1;
        goto cleanup;
    }

    // streams[0..2] hold the inputs, streams[3] receives every kernel's output.
    for (j=0; j<4; j++)
    {
        streams[j] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[j])
        {
            log_error("clCreateBuffer failed\n");
            err = -1;
            goto cleanup;
        }
    }

    // Fill the inputs one buffer after the other so the random sequence is
    // consumed in the same order as before.
    for (j=0; j<3; j++)
    {
        cl_int *p = input_ptr[j];
        for (i=0; i<num_values; i++)
            p[i] = (int)genrand_int32(d);
    }
    free_mtdata( d );
    d = NULL;

    for (j=0; j<3; j++)
    {
        err = clEnqueueWriteBuffer(queue, streams[j], CL_TRUE, 0, length, input_ptr[j], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            err = -1;
            goto cleanup;
        }
    }

    for (j=0; j<4; j++)
    {
        program[j] = clCreateProgramWithSource(context, 1, &kernel_sources[j], NULL, NULL);
        if (!program[j])
        {
            log_error("clCreateProgramWithSource failed\n");
            err = -1;
            goto cleanup;
        }

        err = clBuildProgram(program[j], 0, NULL, NULL, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clBuildProgram failed\n");
            err = -1;
            goto cleanup;
        }

        kernel[j] = clCreateKernel(program[j], kernel_names[j], NULL);
        if (!kernel[j])
        {
            log_error("clCreateKernel failed\n");
            err = -1;
            goto cleanup;
        }
    }

    for (j=0; j<4; j++)
    {
        err = clSetKernelArg(kernel[j], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[j], 1, sizeof streams[1], &streams[1]);
        if (j == 3)
        {
            // mad takes a third input before the destination.
            err |= clSetKernelArg(kernel[j], 2, sizeof streams[2], &streams[2]);
            err |= clSetKernelArg(kernel[j], 3, sizeof streams[3], &streams[3]);
        }
        else
        {
            err |= clSetKernelArg(kernel[j], 2, sizeof streams[3], &streams[3]);
        }
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            err = -1;
            goto cleanup;
        }
    }

    threads[0] = (unsigned int)num_elements;
    for (i=0; i<4; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            err = -1;
            goto cleanup;
        }

        // Blocking read, so output_ptr is complete when the call returns.
        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueReadBuffer failed\n");
            err = -1;
            goto cleanup;
        }

        // The verifiers walk plain ints, so pass the int count: passing
        // num_elements would leave the second half of the output unchecked.
        switch (i)
        {
            case 0:
                err = verify_int_add2(input_ptr[0], input_ptr[1], output_ptr, num_values);
                break;
            case 1:
                err = verify_int_sub2(input_ptr[0], input_ptr[1], output_ptr, num_values);
                break;
            case 2:
                err = verify_int_mul2(input_ptr[0], input_ptr[1], output_ptr, num_values);
                break;
            case 3:
                err = verify_int_mad2(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_values);
                break;
        }
        if (err)
            break;
    }

cleanup:
    // Only release what was actually created; handles start out as NULL.
    for (j=0; j<4; j++)
    {
        if (streams[j])
            clReleaseMemObject(streams[j]);
    }
    for (j=0; j<4; j++)
    {
        if (kernel[j])
            clReleaseKernel(kernel[j]);
        if (program[j])
            clReleaseProgram(program[j]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(input_ptr[2]);
    free(output_ptr);
    if (d != NULL)
        free_mtdata(d);

    return err;
}

View File

@@ -0,0 +1,387 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C sources for the int4 arithmetic kernels. In each kernel a
// work-item handles the single int4 element at its global id.

// dst = srcA + srcB
const char *int_add4_kernel_code =
"__kernel void test_int_add4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// dst = srcA - srcB
const char *int_sub4_kernel_code =
"__kernel void test_int_sub4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// dst = srcA * srcB
const char *int_mul4_kernel_code =
"__kernel void test_int_mul4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// dst = srcA * srcB + srcC
const char *int_mad4_kernel_code =
"__kernel void test_int_mad4(__global int4 *srcA, __global int4 *srcB, __global int4 *srcC, __global int4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
"}\n";
// Verifies the INT_ADD int4 kernel output: outptr[i] must equal
// inptrA[i] + inptrB[i] for the first n ints.
//
// n counts individual ints, not int4 vectors.
//
// The sum is formed and compared in unsigned arithmetic. The operands may be
// any 32-bit value, and a signed overflow on the host would be undefined
// behaviour; unsigned arithmetic wraps and yields the same low 32 bits.
//
// Logs the outcome and returns 0 on success, -1 at the first mismatch.
int
verify_int_add4(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        unsigned int r = (unsigned int)inptrA[i] + (unsigned int)inptrB[i];

        if (r != (unsigned int)outptr[i])
        {
            log_error("INT_ADD int4 test failed\n");
            return -1;
        }
    }

    log_info("INT_ADD int4 test passed\n");
    return 0;
}
// Verifies the INT_SUB int4 kernel output: outptr[i] must equal
// inptrA[i] - inptrB[i] for the first n ints.
//
// n counts individual ints, not int4 vectors.
//
// The difference is formed and compared in unsigned arithmetic. The operands
// may be any 32-bit value, and a signed overflow on the host would be
// undefined behaviour; unsigned arithmetic wraps and yields the same low
// 32 bits.
//
// Logs the outcome and returns 0 on success, -1 at the first mismatch.
int
verify_int_sub4(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        unsigned int r = (unsigned int)inptrA[i] - (unsigned int)inptrB[i];

        if (r != (unsigned int)outptr[i])
        {
            log_error("INT_SUB int4 test failed\n");
            return -1;
        }
    }

    log_info("INT_SUB int4 test passed\n");
    return 0;
}
// Verifies the INT_MUL int4 kernel output: outptr[i] must equal
// inptrA[i] * inptrB[i] for the first n ints.
//
// n counts individual ints, not int4 vectors.
//
// The product is formed and compared in unsigned arithmetic. The operands may
// be any 32-bit value, and a signed overflow on the host would be undefined
// behaviour; unsigned arithmetic wraps and yields the same low 32 bits.
//
// Logs the outcome and returns 0 on success, -1 at the first mismatch.
int
verify_int_mul4(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        unsigned int r = (unsigned int)inptrA[i] * (unsigned int)inptrB[i];

        if (r != (unsigned int)outptr[i])
        {
            log_error("INT_MUL int4 test failed\n");
            return -1;
        }
    }

    log_info("INT_MUL int4 test passed\n");
    return 0;
}
// Verifies the INT_MAD int4 kernel output: outptr[i] must equal
// inptrA[i] * inptrB[i] + inptrC[i] for the first n ints.
//
// n counts individual ints, not int4 vectors.
//
// The expression is evaluated and compared in unsigned arithmetic. The
// operands may be any 32-bit value, and a signed overflow on the host would
// be undefined behaviour; unsigned arithmetic wraps and yields the same low
// 32 bits.
//
// Logs the outcome and returns 0 on success, -1 at the first mismatch.
int
verify_int_mad4(int *inptrA, int *inptrB, int *inptrC, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        unsigned int r = (unsigned int)inptrA[i] * (unsigned int)inptrB[i]
                       + (unsigned int)inptrC[i];

        if (r != (unsigned int)outptr[i])
        {
            log_error("INT_MAD int4 test failed\n");
            return -1;
        }
    }

    log_info("INT_MAD int4 test passed\n");
    return 0;
}
// Builds the int4 add/sub/mul/mad kernels, runs each one over random inputs
// and compares the device output with the host result.
//
// device        - not referenced by this test.
// context/queue - used for every OpenCL object and command created here.
// num_elements  - number of int4 vectors processed by each kernel (also the
//                 global work size); every buffer holds 4 * num_elements ints.
//
// Returns 0 when all four kernels verify, -1 on any allocation, OpenCL or
// verification failure. All exits go through the cleanup label so that the
// random generator, host buffers and OpenCL objects are released on the
// failure paths as well.
int
test_intmath_int4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    static const char *kernel_names[4] = { "test_int_add4", "test_int_sub4", "test_int_mul4", "test_int_mad4" };
    const char  *kernel_sources[4];
    cl_mem      streams[4] = { NULL, NULL, NULL, NULL };
    cl_program  program[4] = { NULL, NULL, NULL, NULL };
    cl_kernel   kernel[4] = { NULL, NULL, NULL, NULL };
    cl_int      *input_ptr[3] = { NULL, NULL, NULL };
    cl_int      *output_ptr = NULL;
    size_t      threads[1];
    int         err = -1;
    int         i, j;
    MTdata      d = init_genrand( gRandomSeed );
    size_t      length = sizeof(cl_int) * 4 * num_elements;
    // Number of individual ints in each buffer (four per int4 vector).
    int         num_values = num_elements * 4;

    kernel_sources[0] = int_add4_kernel_code;
    kernel_sources[1] = int_sub4_kernel_code;
    kernel_sources[2] = int_mul4_kernel_code;
    kernel_sources[3] = int_mad4_kernel_code;

    for (j=0; j<3; j++)
        input_ptr[j] = (cl_int*)malloc(length);
    output_ptr = (cl_int*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        err = -1;
        goto cleanup;
    }

    // streams[0..2] hold the inputs, streams[3] receives every kernel's output.
    for (j=0; j<4; j++)
    {
        streams[j] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[j])
        {
            log_error("clCreateBuffer failed\n");
            err = -1;
            goto cleanup;
        }
    }

    // Fill the inputs one buffer after the other so the random sequence is
    // consumed in the same order as before.
    for (j=0; j<3; j++)
    {
        cl_int *p = input_ptr[j];
        for (i=0; i<num_values; i++)
            p[i] = (int)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    for (j=0; j<3; j++)
    {
        err = clEnqueueWriteBuffer(queue, streams[j], CL_TRUE, 0, length, input_ptr[j], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            err = -1;
            goto cleanup;
        }
    }

    for (j=0; j<4; j++)
    {
        program[j] = clCreateProgramWithSource(context, 1, &kernel_sources[j], NULL, NULL);
        if (!program[j])
        {
            log_error("clCreateProgramWithSource failed\n");
            err = -1;
            goto cleanup;
        }

        err = clBuildProgram(program[j], 0, NULL, NULL, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clBuildProgram failed\n");
            err = -1;
            goto cleanup;
        }

        kernel[j] = clCreateKernel(program[j], kernel_names[j], NULL);
        if (!kernel[j])
        {
            log_error("clCreateKernel failed\n");
            err = -1;
            goto cleanup;
        }
    }

    for (j=0; j<4; j++)
    {
        err = clSetKernelArg(kernel[j], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[j], 1, sizeof streams[1], &streams[1]);
        if (j == 3)
        {
            // mad takes a third input before the destination.
            err |= clSetKernelArg(kernel[j], 2, sizeof streams[2], &streams[2]);
            err |= clSetKernelArg(kernel[j], 3, sizeof streams[3], &streams[3]);
        }
        else
        {
            err |= clSetKernelArg(kernel[j], 2, sizeof streams[3], &streams[3]);
        }
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            err = -1;
            goto cleanup;
        }
    }

    threads[0] = (unsigned int)num_elements;
    for (i=0; i<4; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            err = -1;
            goto cleanup;
        }

        // Blocking read, so output_ptr is complete when the call returns.
        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueReadBuffer failed\n");
            err = -1;
            goto cleanup;
        }

        // The verifiers walk plain ints, so pass the int count: passing
        // num_elements would leave three quarters of the output unchecked.
        switch (i)
        {
            case 0:
                err = verify_int_add4(input_ptr[0], input_ptr[1], output_ptr, num_values);
                break;
            case 1:
                err = verify_int_sub4(input_ptr[0], input_ptr[1], output_ptr, num_values);
                break;
            case 2:
                err = verify_int_mul4(input_ptr[0], input_ptr[1], output_ptr, num_values);
                break;
            case 3:
                err = verify_int_mad4(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_values);
                break;
        }
        if (err)
            break;
    }

cleanup:
    // Only release what was actually created; handles start out as NULL.
    for (j=0; j<4; j++)
    {
        if (streams[j])
            clReleaseMemObject(streams[j]);
    }
    for (j=0; j<4; j++)
    {
        if (kernel[j])
            clReleaseKernel(kernel[j]);
        if (program[j])
            clReleaseProgram(program[j]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(input_ptr[2]);
    free(output_ptr);
    if (d != NULL)
        free_mtdata(d);

    return err;
}

View File

@@ -0,0 +1,397 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C sources for the scalar `long` arithmetic kernels exercised by
// test_intmath_long(). Each program contains exactly one kernel in which every
// work-item processes the element selected by get_global_id(0).

// dst[tid] = srcA[tid] + srcB[tid]
const char *long_add_kernel_code =
"__kernel void test_long_add(__global long *srcA, __global long *srcB, __global long *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// dst[tid] = srcA[tid] - srcB[tid]
const char *long_sub_kernel_code =
"__kernel void test_long_sub(__global long *srcA, __global long *srcB, __global long *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// dst[tid] = srcA[tid] * srcB[tid]
const char *long_mul_kernel_code =
"__kernel void test_long_mul(__global long *srcA, __global long *srcB, __global long *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// dst[tid] = srcA[tid] * srcB[tid] + srcC[tid] (takes a third input buffer)
const char *long_mad_kernel_code =
"__kernel void test_long_mad(__global long *srcA, __global long *srcB, __global long *srcC, __global long *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
"}\n";
// NOTE(review): MAX_ERR is not referenced by any of the integer checks visible
// in this file; it looks like a leftover from a floating-point test - confirm
// before removing.
static const float MAX_ERR = 1e-5f;
// Host-side reference check for the test_long_add kernel.
//
// Compares outptr[i] with inptrA[i] + inptrB[i] for i in [0, n).
// Returns 0 when all n elements match and -1 at the first mismatch.
//
// The sum is formed in cl_ulong: adding two random cl_long values directly
// overflows for many inputs, which is undefined behaviour on the host.
// Unsigned arithmetic is defined modulo 2^64 and yields the same bit pattern
// as a wrapping two's-complement addition.
int
verify_long_add(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        cl_ulong expected = (cl_ulong)inptrA[i] + (cl_ulong)inptrB[i];

        // Compare as unsigned so no out-of-range signed conversion is needed.
        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_ADD int test failed\n");
            return -1;
        }
    }

    log_info("LONG_ADD int test passed\n");
    return 0;
}
// Host-side reference check for the test_long_sub kernel.
//
// Compares outptr[i] with inptrA[i] - inptrB[i] for i in [0, n).
// Returns 0 when all n elements match and -1 at the first mismatch.
//
// The difference is formed in cl_ulong: subtracting random cl_long values
// directly can overflow, which is undefined behaviour on the host. Unsigned
// arithmetic is defined modulo 2^64 and yields the same bit pattern as a
// wrapping two's-complement subtraction.
int
verify_long_sub(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        cl_ulong expected = (cl_ulong)inptrA[i] - (cl_ulong)inptrB[i];

        // Compare as unsigned so no out-of-range signed conversion is needed.
        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_SUB int test failed\n");
            return -1;
        }
    }

    log_info("LONG_SUB int test passed\n");
    return 0;
}
// Host-side reference check for the test_long_mul kernel.
//
// Compares outptr[i] with inptrA[i] * inptrB[i] for i in [0, n).
// Returns 0 when all n elements match and -1 at the first mismatch.
//
// The product is formed in cl_ulong: multiplying two random cl_long values
// directly overflows for almost every input, which is undefined behaviour on
// the host. Unsigned arithmetic is defined modulo 2^64 and its low 64 bits
// equal those of a wrapping two's-complement multiplication.
int
verify_long_mul(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        cl_ulong expected = (cl_ulong)inptrA[i] * (cl_ulong)inptrB[i];

        // Compare as unsigned so no out-of-range signed conversion is needed.
        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_MUL int test failed\n");
            return -1;
        }
    }

    log_info("LONG_MUL int test passed\n");
    return 0;
}
// Host-side reference check for the test_long_mad kernel.
//
// Compares outptr[i] with inptrA[i] * inptrB[i] + inptrC[i] for i in [0, n).
// Returns 0 when all n elements match and -1 at the first mismatch.
//
// The multiply-add is formed in cl_ulong: doing it on random cl_long values
// directly overflows for almost every input, which is undefined behaviour on
// the host. Unsigned arithmetic is defined modulo 2^64 and yields the same bit
// pattern as the wrapping two's-complement computation.
int
verify_long_mad(cl_long *inptrA, cl_long *inptrB, cl_long *inptrC, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        cl_ulong expected = (cl_ulong)inptrA[i] * (cl_ulong)inptrB[i]
                          + (cl_ulong)inptrC[i];

        // Compare as unsigned so no out-of-range signed conversion is needed.
        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_MAD int test failed\n");
            return -1;
        }
    }

    log_info("LONG_MAD int test passed\n");
    return 0;
}
// Builds and runs the scalar long add / sub / mul / mad kernels and checks
// each result against the matching host verify_long_* routine.
//
// Parameters:
//   device       - unused by this test.
//   context      - context in which buffers and programs are created.
//   queue        - command queue used for all transfers and kernel launches.
//   num_elements - number of cl_long values per buffer; also the global size.
//
// Returns CL_SUCCESS immediately (test skipped) when gHasLong is false,
// 0 when all four kernels verify, and -1 on any API, allocation or
// verification failure.
//
// All failures funnel through one cleanup path so that host buffers and
// OpenCL objects are released on every exit, not only on the success path.
int
test_intmath_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // Everything is declared (and, where cleanup inspects it, initialised)
    // before the first goto so the jumps are valid in both C and C++ builds.
    const char *sources[4];
    const char *kernel_names[4];
    cl_mem      streams[4] = { NULL, NULL, NULL, NULL };
    cl_program  program[4] = { NULL, NULL, NULL, NULL };
    cl_kernel   kernel[4] = { NULL, NULL, NULL, NULL };
    cl_long    *input_ptr[3] = { NULL, NULL, NULL };
    cl_long    *output_ptr = NULL;
    size_t      threads[1];
    size_t      length;
    MTdata      d;
    int         err, i, j;

    if(! gHasLong )
    {
        log_info("64-bit integers are not supported by this device. Skipping test.\n");
        return CL_SUCCESS;
    }

    sources[0] = long_add_kernel_code;  kernel_names[0] = "test_long_add";
    sources[1] = long_sub_kernel_code;  kernel_names[1] = "test_long_sub";
    sources[2] = long_mul_kernel_code;  kernel_names[2] = "test_long_mul";
    sources[3] = long_mad_kernel_code;  kernel_names[3] = "test_long_mad";

    length = sizeof(cl_long) * num_elements;

    // Host staging buffers: three inputs and one output.
    for (i = 0; i < 3; i++)
        input_ptr[i] = (cl_long*)malloc(length);
    output_ptr = (cl_long*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        err = -1;
        goto cleanup;
    }

    // Device buffers: streams[0..2] are inputs, streams[3] is the output.
    for (i = 0; i < 4; i++)
    {
        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[i])
        {
            log_error("clCreateBuffer failed\n");
            err = -1;
            goto cleanup;
        }
    }

    // Fill the inputs with random 64-bit patterns. The two 32-bit halves are
    // drawn in a fixed order and combined as unsigned, which avoids shifting a
    // signed value into the sign bit and makes the data reproducible for a
    // given seed regardless of operand evaluation order.
    d = init_genrand( gRandomSeed );
    for (i = 0; i < 3; i++)
    {
        for (j = 0; j < num_elements; j++)
        {
            cl_ulong lo = genrand_int32(d);
            cl_ulong hi = genrand_int32(d);
            input_ptr[i][j] = (cl_long)(lo | (hi << 32));
        }
    }
    free_mtdata(d);
    d = NULL;

    for (i = 0; i < 3; i++)
    {
        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, input_ptr[i], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            err = -1;
            goto cleanup;
        }
    }

    // One program and one kernel per operation.
    for (i = 0; i < 4; i++)
    {
        program[i] = clCreateProgramWithSource(context, 1, &sources[i], NULL, NULL);
        if (!program[i])
        {
            log_error("clCreateProgramWithSource failed\n");
            err = -1;
            goto cleanup;
        }

        err = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clBuildProgram failed\n");
            err = -1;
            goto cleanup;
        }

        kernel[i] = clCreateKernel(program[i], kernel_names[i], NULL);
        if (!kernel[i])
        {
            log_error("clCreateKernel failed\n");
            err = -1;
            goto cleanup;
        }
    }

    // add/sub/mul take (srcA, srcB, dst); mad takes (srcA, srcB, srcC, dst).
    for (i = 0; i < 4; i++)
    {
        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
        if (i == 3)
        {
            err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2]);
            err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3]);
        }
        else
        {
            err |= clSetKernelArg(kernel[i], 2, sizeof streams[3], &streams[3]);
        }
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            err = -1;
            goto cleanup;
        }
    }

    // Run each kernel, read the shared output buffer back and verify it
    // before launching the next kernel (which overwrites the same buffer).
    threads[0] = (unsigned int)num_elements;
    for (i = 0; i < 4; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            err = -1;
            goto cleanup;
        }

        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueReadBuffer failed\n");
            err = -1;
            goto cleanup;
        }

        switch (i)
        {
            case 0:
                err = verify_long_add(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            case 1:
                err = verify_long_sub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            case 2:
                err = verify_long_mul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            case 3:
                err = verify_long_mad(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
                break;
        }
        if (err)
            break;
    }

cleanup:
    // Handles that were never created are still NULL and are skipped.
    for (i = 0; i < 4; i++)
    {
        if (kernel[i])
            clReleaseKernel(kernel[i]);
        if (program[i])
            clReleaseProgram(program[i]);
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(input_ptr[2]);
    free(output_ptr);
    return err;
}

View File

@@ -0,0 +1,395 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C sources for the `long2` arithmetic kernels exercised by
// test_intmath_long2(). Each program contains exactly one kernel in which
// every work-item processes the long2 vector selected by get_global_id(0).

// dst[tid] = srcA[tid] + srcB[tid]
const char *long_add2_kernel_code =
"__kernel void test_long_add2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// dst[tid] = srcA[tid] - srcB[tid]
const char *long_sub2_kernel_code =
"__kernel void test_long_sub2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// dst[tid] = srcA[tid] * srcB[tid]
const char *long_mul2_kernel_code =
"__kernel void test_long_mul2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// dst[tid] = srcA[tid] * srcB[tid] + srcC[tid] (takes a third input buffer)
const char *long_mad2_kernel_code =
"__kernel void test_long_mad2(__global long2 *srcA, __global long2 *srcB, __global long2 *srcC, __global long2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
"}\n";
// Host-side reference check for the test_long_add2 kernel.
//
// The buffers are walked as flat cl_long arrays, so n counts scalar
// components rather than long2 vectors. Compares outptr[i] with
// inptrA[i] + inptrB[i] for i in [0, n); returns 0 when all match and -1 at
// the first mismatch.
//
// The sum is formed in cl_ulong because adding random cl_long values directly
// can overflow, which is undefined behaviour on the host; unsigned arithmetic
// is defined modulo 2^64 and gives the same bit pattern as a wrapping add.
int
verify_long_add2(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        cl_ulong expected = (cl_ulong)inptrA[i] + (cl_ulong)inptrB[i];

        // Compare as unsigned so no out-of-range signed conversion is needed.
        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_ADD long2 test failed\n");
            return -1;
        }
    }

    log_info("LONG_ADD long2 test passed\n");
    return 0;
}
// Host-side reference check for the test_long_sub2 kernel.
//
// The buffers are walked as flat cl_long arrays, so n counts scalar
// components rather than long2 vectors. Compares outptr[i] with
// inptrA[i] - inptrB[i] for i in [0, n); returns 0 when all match and -1 at
// the first mismatch.
//
// The difference is formed in cl_ulong because subtracting random cl_long
// values directly can overflow, which is undefined behaviour on the host;
// unsigned arithmetic is defined modulo 2^64 and gives the same bit pattern
// as a wrapping subtract.
int
verify_long_sub2(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        cl_ulong expected = (cl_ulong)inptrA[i] - (cl_ulong)inptrB[i];

        // Compare as unsigned so no out-of-range signed conversion is needed.
        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_SUB long2 test failed\n");
            return -1;
        }
    }

    log_info("LONG_SUB long2 test passed\n");
    return 0;
}
// Host-side reference check for the test_long_mul2 kernel.
//
// The buffers are walked as flat cl_long arrays, so n counts scalar
// components rather than long2 vectors. Compares outptr[i] with
// inptrA[i] * inptrB[i] for i in [0, n); returns 0 when all match and -1 at
// the first mismatch.
//
// The product is formed in cl_ulong because multiplying random cl_long values
// directly overflows for almost every input, which is undefined behaviour on
// the host; unsigned arithmetic is defined modulo 2^64 and its low 64 bits
// equal those of a wrapping multiply.
int
verify_long_mul2(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        cl_ulong expected = (cl_ulong)inptrA[i] * (cl_ulong)inptrB[i];

        // Compare as unsigned so no out-of-range signed conversion is needed.
        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_MUL long2 test failed\n");
            return -1;
        }
    }

    log_info("LONG_MUL long2 test passed\n");
    return 0;
}
// Host-side reference check for the test_long_mad2 kernel.
//
// The buffers are walked as flat cl_long arrays, so n counts scalar
// components rather than long2 vectors. Compares outptr[i] with
// inptrA[i] * inptrB[i] + inptrC[i] for i in [0, n); returns 0 when all match
// and -1 at the first mismatch.
//
// The multiply-add is formed in cl_ulong because doing it on random cl_long
// values directly overflows for almost every input, which is undefined
// behaviour on the host; unsigned arithmetic is defined modulo 2^64 and gives
// the same bit pattern as the wrapping computation.
int
verify_long_mad2(cl_long *inptrA, cl_long *inptrB, cl_long *inptrC, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        cl_ulong expected = (cl_ulong)inptrA[i] * (cl_ulong)inptrB[i]
                          + (cl_ulong)inptrC[i];

        // Compare as unsigned so no out-of-range signed conversion is needed.
        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_MAD long2 test failed\n");
            return -1;
        }
    }

    log_info("LONG_MAD long2 test passed\n");
    return 0;
}
// Builds and runs the long2 add / sub / mul / mad kernels and checks each
// result against the matching host verify_long_*2 routine.
//
// Parameters:
//   device       - unused by this test.
//   context      - context in which buffers and programs are created.
//   queue        - command queue used for all transfers and kernel launches.
//   num_elements - number of long2 vectors per buffer; also the global size.
//
// Returns 0 when the test is skipped (gHasLong is false) or when all four
// kernels verify, and -1 on any API, allocation or verification failure.
//
// All failures funnel through one cleanup path so that host buffers and
// OpenCL objects are released on every exit, not only on the success path.
int
test_intmath_long2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // Everything is declared (and, where cleanup inspects it, initialised)
    // before the first goto so the jumps are valid in both C and C++ builds.
    const char *sources[4];
    const char *kernel_names[4];
    cl_mem      streams[4] = { NULL, NULL, NULL, NULL };
    cl_program  program[4] = { NULL, NULL, NULL, NULL };
    cl_kernel   kernel[4] = { NULL, NULL, NULL, NULL };
    cl_long    *input_ptr[3] = { NULL, NULL, NULL };
    cl_long    *output_ptr = NULL;
    size_t      threads[1];
    size_t      length;
    MTdata      d;
    int         num_scalars;
    int         err, i, j;

    if(! gHasLong)
    {
        log_info("64-bit integers are not supported in this device. Skipping test.\n");
        return 0;
    }

    sources[0] = long_add2_kernel_code;  kernel_names[0] = "test_long_add2";
    sources[1] = long_sub2_kernel_code;  kernel_names[1] = "test_long_sub2";
    sources[2] = long_mul2_kernel_code;  kernel_names[2] = "test_long_mul2";
    sources[3] = long_mad2_kernel_code;  kernel_names[3] = "test_long_mad2";

    // Each work-item handles one long2, i.e. two cl_long scalars.
    num_scalars = num_elements * 2;
    length = sizeof(cl_long) * 2* num_elements;

    // Host staging buffers: three inputs and one output.
    for (i = 0; i < 3; i++)
        input_ptr[i] = (cl_long*)malloc(length);
    output_ptr = (cl_long*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        err = -1;
        goto cleanup;
    }

    // Device buffers: streams[0..2] are inputs, streams[3] is the output.
    for (i = 0; i < 4; i++)
    {
        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[i])
        {
            log_error("clCreateBuffer failed\n");
            err = -1;
            goto cleanup;
        }
    }

    // Fill the inputs with random 64-bit patterns. The two 32-bit halves are
    // drawn in a fixed order and combined as unsigned, which avoids shifting a
    // signed value into the sign bit and makes the data reproducible for a
    // given seed regardless of operand evaluation order.
    d = init_genrand( gRandomSeed );
    for (i = 0; i < 3; i++)
    {
        for (j = 0; j < num_scalars; j++)
        {
            cl_ulong lo = genrand_int32(d);
            cl_ulong hi = genrand_int32(d);
            input_ptr[i][j] = (cl_long)(lo | (hi << 32));
        }
    }
    free_mtdata(d);
    d = NULL;

    for (i = 0; i < 3; i++)
    {
        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, input_ptr[i], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            err = -1;
            goto cleanup;
        }
    }

    // One program and one kernel per operation.
    for (i = 0; i < 4; i++)
    {
        program[i] = clCreateProgramWithSource(context, 1, &sources[i], NULL, NULL);
        if (!program[i])
        {
            log_error("clCreateProgramWithSource failed\n");
            err = -1;
            goto cleanup;
        }

        err = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clBuildProgram failed\n");
            err = -1;
            goto cleanup;
        }

        kernel[i] = clCreateKernel(program[i], kernel_names[i], NULL);
        if (!kernel[i])
        {
            log_error("clCreateKernel failed\n");
            err = -1;
            goto cleanup;
        }
    }

    // add/sub/mul take (srcA, srcB, dst); mad takes (srcA, srcB, srcC, dst).
    for (i = 0; i < 4; i++)
    {
        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
        if (i == 3)
        {
            err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2]);
            err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3]);
        }
        else
        {
            err |= clSetKernelArg(kernel[i], 2, sizeof streams[3], &streams[3]);
        }
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            err = -1;
            goto cleanup;
        }
    }

    // Run each kernel, read the shared output buffer back and verify it
    // before launching the next kernel (which overwrites the same buffer).
    threads[0] = (unsigned int)num_elements;
    for (i = 0; i < 4; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            err = -1;
            goto cleanup;
        }

        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueReadBuffer failed\n");
            err = -1;
            goto cleanup;
        }

        // The verifiers index flat cl_long arrays, so they are given the
        // scalar count; passing num_elements would check only the first half
        // of the output.
        switch (i)
        {
            case 0:
                err = verify_long_add2(input_ptr[0], input_ptr[1], output_ptr, num_scalars);
                break;
            case 1:
                err = verify_long_sub2(input_ptr[0], input_ptr[1], output_ptr, num_scalars);
                break;
            case 2:
                err = verify_long_mul2(input_ptr[0], input_ptr[1], output_ptr, num_scalars);
                break;
            case 3:
                err = verify_long_mad2(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_scalars);
                break;
        }
        if (err)
            break;
    }

cleanup:
    // Handles that were never created are still NULL and are skipped.
    for (i = 0; i < 4; i++)
    {
        if (kernel[i])
            clReleaseKernel(kernel[i]);
        if (program[i])
            clReleaseProgram(program[i]);
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(input_ptr[2]);
    free(output_ptr);
    return err;
}

View File

@@ -0,0 +1,395 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C sources for the `long4` arithmetic kernels exercised by
// test_intmath_long4(). Each program contains exactly one kernel in which
// every work-item processes the long4 vector selected by get_global_id(0).

// dst[tid] = srcA[tid] + srcB[tid]
const char *long_add4_kernel_code =
"__kernel void test_long_add4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// dst[tid] = srcA[tid] - srcB[tid]
const char *long_sub4_kernel_code =
"__kernel void test_long_sub4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// dst[tid] = srcA[tid] * srcB[tid]
const char *long_mul4_kernel_code =
"__kernel void test_long_mul4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// dst[tid] = srcA[tid] * srcB[tid] + srcC[tid] (takes a third input buffer)
const char *long_mad4_kernel_code =
"__kernel void test_long_mad4(__global long4 *srcA, __global long4 *srcB, __global long4 *srcC, __global long4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
"}\n";
// Host-side reference check for the test_long_add4 kernel.
//
// The buffers are walked as flat cl_long arrays, so n counts scalar
// components rather than long4 vectors. Compares outptr[i] with
// inptrA[i] + inptrB[i] for i in [0, n); returns 0 when all match and -1 at
// the first mismatch.
//
// The sum is formed in cl_ulong because adding random cl_long values directly
// can overflow, which is undefined behaviour on the host; unsigned arithmetic
// is defined modulo 2^64 and gives the same bit pattern as a wrapping add.
int
verify_long_add4(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        cl_ulong expected = (cl_ulong)inptrA[i] + (cl_ulong)inptrB[i];

        // Compare as unsigned so no out-of-range signed conversion is needed.
        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_ADD long4 test failed\n");
            return -1;
        }
    }

    log_info("LONG_ADD long4 test passed\n");
    return 0;
}
// Host-side reference check for the test_long_sub4 kernel.
//
// The buffers are walked as flat cl_long arrays, so n counts scalar
// components rather than long4 vectors. Compares outptr[i] with
// inptrA[i] - inptrB[i] for i in [0, n); returns 0 when all match and -1 at
// the first mismatch.
//
// The difference is formed in cl_ulong because subtracting random cl_long
// values directly can overflow, which is undefined behaviour on the host;
// unsigned arithmetic is defined modulo 2^64 and gives the same bit pattern
// as a wrapping subtract.
int
verify_long_sub4(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        cl_ulong expected = (cl_ulong)inptrA[i] - (cl_ulong)inptrB[i];

        // Compare as unsigned so no out-of-range signed conversion is needed.
        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_SUB long4 test failed\n");
            return -1;
        }
    }

    log_info("LONG_SUB long4 test passed\n");
    return 0;
}
// Host-side reference check for the test_long_mul4 kernel.
//
// The buffers are walked as flat cl_long arrays, so n counts scalar
// components rather than long4 vectors. Compares outptr[i] with
// inptrA[i] * inptrB[i] for i in [0, n); returns 0 when all match and -1 at
// the first mismatch.
//
// The product is formed in cl_ulong because multiplying random cl_long values
// directly overflows for almost every input, which is undefined behaviour on
// the host; unsigned arithmetic is defined modulo 2^64 and its low 64 bits
// equal those of a wrapping multiply.
int
verify_long_mul4(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        cl_ulong expected = (cl_ulong)inptrA[i] * (cl_ulong)inptrB[i];

        // Compare as unsigned so no out-of-range signed conversion is needed.
        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_MUL long4 test failed\n");
            return -1;
        }
    }

    log_info("LONG_MUL long4 test passed\n");
    return 0;
}
// Host-side reference check for the test_long_mad4 kernel.
//
// The buffers are walked as flat cl_long arrays, so n counts scalar
// components rather than long4 vectors. Compares outptr[i] with
// inptrA[i] * inptrB[i] + inptrC[i] for i in [0, n); returns 0 when all match
// and -1 at the first mismatch.
//
// The multiply-add is formed in cl_ulong because doing it on random cl_long
// values directly overflows for almost every input, which is undefined
// behaviour on the host; unsigned arithmetic is defined modulo 2^64 and gives
// the same bit pattern as the wrapping computation.
int
verify_long_mad4(cl_long *inptrA, cl_long *inptrB, cl_long *inptrC, cl_long *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        cl_ulong expected = (cl_ulong)inptrA[i] * (cl_ulong)inptrB[i]
                          + (cl_ulong)inptrC[i];

        // Compare as unsigned so no out-of-range signed conversion is needed.
        if (expected != (cl_ulong)outptr[i])
        {
            log_error("LONG_MAD long4 test failed\n");
            return -1;
        }
    }

    log_info("LONG_MAD long4 test passed\n");
    return 0;
}
// Builds and runs the long4 add / sub / mul / mad kernels and checks each
// result against the matching host verify_long_*4 routine.
//
// Parameters:
//   device       - unused by this test.
//   context      - context in which buffers and programs are created.
//   queue        - command queue used for all transfers and kernel launches.
//   num_elements - number of long4 vectors per buffer; also the global size.
//
// Returns 0 when the test is skipped (gHasLong is false) or when all four
// kernels verify, and -1 on any API, allocation or verification failure.
//
// All failures funnel through one cleanup path so that host buffers and
// OpenCL objects are released on every exit, not only on the success path.
int
test_intmath_long4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // Everything is declared (and, where cleanup inspects it, initialised)
    // before the first goto so the jumps are valid in both C and C++ builds.
    const char *sources[4];
    const char *kernel_names[4];
    cl_mem      streams[4] = { NULL, NULL, NULL, NULL };
    cl_program  program[4] = { NULL, NULL, NULL, NULL };
    cl_kernel   kernel[4] = { NULL, NULL, NULL, NULL };
    cl_long    *input_ptr[3] = { NULL, NULL, NULL };
    cl_long    *output_ptr = NULL;
    size_t      threads[1];
    size_t      length;
    MTdata      d;
    int         num_scalars;
    int         err, i, j;

    if(! gHasLong )
    {
        log_info("64-bit integers are not supported by this device. Skipping test.\n");
        return 0;
    }

    sources[0] = long_add4_kernel_code;  kernel_names[0] = "test_long_add4";
    sources[1] = long_sub4_kernel_code;  kernel_names[1] = "test_long_sub4";
    sources[2] = long_mul4_kernel_code;  kernel_names[2] = "test_long_mul4";
    sources[3] = long_mad4_kernel_code;  kernel_names[3] = "test_long_mad4";

    // Each work-item handles one long4, i.e. four cl_long scalars.
    num_scalars = num_elements * 4;
    length = sizeof(cl_long) * 4 * num_elements;

    // Host staging buffers: three inputs and one output.
    for (i = 0; i < 3; i++)
        input_ptr[i] = (cl_long*)malloc(length);
    output_ptr = (cl_long*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        err = -1;
        goto cleanup;
    }

    // Device buffers: streams[0..2] are inputs, streams[3] is the output.
    for (i = 0; i < 4; i++)
    {
        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[i])
        {
            log_error("clCreateBuffer failed\n");
            err = -1;
            goto cleanup;
        }
    }

    // Fill the inputs with random 64-bit patterns. The two 32-bit halves are
    // drawn in a fixed order and combined as unsigned, which avoids shifting a
    // signed value into the sign bit and makes the data reproducible for a
    // given seed regardless of operand evaluation order.
    d = init_genrand( gRandomSeed );
    for (i = 0; i < 3; i++)
    {
        for (j = 0; j < num_scalars; j++)
        {
            cl_ulong lo = genrand_int32(d);
            cl_ulong hi = genrand_int32(d);
            input_ptr[i][j] = (cl_long)(lo | (hi << 32));
        }
    }
    free_mtdata(d);
    d = NULL;

    for (i = 0; i < 3; i++)
    {
        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, input_ptr[i], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            err = -1;
            goto cleanup;
        }
    }

    // One program and one kernel per operation.
    for (i = 0; i < 4; i++)
    {
        program[i] = clCreateProgramWithSource(context, 1, &sources[i], NULL, NULL);
        if (!program[i])
        {
            log_error("clCreateProgramWithSource failed\n");
            err = -1;
            goto cleanup;
        }

        err = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clBuildProgram failed\n");
            err = -1;
            goto cleanup;
        }

        kernel[i] = clCreateKernel(program[i], kernel_names[i], NULL);
        if (!kernel[i])
        {
            log_error("clCreateKernel failed\n");
            err = -1;
            goto cleanup;
        }
    }

    // add/sub/mul take (srcA, srcB, dst); mad takes (srcA, srcB, srcC, dst).
    for (i = 0; i < 4; i++)
    {
        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
        if (i == 3)
        {
            err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2]);
            err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3]);
        }
        else
        {
            err |= clSetKernelArg(kernel[i], 2, sizeof streams[3], &streams[3]);
        }
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            err = -1;
            goto cleanup;
        }
    }

    // Run each kernel, read the shared output buffer back and verify it
    // before launching the next kernel (which overwrites the same buffer).
    threads[0] = (unsigned int)num_elements;
    for (i = 0; i < 4; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            err = -1;
            goto cleanup;
        }

        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueReadBuffer failed\n");
            err = -1;
            goto cleanup;
        }

        // The verifiers index flat cl_long arrays, so they are given the
        // scalar count; passing num_elements would check only the first
        // quarter of the output.
        switch (i)
        {
            case 0:
                err = verify_long_add4(input_ptr[0], input_ptr[1], output_ptr, num_scalars);
                break;
            case 1:
                err = verify_long_sub4(input_ptr[0], input_ptr[1], output_ptr, num_scalars);
                break;
            case 2:
                err = verify_long_mul4(input_ptr[0], input_ptr[1], output_ptr, num_scalars);
                break;
            case 3:
                err = verify_long_mad4(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_scalars);
                break;
        }
        if (err)
            break;
    }

cleanup:
    // Handles that were never created are still NULL and are skipped.
    for (i = 0; i < 4; i++)
    {
        if (kernel[i])
            clReleaseKernel(kernel[i]);
        if (program[i])
            clReleaseProgram(program[i]);
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(input_ptr[2]);
    free(output_ptr);
    return err;
}

View File

@@ -0,0 +1,253 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"
// OpenCL C program used to test a kernel calling another kernel and a kernel
// calling an ordinary function.
//
// The adjacent string literals below are not separated by commas, so they
// concatenate into a single source string: the array has exactly one element.
//
// The program defines:
//   test_kernel_to_call   - kernel; zeroes output[gid] when `where` is 0, then
//                           adds input[0..where-1] to output[gid].
//   test_call_kernel      - kernel; sets dst[tid] = 1 and invokes
//                           test_kernel_to_call(dst, src, tid) `times` times.
//   test_function_to_call - plain function with the same body as
//                           test_kernel_to_call (forward-declared at the top).
//   test_call_function    - kernel; same as test_call_kernel but invokes
//                           test_function_to_call instead.
const char *kernel_call_kernel_code[] = {
"void test_function_to_call(__global int *output, __global int *input, int where);\n"
"\n"
"__kernel void test_kernel_to_call(__global int *output, __global int *input, int where) \n"
"{\n"
" int b;\n"
" if (where == 0) {\n"
" output[get_global_id(0)] = 0;\n"
" }\n"
" for (b=0; b<where; b++)\n"
" output[get_global_id(0)] += input[b]; \n"
"}\n"
"\n"
"__kernel void test_call_kernel(__global int *src, __global int *dst, int times) \n"
"{\n"
" int tid = get_global_id(0);\n"
" int a;\n"
" dst[tid] = 1;\n"
" for (a=0; a<times; a++)\n"
" test_kernel_to_call(dst, src, tid);\n"
"}\n"
"void test_function_to_call(__global int *output, __global int *input, int where) \n"
"{\n"
" int b;\n"
" if (where == 0) {\n"
" output[get_global_id(0)] = 0;\n"
" }\n"
" for (b=0; b<where; b++)\n"
" output[get_global_id(0)] += input[b]; \n"
"}\n"
"\n"
"__kernel void test_call_function(__global int *src, __global int *dst, int times) \n"
"{\n"
" int tid = get_global_id(0);\n"
" int a;\n"
" dst[tid] = 1;\n"
" for (a=0; a<times; a++)\n"
" test_function_to_call(dst, src, tid);\n"
"}\n"
};
int test_kernel_call_kernel_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
num_elements = 256;
int error, errors = 0;
clProgramWrapper program;
clKernelWrapper kernel1, kernel2, kernel_to_call;
clMemWrapper streams[2];
size_t threads[] = {num_elements,1,1};
cl_int *input, *output, *expected;
cl_int times = 4;
int pass = 0;
input = (cl_int*)malloc(sizeof(cl_int)*num_elements);
output = (cl_int*)malloc(sizeof(cl_int)*num_elements);
expected = (cl_int*)malloc(sizeof(cl_int)*num_elements);
for (int i=0; i<num_elements; i++) {
input[i] = i;
output[i] = i;
expected[i] = output[i];
}
// Calculate the expected results
for (int tid=0; tid<num_elements; tid++) {
expected[tid] = 1;
for (int a=0; a<times; a++) {
int where = tid;
if (where == 0)
expected[tid] = 0;
for (int b=0; b<where; b++) {
expected[tid] += input[b];
}
}
}
// Test kernel calling a kernel
log_info("Testing kernel calling kernel...\n");
// Create the kernel
if( create_single_kernel_helper( context, &program, &kernel1, 1, kernel_call_kernel_code, "test_call_kernel" ) != 0 )
{
return -1;
}
kernel_to_call = clCreateKernel(program, "test_kernel_to_call", &error);
test_error(error, "clCreateKernel failed");
/* Create some I/O streams */
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*num_elements, input, &error);
test_error( error, "clCreateBuffer failed" );
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*num_elements, output, &error);
test_error( error, "clCreateBuffer failed" );
error = clSetKernelArg(kernel1, 0, sizeof( streams[0] ), &streams[0]);
test_error( error, "clSetKernelArg failed" );
error = clSetKernelArg(kernel1, 1, sizeof( streams[1] ), &streams[1]);
test_error( error, "clSetKernelArg failed" );
error = clSetKernelArg(kernel1, 2, sizeof( times ), &times);
test_error( error, "clSetKernelArg failed" );
error = clEnqueueNDRangeKernel( queue, kernel1, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "clEnqueueNDRangeKernel failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, output, 0, NULL, NULL );
test_error( error, "clEnqueueReadBuffer failed" );
// Compare the results
pass = 1;
for (int i=0; i<num_elements; i++) {
if (output[i] != expected[i]) {
if (errors > 10)
continue;
if (errors == 10) {
log_error("Suppressing further results...\n");
continue;
}
log_error("Results do not match: output[%d]=%d != expected[%d]=%d\n", i, output[i], i, expected[i]);
errors++;
pass = 0;
}
}
if (pass) log_info("Passed kernel calling kernel...\n");
// Test kernel calling a function
log_info("Testing kernel calling function...\n");
// Reset the inputs
for (int i=0; i<num_elements; i++) {
input[i] = i;
output[i] = i;
}
error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, input, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed");
error = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, output, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed");
kernel2 = clCreateKernel(program, "test_call_function", &error);
test_error(error, "clCreateKernel failed");
error = clSetKernelArg(kernel2, 0, sizeof( streams[0] ), &streams[0]);
test_error( error, "clSetKernelArg failed" );
error = clSetKernelArg(kernel2, 1, sizeof( streams[1] ), &streams[1]);
test_error( error, "clSetKernelArg failed" );
error = clSetKernelArg(kernel2, 2, sizeof( times ), &times);
test_error( error, "clSetKernelArg failed" );
error = clEnqueueNDRangeKernel( queue, kernel2, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "clEnqueueNDRangeKernel failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, output, 0, NULL, NULL );
test_error( error, "clEnqueueReadBuffer failed" );
// Compare the results
pass = 1;
for (int i=0; i<num_elements; i++) {
if (output[i] != expected[i]) {
if (errors > 10)
continue;
if (errors > 10) {
log_error("Suppressing further results...\n");
continue;
}
log_error("Results do not match: output[%d]=%d != expected[%d]=%d\n", i, output[i], i, expected[i]);
errors++;
pass = 0;
}
}
if (pass) log_info("Passed kernel calling function...\n");
// Test calling the kernel we called from another kernel
log_info("Testing calling the kernel we called from another kernel before...\n");
// Reset the inputs
for (int i=0; i<num_elements; i++) {
input[i] = i;
output[i] = i;
expected[i] = output[i];
}
error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, input, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed");
error = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, output, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed");
// Calculate the expected results
int where = times;
for (int tid=0; tid<num_elements; tid++) {
if (where == 0)
expected[tid] = 0;
for (int b=0; b<where; b++) {
expected[tid] += input[b];
}
}
error = clSetKernelArg(kernel_to_call, 0, sizeof( streams[1] ), &streams[1]);
test_error( error, "clSetKernelArg failed" );
error = clSetKernelArg(kernel_to_call, 1, sizeof( streams[0] ), &streams[0]);
test_error( error, "clSetKernelArg failed" );
error = clSetKernelArg(kernel_to_call, 2, sizeof( times ), &times);
test_error( error, "clSetKernelArg failed" );
error = clEnqueueNDRangeKernel( queue, kernel_to_call, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "clEnqueueNDRangeKernel failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, output, 0, NULL, NULL );
test_error( error, "clEnqueueReadBuffer failed" );
// Compare the results
pass = 1;
for (int i=0; i<num_elements; i++) {
if (output[i] != expected[i]) {
if (errors > 10)
continue;
if (errors > 10) {
log_error("Suppressing further results...\n");
continue;
}
log_error("Results do not match: output[%d]=%d != expected[%d]=%d\n", i, output[i], i, expected[i]);
errors++;
pass = 0;
}
}
if (pass) log_info("Passed calling the kernel we called from another kernel before...\n");
free( input );
free( output );
free( expected );
return errors;
}

View File

@@ -0,0 +1,572 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _WIN32
#include <unistd.h>
#endif
#include "procs.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/errorHelpers.h"
// printf-style OpenCL C templates used when the device supports 64-bit
// integers: every kernel stores the address of six objects (scalar and
// 2/3/4/8/16-wide vectors of one element type) into results[0..5] as ulong.
//
// Parameter template, used for the global, local and constant address spaces.
// Conversions, in order: the optional pragma line, then six
// (address-space qualifier, element type name) pairs, one per pointer argument.
const char *parameter_kernel_long =
"%s\n" // optional pragma
"kernel void test(global ulong *results, %s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, %s %s4 *mem4, %s %s8 *mem8, %s %s16 *mem16)\n"
"{\n"
" results[0] = (ulong)&mem0[0];\n"
" results[1] = (ulong)&mem2[0];\n"
" results[2] = (ulong)&mem3[0];\n"
" results[3] = (ulong)&mem4[0];\n"
" results[4] = (ulong)&mem8[0];\n"
" results[5] = (ulong)&mem16[0];\n"
"}\n";
// In-kernel declaration template, used for the private and local address
// spaces. Conversions, in order: the optional pragma line, then six
// (address-space qualifier, element type name) pairs, one per declared array.
const char *local_kernel_long =
"%s\n" // optional pragma
"kernel void test(global ulong *results)\n"
"{\n"
" %s %s mem0[3];\n"
" %s %s2 mem2[3];\n"
" %s %s3 mem3[3];\n"
" %s %s4 mem4[3];\n"
" %s %s8 mem8[3];\n"
" %s %s16 mem16[3];\n"
" results[0] = (ulong)&mem0[0];\n"
" results[1] = (ulong)&mem2[0];\n"
" results[2] = (ulong)&mem3[0];\n"
" results[3] = (ulong)&mem4[0];\n"
" results[4] = (ulong)&mem8[0];\n"
" results[5] = (ulong)&mem16[0];\n"
"}\n";
// Program-scope constant template. Conversions, in order: the optional pragma
// line, then the element type name eleven times (once for mem0, twice for each
// of the five vector declarations: the type and its initializer cast).
const char *constant_kernel_long =
"%s\n" // optional pragma
" constant %s mem0[3] = {0};\n"
" constant %s2 mem2[3] = {(%s2)(0)};\n"
" constant %s3 mem3[3] = {(%s3)(0)};\n"
" constant %s4 mem4[3] = {(%s4)(0)};\n"
" constant %s8 mem8[3] = {(%s8)(0)};\n"
" constant %s16 mem16[3] = {(%s16)(0)};\n"
"\n"
"kernel void test(global ulong *results)\n"
"{\n"
" results[0] = (ulong)&mem0;\n"
" results[1] = (ulong)&mem2;\n"
" results[2] = (ulong)&mem3;\n"
" results[3] = (ulong)&mem4;\n"
" results[4] = (ulong)&mem8;\n"
" results[5] = (ulong)&mem16;\n"
"}\n";
// Variants of the three templates for devices without 64-bit integer support:
// identical layout and conversions, but addresses are stored as uint.
//
// Parameter template, used for the global, local and constant address spaces.
// Conversions, in order: the optional pragma line, then six
// (address-space qualifier, element type name) pairs, one per pointer argument.
const char *parameter_kernel_no_long =
"%s\n" // optional pragma
"kernel void test(global uint *results, %s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, %s %s4 *mem4, %s %s8 *mem8, %s %s16 *mem16)\n"
"{\n"
" results[0] = (uint)&mem0[0];\n"
" results[1] = (uint)&mem2[0];\n"
" results[2] = (uint)&mem3[0];\n"
" results[3] = (uint)&mem4[0];\n"
" results[4] = (uint)&mem8[0];\n"
" results[5] = (uint)&mem16[0];\n"
"}\n";
// In-kernel declaration template, used for the private and local address
// spaces. Conversions, in order: the optional pragma line, then six
// (address-space qualifier, element type name) pairs, one per declared array.
const char *local_kernel_no_long =
"%s\n" // optional pragma
"kernel void test(global uint *results)\n"
"{\n"
" %s %s mem0[3];\n"
" %s %s2 mem2[3];\n"
" %s %s3 mem3[3];\n"
" %s %s4 mem4[3];\n"
" %s %s8 mem8[3];\n"
" %s %s16 mem16[3];\n"
" results[0] = (uint)&mem0[0];\n"
" results[1] = (uint)&mem2[0];\n"
" results[2] = (uint)&mem3[0];\n"
" results[3] = (uint)&mem4[0];\n"
" results[4] = (uint)&mem8[0];\n"
" results[5] = (uint)&mem16[0];\n"
"}\n";
// Program-scope constant template. Conversions, in order: the optional pragma
// line, then the element type name eleven times (once for mem0, twice for each
// of the five vector declarations: the type and its initializer cast).
const char *constant_kernel_no_long =
"%s\n" // optional pragma
" constant %s mem0[3] = {0};\n"
" constant %s2 mem2[3] = {(%s2)(0)};\n"
" constant %s3 mem3[3] = {(%s3)(0)};\n"
" constant %s4 mem4[3] = {(%s4)(0)};\n"
" constant %s8 mem8[3] = {(%s8)(0)};\n"
" constant %s16 mem16[3] = {(%s16)(0)};\n"
"\n"
"kernel void test(global uint *results)\n"
"{\n"
" results[0] = (uint)&mem0;\n"
" results[1] = (uint)&mem2;\n"
" results[2] = (uint)&mem3;\n"
" results[3] = (uint)&mem4;\n"
" results[4] = (uint)&mem8;\n"
" results[5] = (uint)&mem16;\n"
"}\n";
// OpenCL address spaces the alignment test can target. The enumerator values
// are used as indices into the name table in get_explicit_address_name(), so
// the order here must stay in sync with that table.
enum AddressSpaces
{
kGlobal = 0,
kLocal,
kConstant,
kPrivate
};
typedef enum AddressSpaces AddressSpaces;
// Set to a non-zero value to also log every address that passes the
// alignment check (failures are always logged).
#define DEBUG 0
// Returns the OpenCL C qualifier keyword ("global", "local", "constant" or
// "private") for the given address space.
//
// The table below is indexed directly by the enumerator value, so it must be
// kept in the same order as enum AddressSpaces. A value outside the table is
// reported and mapped to "unknown" instead of reading past the array; that
// token makes any kernel generated from it fail to build with a recognisable
// message.
const char * get_explicit_address_name( AddressSpaces address )
{
    static const char *sExplicitAddressNames[] = { "global", "local", "constant", "private" };
    const size_t nameCount = sizeof( sExplicitAddressNames ) / sizeof( sExplicitAddressNames[ 0 ] );

    // Casting through size_t also rejects negative values.
    if ( (size_t)address >= nameCount )
    {
        log_error( "get_explicit_address_name: invalid address space %d\n", (int)address );
        return "unknown";
    }
    return sExplicitAddressNames[ address ];
}
int test_kernel_memory_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, AddressSpaces address )
{
const char *constant_kernel;
const char *parameter_kernel;
const char *local_kernel;
if ( gHasLong )
{
constant_kernel = constant_kernel_long;
parameter_kernel = parameter_kernel_long;
local_kernel = local_kernel_long;
}
else
{
constant_kernel = constant_kernel_no_long;
parameter_kernel = parameter_kernel_no_long;
local_kernel = local_kernel_no_long;
}
ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
char *kernel_code = (char*)malloc(4096);
cl_kernel kernel;
cl_program program;
int error;
int total_errors = 0;
cl_mem results;
cl_ulong *results_data;
cl_mem mem0, mem2, mem3, mem4, mem8, mem16;
results_data = (cl_ulong*)malloc(sizeof(cl_ulong)*6);
results = clCreateBuffer(context, 0, sizeof(cl_ulong)*6, NULL, &error);
test_error(error, "clCreateBuffer failed");
mem0 = clCreateBuffer(context, 0, sizeof(cl_long), NULL, &error);
test_error(error, "clCreateBuffer failed");
mem2 = clCreateBuffer(context, 0, sizeof(cl_long)*2, NULL, &error);
test_error(error, "clCreateBuffer failed");
mem3 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error);
test_error(error, "clCreateBuffer failed");
mem4 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error);
test_error(error, "clCreateBuffer failed");
mem8 = clCreateBuffer(context, 0, sizeof(cl_long)*8, NULL, &error);
test_error(error, "clCreateBuffer failed");
mem16 = clCreateBuffer(context, 0, sizeof(cl_long)*16, NULL, &error);
test_error(error, "clCreateBuffer failed");
// For each type
// Calculate alignment mask for each size
// For global, local, constant, private
// If global, local or constant -- do parameter_kernel
// If private or local -- do local_kernel
// If constant -- do constant kernel
int numConstantArgs;
clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof(numConstantArgs), &numConstantArgs, NULL);
int typeIndex;
for (typeIndex = 0; typeIndex < 10; typeIndex++) {
// Skip double tests if we don't support doubles
if (vecType[typeIndex] == kDouble && !is_extension_available(device, "cl_khr_fp64")) {
log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
continue;
}
if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong )
continue;
log_info("Testing %s...\n", get_explicit_type_name(vecType[typeIndex]));
// Determine the expected alignment masks.
// E.g., if it is supposed to be 4 byte aligned, we should get 4-1=3 = ... 000011
// We can then and the returned address with that and we should have 0.
cl_ulong alignments[6];
alignments[0] = get_explicit_type_size(vecType[typeIndex])-1;
alignments[1] = (get_explicit_type_size(vecType[typeIndex])<<1)-1;
alignments[2] = (get_explicit_type_size(vecType[typeIndex])<<2)-1;
alignments[3] = (get_explicit_type_size(vecType[typeIndex])<<2)-1;
alignments[4] = (get_explicit_type_size(vecType[typeIndex])<<3)-1;
alignments[5] = (get_explicit_type_size(vecType[typeIndex])<<4)-1;
// Parameter kernel
if (address == kGlobal || address == kLocal || address == kConstant) {
log_info("\tTesting parameter kernel...\n");
if ( (gIsEmbedded) && (address == kConstant) && (numConstantArgs < 6)) {
sprintf(kernel_code, parameter_kernel,
vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex])
);
}
else {
sprintf(kernel_code, parameter_kernel,
vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex])
);
}
//printf("Kernel is: \n%s\n", kernel_code);
// Create the kernel
error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test");
test_error(error, "create_single_kernel_helper failed");
// Initialize the results
memset(results_data, 0, sizeof(cl_long)*5);
error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*6, results_data, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed");
// Set the arguments
error = clSetKernelArg(kernel, 0, sizeof(results), &results);
test_error(error, "clSetKernelArg failed");
if (address != kLocal) {
error = clSetKernelArg(kernel, 1, sizeof(mem0), &mem0);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 2, sizeof(mem2), &mem2);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 3, sizeof(mem3), &mem3);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 4, sizeof(mem4), &mem4);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 5, sizeof(mem8), &mem8);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 6, sizeof(mem16), &mem16);
test_error(error, "clSetKernelArg failed");
} else {
error = clSetKernelArg(kernel, 1, get_explicit_type_size(vecType[typeIndex]), NULL);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 2, get_explicit_type_size(vecType[typeIndex])*2, NULL);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 3, get_explicit_type_size(vecType[typeIndex])*4, NULL);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 4, get_explicit_type_size(vecType[typeIndex])*4, NULL);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 5, get_explicit_type_size(vecType[typeIndex])*8, NULL);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 6, get_explicit_type_size(vecType[typeIndex])*16, NULL);
test_error(error, "clSetKernelArg failed");
}
// Enqueue the kernel
size_t global_size = 1;
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
// Read back the results
error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*6, results_data, 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");
// Verify the results
if (gHasLong) {
for (int i = 0; i < 6; i++) {
if ((results_data[i] & alignments[i]) != 0) {
total_errors++;
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]);
} else {
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]);
}
}
}
// Verify the results on devices that do not support longs
else {
cl_uint *results_data_no_long = (cl_uint *)results_data;
for (int i = 0; i < 6; i++) {
if ((results_data_no_long[i] & alignments[i]) != 0) {
total_errors++;
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]);
} else {
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]);
}
}
}
clReleaseKernel(kernel);
clReleaseProgram(program);
}
// Local kernel
if (address == kLocal || address == kPrivate) {
log_info("\tTesting local kernel...\n");
sprintf(kernel_code, local_kernel,
vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex])
);
//printf("Kernel is: \n%s\n", kernel_code);
// Create the kernel
error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test");
test_error(error, "create_single_kernel_helper failed");
// Initialize the results
memset(results_data, 0, sizeof(cl_long)*5);
error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed");
// Set the arguments
error = clSetKernelArg(kernel, 0, sizeof(results), &results);
test_error(error, "clSetKernelArg failed");
// Enqueue the kernel
size_t global_size = 1;
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
// Read back the results
error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");
// Verify the results
if (gHasLong) {
for (int i = 0; i < 5; i++) {
if ((results_data[i] & alignments[i]) != 0) {
total_errors++;
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]);
} else {
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]);
}
}
}
// Verify the results on devices that do not support longs
else {
cl_uint *results_data_no_long = (cl_uint *)results_data;
for (int i = 0; i < 5; i++) {
if ((results_data_no_long[i] & alignments[i]) != 0) {
total_errors++;
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]);
} else {
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]);
}
}
}
clReleaseKernel(kernel);
clReleaseProgram(program);
}
// Constant kernel
if (address == kConstant) {
log_info("\tTesting constant kernel...\n");
sprintf(kernel_code, constant_kernel,
vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex]),
get_explicit_type_name(vecType[typeIndex])
);
//printf("Kernel is: \n%s\n", kernel_code);
// Create the kernel
error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test");
test_error(error, "create_single_kernel_helper failed");
// Initialize the results
memset(results_data, 0, sizeof(cl_long)*5);
error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed");
// Set the arguments
error = clSetKernelArg(kernel, 0, sizeof(results), &results);
test_error(error, "clSetKernelArg failed");
// Enqueue the kernel
size_t global_size = 1;
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
// Read back the results
error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");
// Verify the results
if (gHasLong) {
for (int i = 0; i < 5; i++) {
if ((results_data[i] & alignments[i]) != 0) {
total_errors++;
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]);
} else {
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]);
}
}
}
// Verify the results on devices that do not support longs
else {
cl_uint *results_data_no_long = (cl_uint *)results_data;
for (int i = 0; i < 5; i++) {
if ((results_data_no_long[i] & alignments[i]) != 0) {
total_errors++;
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]);
} else {
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]);
}
}
}
clReleaseKernel(kernel);
clReleaseProgram(program);
}
}
clReleaseMemObject(results);
clReleaseMemObject(mem0);
clReleaseMemObject(mem2);
clReleaseMemObject(mem3);
clReleaseMemObject(mem4);
clReleaseMemObject(mem8);
clReleaseMemObject(mem16);
free( kernel_code );
free( results_data );
if (total_errors != 0)
return -1;
return 0;
}
// Test entry point: runs the memory alignment test for the local address
// space and returns its result unchanged.
int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
return test_kernel_memory_alignment( device, context, queue, n_elems, kLocal );
}
// Test entry point: runs the memory alignment test for the global address
// space and returns its result unchanged.
int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
return test_kernel_memory_alignment( device, context, queue, n_elems, kGlobal );
}
// Test entry point for the constant address space.
//
// Devices whose CL_DEVICE_VERSION starts with "OpenCL 1.0 " are exempted and
// reported as passing (see the rationale below); every other device runs the
// full alignment test.
// Returns 0 on success or exemption, -1 if the version string cannot be
// obtained, otherwise the result of test_kernel_memory_alignment().
int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    // There is a class of approved OpenCL 1.0 conformant devices out there that in some circumstances
    // are unable to meaningfully take (or more precisely use) the address of constant data by virtue
    // of limitations in their ISA design. This feature was not tested in 1.0, so they were declared
    // conformant by Khronos. The failure is however caught here.
    //
    // Unfortunately, determining whether or not these devices are 1.0 conformant is not the jurisdiction
    // of the 1.1 tests -- We can't fail them from 1.1 conformance here because they are not 1.1
    // devices. They are merely 1.0 conformant devices that interop with 1.1 devices in a 1.1 platform.
    // To add new binding tests now to conformant 1.0 devices would violate the working group requirement
    // of no new tests for 1.0 devices. So certain allowances have to be made in intractable cases
    // such as this one.
    //
    // There is some precedent. Similar allowances are made for other 1.0 hardware features such as
    // local memory size. The minimum required local memory size grew from 16 kB to 32 kB in OpenCL 1.1.

    // Detect 1.0 devices
    // Get CL_DEVICE_VERSION size
    size_t string_size = 0;
    int err;
    if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, 0, NULL, &string_size ) ) )
    {
        log_error( "FAILURE: Unable to get size of CL_DEVICE_VERSION string!\n" );
        return -1;
    }

    // Allocate storage to hold the version string. One extra byte guarantees a
    // non-zero allocation and room for an explicit terminator.
    char *version_string = (char*) malloc(string_size + 1);
    if( NULL == version_string )
    {
        log_error( "FAILURE: Unable to allocate memory to hold CL_DEVICE_VERSION string!\n" );
        return -1;
    }

    // Get CL_DEVICE_VERSION string
    if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, string_size, version_string, NULL ) ) )
    {
        log_error( "FAILURE: Unable to read CL_DEVICE_VERSION string!\n" );
        free(version_string);
        return -1;
    }
    version_string[string_size] = '\0';

    // easy out for 1.0 devices
    const char *string_1_0 = "OpenCL 1.0 ";
    const int is_1_0_device = ( 0 == strncmp( version_string, string_1_0, strlen(string_1_0)) );
    if( is_1_0_device )
        log_info( "WARNING: Allowing device to escape testing of difficult constant memory alignment case.\n\tDevice is not a OpenCL 1.1 device. CL_DEVICE_VERSION: \"%s\"\n", version_string );
    else
        log_info( "Device version string: \"%s\"\n", version_string );
    free(version_string);

    if( is_1_0_device )
        return 0;

    // Everyone else is to be ground mercilessly under the wheels of progress
    return test_kernel_memory_alignment( device, context, queue, n_elems, kConstant );
}
// Test entry point: runs the memory alignment test for the private address
// space and returns its result unchanged.
int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
return test_kernel_memory_alignment( device, context, queue, n_elems, kPrivate );
}

View File

@@ -0,0 +1,372 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Two OpenCL C sources for the same kernel, compute_sum_with_localmem. Each
// work-item accumulates a strided partial sum of a[0..n) into local memory;
// the work-group then combines the partial sums pairwise, with a barrier
// before every step, and work-item 0 stores the total to *sum.
//   [0] local scratch memory is passed in as the kernel argument tmp_sum.
//   [1] local scratch memory is declared inside the kernel; this entry is a
//       printf-style template whose single %d is the array length.
const char *barrier_with_localmem_kernel_code[] = {
"__kernel void compute_sum_with_localmem(__global int *a, int n, __local int *tmp_sum, __global int *sum)\n"
"{\n"
" int tid = get_local_id(0);\n"
" int lsize = get_local_size(0);\n"
" int i;\n"
"\n"
" tmp_sum[tid] = 0;\n"
" for (i=tid; i<n; i+=lsize)\n"
" tmp_sum[tid] += a[i];\n"
"\n"
" if( lsize == 1 )\n"
" {\n"
" if( tid == 0 )\n"
" *sum = tmp_sum[0];\n"
" return;\n"
" }\n"
"\n"
" do\n"
" {\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" if (tid < lsize/2)\n"
" {\n"
" int sum = tmp_sum[tid];\n"
" if( (lsize & 1) && tid == 0 )\n"
" sum += tmp_sum[tid + lsize - 1];\n"
" tmp_sum[tid] = sum + tmp_sum[tid + lsize/2];\n"
" }\n"
" lsize = lsize/2; \n"
" }while( lsize );\n"
"\n"
" if( tid == 0 )\n"
" *sum = tmp_sum[0];\n"
"}\n",
"__kernel void compute_sum_with_localmem(__global int *a, int n, __global int *sum)\n"
"{\n"
" __local int tmp_sum[%d];\n"
" int tid = get_local_id(0);\n"
" int lsize = get_local_size(0);\n"
" int i;\n"
"\n"
" tmp_sum[tid] = 0;\n"
" for (i=tid; i<n; i+=lsize)\n"
" tmp_sum[tid] += a[i];\n"
"\n"
" if( lsize == 1 )\n"
" {\n"
" if( tid == 0 )\n"
" *sum = tmp_sum[0];\n"
" return;\n"
" }\n"
"\n"
" do\n"
" {\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" if (tid < lsize/2)\n"
" {\n"
" int sum = tmp_sum[tid];\n"
" if( (lsize & 1) && tid == 0 )\n"
" sum += tmp_sum[tid + lsize - 1];\n"
" tmp_sum[tid] = sum + tmp_sum[tid + lsize/2];\n"
" }\n"
" lsize = lsize/2; \n"
" }while( lsize );\n"
"\n"
" if( tid == 0 )\n"
" *sum = tmp_sum[0];\n"
"}\n"
};
// Compares the device-computed sum in outptr[0] with a host-side sum of
// inptr[0..n).
//
// The inputs are arbitrary 32-bit values, so the host sum is accumulated with
// unsigned (wrapping) arithmetic: adding them in a signed int would be
// undefined behaviour on overflow.
//
// Parameters:
//   inptr  - input values that were given to the kernel
//   tmpptr - unused
//   outptr - kernel output; only element 0 is examined
//   n      - number of input values
// Returns 0 when the sums match, -1 otherwise.
static int
verify_sum(int *inptr, int *tmpptr, int *outptr, int n)
{
    unsigned int expected = 0;
    int i;

    (void)tmpptr;

    for (i = 0; i < n; i++)
        expected += (unsigned int)inptr[i];

    if (expected != (unsigned int)outptr[0])
    {
        log_error("LOCAL test failed: *%d vs %d\n", (int)expected, outptr[0] );
        return -1;
    }

    log_info("LOCAL test passed\n");
    return 0;
}
// Runs the summation kernel whose local scratch memory is supplied as a
// kernel argument and checks the result against a host-computed sum.
//
// Parameters:
//   device, context, queue - OpenCL objects to run on
//   num_elements           - number of random input values to sum
// Returns 0 on success, -1 on a mismatch or setup failure, or the OpenCL error
// code for the work-group size queries. Every exit path goes through the
// cleanup label so host buffers and OpenCL objects are always released.
int test_local_arg_def(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[2] = { NULL, NULL };
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_int *input_ptr = NULL;
    cl_int *output_ptr = NULL;
    size_t global_threads[1], local_threads[1];
    size_t wgsize = 0, kwgsize = 0;
    size_t max_local_workgroup_size[3];
    size_t in_length = 0, out_length = 0;
    int err, i;
    int result = -1;

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof wgsize, &wgsize, NULL);
    if (err) {
        log_error("clGetDeviceInfo failed, %d\n\n", err);
        goto cleanup;
    }
    // Use half of the device maximum, but at least one work-item.
    wgsize /= 2;
    if (wgsize < 1)
        wgsize = 1;

    in_length = sizeof(cl_int) * num_elements;
    out_length = sizeof(cl_int) * wgsize;
    input_ptr = (cl_int *)malloc(in_length);
    output_ptr = (cl_int *)malloc(out_length);
    if (input_ptr == NULL || output_ptr == NULL)
    {
        log_error("Unable to allocate host buffers\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, in_length, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, out_length, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    {
        // The generator is created and destroyed here so no exit path can leak it.
        MTdata d = init_genrand( gRandomSeed );
        for (i = 0; i < num_elements; i++)
            input_ptr[i] = (int)genrand_int32(d);
        free_mtdata(d);
    }

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, in_length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_with_localmem_kernel_code[0], "compute_sum_with_localmem" );
    if (err)
    {
        // What the helper leaves in program/kernel on failure is not visible
        // here, so do not attempt to release them.
        program = NULL;
        kernel = NULL;
        goto cleanup;
    }

    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof kwgsize, &kwgsize, NULL);
    if (err != CL_SUCCESS)
    {
        print_error(err, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE");
        result = err;
        goto cleanup;
    }
    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    if (err != CL_SUCCESS)
    {
        print_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
        result = err;
        goto cleanup;
    }

    // Pick the minimum of the device and the kernel
    if (kwgsize > max_local_workgroup_size[0])
        kwgsize = max_local_workgroup_size[0];

    err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
    // Argument 2 is local scratch memory: one int per work-item, no host pointer.
    err |= clSetKernelArg(kernel, 2, wgsize * sizeof(cl_int), NULL);
    err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    global_threads[0] = wgsize;
    local_threads[0] = wgsize;

    // Adjust the local thread size to fit and be a nice multiple.
    if (kwgsize < wgsize) {
        log_info("Adjusting wgsize down from %lu to %lu.\n", (unsigned long)wgsize, (unsigned long)kwgsize);
        local_threads[0] = kwgsize;
    }
    while (global_threads[0] % local_threads[0] != 0)
        local_threads[0]--;

    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, out_length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    // verify_sum() does not use its scratch-buffer parameter.
    result = verify_sum(input_ptr, NULL, output_ptr, num_elements);

cleanup:
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);
    return result;
}
// Runs the summation kernel whose local scratch array is declared inside the
// kernel (its length is substituted into the source template) and checks the
// result against a host-computed sum.
//
// Parameters:
//   device, context, queue - OpenCL objects to run on
//   num_elements           - number of random input values to sum
// Returns 0 on success, -1 on a mismatch or setup failure, or the OpenCL error
// code for the work-group size queries. Every exit path goes through the
// cleanup label so host buffers and OpenCL objects are always released.
int test_local_kernel_def(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[2] = { NULL, NULL };
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_int *input_ptr = NULL;
    cl_int *output_ptr = NULL;
    char *program_source = NULL;
    size_t global_threads[1], local_threads[1];
    size_t wgsize = 0, kwgsize = 0;
    size_t max_local_workgroup_size[3];
    size_t in_length = 0, out_length = 0;
    cl_ulong localMemSize = 0;
    int err, i;
    int result = -1;

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof wgsize, &wgsize, NULL);
    if (err) {
        log_error("clGetDeviceInfo failed, %d\n\n", err);
        goto cleanup;
    }
    // Use half of the device maximum, but at least one work-item.
    wgsize /= 2;
    if (wgsize < 1)
        wgsize = 1;

    in_length = sizeof(cl_int) * num_elements;
    out_length = sizeof(cl_int) * wgsize;
    program_source = (char *)malloc(sizeof(char) * 2048);
    input_ptr = (cl_int *)malloc(in_length);
    output_ptr = (cl_int *)malloc(out_length);
    if (program_source == NULL || input_ptr == NULL || output_ptr == NULL)
    {
        log_error("Unable to allocate host buffers\n");
        goto cleanup;
    }
    memset(program_source, 0, 2048);

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, in_length, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, out_length, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    {
        // The generator is created and destroyed here so no exit path can leak it.
        MTdata d = init_genrand( gRandomSeed );
        for (i = 0; i < num_elements; i++)
            input_ptr[i] = (cl_int) genrand_int32(d);
        free_mtdata(d);
    }

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, in_length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    // Validate that created kernel doesn't violate local memory size allowed by the device
    err = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(localMemSize), &localMemSize, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clGetDeviceInfo failed\n");
        goto cleanup;
    }
    // The kernel declares wgsize * sizeof(cl_int) ints, i.e.
    // wgsize * sizeof(cl_int) * sizeof(cl_int) bytes, which is what this limit
    // accounts for.
    // NOTE(review): the kernel only indexes tmp_sum by local id, so the array
    // looks four times larger than needed - confirm before shrinking it.
    if ( wgsize > (localMemSize / (sizeof(cl_int)*sizeof(cl_int))) )
    {
        wgsize = (size_t)(localMemSize / (sizeof(cl_int)*sizeof(cl_int)));
    }
    // Never let the clamp reach zero: it is used as a divisor below.
    if (wgsize < 1)
        wgsize = 1;

    sprintf(program_source, barrier_with_localmem_kernel_code[1], (int)(wgsize * sizeof(cl_int)));
    err = create_single_kernel_helper(context, &program, &kernel, 1, (const char**)&program_source, "compute_sum_with_localmem" );
    if (err)
    {
        // What the helper leaves in program/kernel on failure is not visible
        // here, so do not attempt to release them.
        program = NULL;
        kernel = NULL;
        goto cleanup;
    }

    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof kwgsize, &kwgsize, NULL);
    if (err != CL_SUCCESS)
    {
        print_error(err, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE");
        result = err;
        goto cleanup;
    }
    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    if (err != CL_SUCCESS)
    {
        print_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
        result = err;
        goto cleanup;
    }

    // Pick the minimum of the device and the kernel
    if (kwgsize > max_local_workgroup_size[0])
        kwgsize = max_local_workgroup_size[0];

    err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
    err |= clSetKernelArg(kernel, 2, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    global_threads[0] = wgsize;
    local_threads[0] = wgsize;

    // Adjust the local thread size to fit and be a nice multiple.
    if (kwgsize < wgsize) {
        log_info("Adjusting wgsize down from %lu to %lu.\n", (unsigned long)wgsize, (unsigned long)kwgsize);
        local_threads[0] = kwgsize;
    }
    while (global_threads[0] % local_threads[0] != 0)
        local_threads[0]--;

    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, out_length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    // verify_sum() does not use its scratch-buffer parameter.
    result = verify_sum(input_ptr, NULL, output_ptr, num_elements);

cleanup:
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(program_source);
    free(input_ptr);
    free(output_ptr);
    return result;
}

View File

@@ -0,0 +1,138 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Number of elements in the kernel's __local scratch array. test_local_kernel_scope() halves
// the work-group size until it no longer exceeds this value.
#define MAX_LOCAL_STORAGE_SIZE 256
// The same value spelled as a string literal so it can be pasted into the kernel source below.
// The two definitions are independent and must be kept in sync by hand.
#define MAX_LOCAL_STORAGE_SIZE_STRING "256"
// OpenCL C source of the "test" kernel.
// Every work-item stores its input value into a kernel-scope __local array at the mirrored
// index (local_size - local_id - 1). After a local-memory barrier, work-item 0 of each group
// scans the first get_local_size(0) entries and writes the maximum to outMaxes[group id].
const char *kernelSource[] = {
"__kernel void test( __global unsigned int * input, __global unsigned int *outMaxes )\n"
"{\n"
" __local unsigned int localStorage[ " MAX_LOCAL_STORAGE_SIZE_STRING " ];\n"
" unsigned int theValue = input[ get_global_id( 0 ) ];\n"
"\n"
" // If we just write linearly, there's no verification that the items in a group share local data\n"
" // So we write reverse-linearly, which requires items to read the local data written by at least one\n"
" // different item\n"
" localStorage[ get_local_size( 0 ) - get_local_id( 0 ) - 1 ] = theValue;\n"
"\n"
" // The barrier ensures that all local items have written to the local storage\n"
" barrier( CLK_LOCAL_MEM_FENCE );\n"
"\n"
" // Now we loop back through the local storage and look for the max value. We only do this if\n"
" // we're the first item in a group\n"
" unsigned int max = 0;\n"
" if( get_local_id( 0 ) == 0 )\n"
" {\n"
" for( size_t i = 0; i < get_local_size( 0 ); i++ )\n"
" {\n"
" if( localStorage[ i ] > max )\n"
" max = localStorage[ i ];\n"
" }\n"
" outMaxes[ get_group_id( 0 ) ] = max;\n"
" }\n"
"}\n"
};
// Checks that a __local array declared at kernel scope is shared by all work-items of a
// work-group. The kernel reports the maximum input value of every group; the host recomputes
// those maxima from the same input data and compares.
//
// device, context, queue - OpenCL objects supplied by the test harness.
// num_elements           - unused; the test derives its own size from the kernel's work-group size.
//
// Returns 0 on success and a non-zero value on any failure.
int test_local_kernel_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // Owns one malloc'ed array of cl_uint and frees it on every exit path, including the
    // early returns performed by test_error(). Instances are never copied.
    struct HostArray
    {
        cl_uint *data;
        explicit HostArray( size_t count ) : data( (cl_uint*)malloc( count * sizeof(cl_uint) ) ) {}
        ~HostArray() { free( data ); }
    };

    cl_int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 2 ];

    // Create a test kernel
    error = create_single_kernel_helper( context, &program, &kernel, 1, kernelSource, "test" );
    test_error( error, "Unable to create test kernel" );

    // Determine an appropriate test size
    size_t workGroupSize;
    error = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workGroupSize ), &workGroupSize, NULL );
    test_error( error, "Unable to obtain kernel work group size" );

    // A zero size would make the sizing loop below spin forever, so fail instead of hanging.
    if( workGroupSize == 0 )
    {
        log_error( "ERROR: Kernel work group size was reported as 0\n" );
        return -1;
    }

    // Make sure the work group size doesn't overrun our local storage size in the kernel
    while( workGroupSize > MAX_LOCAL_STORAGE_SIZE )
        workGroupSize >>= 1;

    // Use the smallest multiple of the work-group size that reaches at least 1024 items.
    size_t testSize = workGroupSize;
    while( testSize < 1024 )
        testSize += workGroupSize;
    size_t numGroups = testSize / workGroupSize;

    // size_t has no portable printf length modifier on all supported compilers, so cast explicitly.
    log_info( "\tTesting with %lu groups, %lu elements per group...\n", (unsigned long)numGroups, (unsigned long)workGroupSize );

    // Host-side copies of the input values and of the per-group results
    HostArray inputData( testSize );
    HostArray outputData( numGroups );
    if( inputData.data == NULL || outputData.data == NULL )
    {
        log_error( "ERROR: Unable to allocate host memory for test data\n" );
        return -1;
    }

    // The generator state is created only once it is needed and released right after use,
    // so no early return above can leak it.
    MTdata randSeed = init_genrand( gRandomSeed );
    generate_random_data( kUInt, testSize, randSeed, inputData.data );
    free_mtdata( randSeed );

    // Create two buffers for operation
    streams[ 0 ] = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, testSize * sizeof(cl_uint), inputData.data, &error );
    test_error( error, "Unable to create input buffer" );
    streams[ 1 ] = clCreateBuffer( context, CL_MEM_WRITE_ONLY, numGroups * sizeof(cl_uint), NULL, &error );
    test_error( error, "Unable to create output buffer" );

    // Set up the kernel args and run
    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
    test_error( error, "Unable to set kernel arg" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
    test_error( error, "Unable to set kernel arg" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &testSize, &workGroupSize, 0, NULL, NULL );
    test_error( error, "Unable to enqueue kernel" );

    // Read results and verify
    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, numGroups * sizeof(cl_uint), outputData.data, 0, NULL, NULL );
    test_error( error, "Unable to read output data" );

    // The MinGW compiler seems to have a bug that optimizes the code below incorrectly.
    // The loop counters are declared volatile to avoid that aggressive optimization.
    for( volatile size_t i = 0; i < numGroups; i++ )
    {
        // Determine the max in our case
        cl_uint localMax = 0;
        for( volatile size_t j = 0; j < workGroupSize; j++ )
        {
            if( inputData.data[ i * workGroupSize + j ] > localMax )
                localMax = inputData.data[ i * workGroupSize + j ];
        }

        if( outputData.data[ i ] != localMax )
        {
            log_error( "ERROR: Local max validation failed! (expected %u, got %u for i=%lu)\n", localMax, outputData.data[ i ], (unsigned long)i );
            return -1;
        }
    }

    return 0;
}

View File

@@ -0,0 +1,184 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source of the "test_loop" kernel.
// Work-item tid adds up loopcnt[tid] elements of src, starting at index loopindx[tid]; whenever
// the running index reaches the global size it is reset to 0. The sum is accumulated directly
// in dst[tid]. verify_loop() below computes the same sums on the host.
const char *loop_kernel_code =
"__kernel void test_loop(__global int *src, __global int *loopindx, __global int *loopcnt, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" int n = get_global_size(0);\n"
" int i, j;\n"
"\n"
" dst[tid] = 0;\n"
" for (i=0,j=loopindx[tid]; i<loopcnt[tid]; i++,j++)\n"
" {\n"
" if (j >= n)\n"
" j = 0;\n"
" dst[tid] += src[j];\n"
" }\n"
"\n"
"}\n";
// Host-side reference check for the "test_loop" kernel.
//
// inptr    - the n source values that were uploaded to the device.
// loopindx - per-element start index into inptr.
// loopcnt  - per-element number of values to add up.
// outptr   - the n sums read back from the device.
// n        - number of elements in each array.
//
// Returns 0 when every device result matches the host sum, -1 on the first mismatch.
int
verify_loop(int *inptr, int *loopindx, int *loopcnt, int *outptr, int n)
{
    int i, j, k;

    for (i=0; i<n; i++)
    {
        // The inputs are arbitrary 32-bit values, so the sum routinely exceeds the int range.
        // Accumulating in unsigned arithmetic gives well-defined wrap-around on the host
        // instead of relying on signed overflow, which is undefined behaviour.
        unsigned int r = 0;

        for (j=0,k=loopindx[i]; j<loopcnt[i]; j++,k++)
        {
            // Same reset-to-zero rule as the kernel uses when the index runs off the end.
            if (k >= n)
                k = 0;
            r += (unsigned int)inptr[k];
        }

        // Compare bit patterns in the unsigned domain; int -> unsigned conversion is well defined.
        if (r != (unsigned int)outptr[i])
        {
            log_error("LOOP test failed: %d found, expected %d\n", outptr[i], (int)r);
            return -1;
        }
    }

    log_info("LOOP test passed\n");
    return 0;
}
// Runs the "test_loop" kernel over num_elements work-items and checks the result with
// verify_loop().
//
// device, context, queue - OpenCL objects supplied by the test harness.
// num_elements           - number of work-items and number of elements in every buffer.
//
// Returns the result of verify_loop() (0 on success), or -1 when any allocation or OpenCL call
// fails. All host memory and OpenCL objects are released on every exit path.
int test_loop(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // Everything is declared and initialised up front so that "goto cleanup" never jumps over
    // an initialisation and the cleanup code can tell which resources exist.
    cl_mem      streams[4] = { NULL, NULL, NULL, NULL };
    cl_int      *input_ptr = NULL, *loop_indx = NULL, *loop_cnt = NULL, *output_ptr = NULL;
    cl_int      *upload_src[3];
    cl_program  program = NULL;
    cl_kernel   kernel = NULL;
    MTdata      d;
    size_t      threads[1];
    size_t      length = sizeof(cl_int) * num_elements;
    int         err, i;
    int         result = -1;

    input_ptr  = (cl_int*)malloc(length);
    loop_indx  = (cl_int*)malloc(length);
    loop_cnt   = (cl_int*)malloc(length);
    output_ptr = (cl_int*)malloc(length);
    if (!input_ptr || !loop_indx || !loop_cnt || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    // streams[0..2] are the kernel inputs (src, loopindx, loopcnt); streams[3] receives dst.
    for (i=0; i<4; i++)
    {
        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[i])
        {
            log_error("clCreateBuffer failed\n");
            goto cleanup;
        }
    }

    // The three values of one element are drawn back to back, so the random sequence is
    // consumed in the same order as before.
    d = init_genrand( gRandomSeed );
    for (i=0; i<num_elements; i++)
    {
        input_ptr[i] = (int)genrand_int32(d);
        loop_indx[i] = (int)get_random_float(0, num_elements-1, d);
        loop_cnt[i] = (int)get_random_float(0, num_elements/32, d);
    }
    free_mtdata(d); d = NULL;

    // Upload the three input arrays with blocking writes.
    upload_src[0] = input_ptr;
    upload_src[1] = loop_indx;
    upload_src[2] = loop_cnt;
    for (i=0; i<3; i++)
    {
        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, upload_src[i], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            goto cleanup;
        }
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &loop_kernel_code, "test_loop" );
    if (err)
        goto cleanup;

    err = CL_SUCCESS;
    for (i=0; i<4; i++)
        err |= clSetKernelArg(kernel, i, sizeof streams[i], &streams[i]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (unsigned int)num_elements;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_loop(input_ptr, loop_indx, loop_cnt, output_ptr, num_elements);

cleanup:
    // Release only what was actually created; free(NULL) is a no-op.
    for (i=0; i<4; i++)
    {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(loop_indx);
    free(loop_cnt);
    free(output_ptr);
    return result;
}

View File

@@ -0,0 +1,230 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source of the "test_multireadimage" kernel (three-image variant).
// Each work-item reads the texel at its (x, y) global id from img0, img1 and img2 through the
// supplied sampler, adds the three float4 results and stores the sum at
// dst[y * get_image_width(img1) + x].
static const char *multireadimage_kernel_code =
"__kernel void test_multireadimage(read_only image2d_t img0, read_only image2d_t img1, \n"
" read_only image2d_t img2, __global float4 *dst, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int2 tid = (int2)(tid_x, tid_y);\n"
" int indx = tid_y * get_image_width(img1) + tid_x;\n"
" float4 sum;\n"
"\n"
" sum = read_imagef(img0, sampler, tid);\n"
" sum += read_imagef(img1, sampler, tid);\n"
" sum += read_imagef(img2, sampler, tid);\n"
"\n"
" dst[indx] = sum;\n"
"}\n";
#define MAX_ERR 1e-7f
// Allocates a w x h image with four 8-bit channels per pixel and fills every channel with a
// random byte drawn from d.
// Returns the malloc'ed buffer (the caller frees it), or NULL if the allocation fails.
static unsigned char *
generate_8888_image(int w, int h, MTdata d)
{
    // Compute the size in size_t so the multiplication cannot overflow int.
    size_t count = (size_t)w * (size_t)h * 4;
    unsigned char *ptr = (unsigned char*)malloc(count);
    size_t i;

    // Do not write through a failed allocation; let the caller see the NULL.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<count; i++)
        ptr[i] = (unsigned char)genrand_int32(d);
    return ptr;
}
// Allocates a w x h image with four 16-bit channels per pixel and fills every channel with a
// random value drawn from d.
// Returns the malloc'ed buffer (the caller frees it), or NULL if the allocation fails.
static unsigned short *
generate_16bit_image(int w, int h, MTdata d)
{
    // Compute the size in size_t so the multiplication cannot overflow int.
    size_t count = (size_t)w * (size_t)h * 4;
    unsigned short *ptr = (unsigned short*)malloc(count * sizeof(unsigned short));
    size_t i;

    // Do not write through a failed allocation; let the caller see the NULL.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<count; i++)
        ptr[i] = (unsigned short)genrand_int32(d);
    return ptr;
}
// Allocates a w x h image with four float channels per pixel and fills every channel with
// get_random_float(-0x40000000, 0x40000000, d).
// Returns the malloc'ed buffer (the caller frees it), or NULL if the allocation fails.
static float *
generate_float_image(int w, int h, MTdata d)
{
    // Compute the size in size_t so the multiplication cannot overflow int.
    size_t count = (size_t)w * (size_t)h * 4;
    float *ptr = (float*)malloc(count * sizeof(float));
    size_t i;

    // Do not write through a failed allocation; let the caller see the NULL.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<count; i++)
        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
    return ptr;
}
// Compares the kernel output against a host-computed sum of the three source images.
//
// image  - image[0] holds 8-bit data, image[1] 16-bit data and image[2] float data, each with
//          w*h*4 components.
// outptr - w*h*4 floats read back from the device.
//
// Returns 0 if the largest ULP error stays within the allowed budget, -1 otherwise.
static int
verify_multireadimage(void *image[], float *outptr, int w, int h)
{
    // ULP error of 1.5 for each read_imagef plus 0.5 for each addition.
    float ulp_budget = (float)(3*1.5+2*0.5);
    float worst_ulp = 0.0f;
    int idx = 0;

    while (idx < w*h*4)
    {
        float expected;
        float this_ulp;

        // Normalise the integer formats the same way the test expects read_imagef to,
        // accumulating in the same order as the kernel adds the three reads.
        expected = (float)((unsigned char *)image[0])[idx] / 255.0f;
        expected += (float)((unsigned short *)image[1])[idx] / 65535.0f;
        expected += (float)((float *)image[2])[idx];

        this_ulp = Ulp_Error(outptr[idx], expected);
        if (this_ulp > worst_ulp)
            worst_ulp = this_ulp;

        idx++;
    }

    if (worst_ulp > ulp_budget)
    {
        log_error("READ_MULTIREADIMAGE_MULTIFORMAT test failed. Max ulp error = %g\n", worst_ulp);
        return -1;
    }

    log_info("READ_MULTIREADIMAGE_MULTIFORMAT test passed. Max ulp error = %g\n", worst_ulp);
    return 0;
}
// Reads three 512x512 RGBA images of different channel types (UNORM_INT8, UNORM_INT16, FLOAT)
// in one kernel, sums them per pixel and checks the result with verify_multireadimage().
//
// device, context, queue - OpenCL objects supplied by the test harness.
// num_elements           - unused; the image size is fixed.
//
// Returns the result of verify_multireadimage() (0 on success), or -1 when any allocation or
// OpenCL call fails. All host memory and OpenCL objects are released on every exit path.
int
test_multireadimagemultifmt(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // Everything is declared and initialised up front so that "goto cleanup" never jumps over
    // an initialisation and the cleanup code can tell which resources exist.
    cl_mem streams[4] = { NULL, NULL, NULL, NULL };
    cl_image_format img_format;
    void *input_ptr[3] = { NULL, NULL, NULL }, *output_ptr = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_sampler sampler = NULL;
    size_t threads[2];
    int img_width = 512;
    int img_height = 512;
    // Explicit casts: brace-initialising size_t from a non-constant int is a narrowing conversion.
    size_t origin[3] = { 0, 0, 0 };
    size_t region[3] = { (size_t)img_width, (size_t)img_height, 1 };
    size_t out_size = sizeof(float) * 4 * img_width * img_height;
    int i, err;
    int result = -1;
    MTdata d;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    d = init_genrand( gRandomSeed );
    input_ptr[0] = (void *)generate_8888_image(img_width, img_height, d);
    input_ptr[1] = (void *)generate_16bit_image(img_width, img_height, d);
    input_ptr[2] = (void *)generate_float_image(img_width, img_height, d);
    free_mtdata(d); d = NULL;

    output_ptr = (void *)malloc(out_size);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT8;
    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[0])
    {
        log_error("create_image_2d failed\n");
        goto cleanup;
    }

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT16;
    streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[1])
    {
        log_error("create_image_2d failed\n");
        goto cleanup;
    }

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_FLOAT;
    streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[2])
    {
        log_error("create_image_2d failed\n");
        goto cleanup;
    }

    // streams[3] is the float4 output buffer written by the kernel.
    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), out_size, NULL, NULL);
    if (!streams[3])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    for (i=0; i<3; i++)
    {
        err = clEnqueueWriteImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, input_ptr[i], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteImage failed\n");
            goto cleanup;
        }
    }

    err = create_single_kernel_helper( context, &program, &kernel, 1, &multireadimage_kernel_code, "test_multireadimage");
    if (err)
        goto cleanup;

    sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
    if (err != CL_SUCCESS || !sampler)
    {
        log_error("clCreateSampler failed (%d)\n", err);
        goto cleanup;
    }

    // Kernel arguments 0..2 are the images, 3 is the output buffer and 4 is the sampler.
    err = CL_SUCCESS;
    for (i=0; i<4; i++)
        err |= clSetKernelArg(kernel, i,sizeof streams[i], &streams[i]);
    err |= clSetKernelArg(kernel, 4, sizeof sampler, &sampler);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (size_t)img_width;
    threads[1] = (size_t)img_height;
    err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, out_size, (void *)output_ptr, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_multireadimage(input_ptr, (float*)output_ptr, img_width, img_height);

cleanup:
    // Release only what was actually created; free(NULL) is a no-op.
    if (sampler)
        clReleaseSampler(sampler);
    for (i=0; i<4; i++)
    {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    for (i=0; i<3; i++)
        free(input_ptr[i]);
    free(output_ptr);
    return result;
}

View File

@@ -0,0 +1,198 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source of the "test_multireadimage" kernel (seven-image variant).
// Each work-item reads the texel at its (x, y) global id from img0..img6 through the supplied
// sampler, adds the seven float4 results and stores the sum at
// dst[y * get_image_width(img5) + x]. The leading int parameters n and m are not referenced
// anywhere in the kernel body.
static const char *multireadimage_kernel_code =
"__kernel void test_multireadimage(int n, int m, sampler_t sampler, \n"
" read_only image2d_t img0, read_only image2d_t img1, \n"
" read_only image2d_t img2, read_only image2d_t img3, \n"
" read_only image2d_t img4, read_only image2d_t img5, \n"
" read_only image2d_t img6, __global float4 *dst)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int2 tid = (int2)(tid_x, tid_y);\n"
" int indx = tid_y * get_image_width(img5) + tid_x;\n"
" float4 sum;\n"
"\n"
" sum = read_imagef(img0, sampler, tid);\n"
" sum += read_imagef(img1, sampler, tid);\n"
" sum += read_imagef(img2, sampler, tid);\n"
" sum += read_imagef(img3, sampler, tid);\n"
" sum += read_imagef(img4, sampler, tid);\n"
" sum += read_imagef(img5, sampler, tid);\n"
" sum += read_imagef(img6, sampler, tid);\n"
"\n"
" dst[indx] = sum;\n"
"}\n";
// Allocates a w x h image with four 8-bit channels per pixel and fills every channel with a
// random byte drawn from d.
// Returns the malloc'ed buffer (the caller frees it), or NULL if the allocation fails.
static unsigned char *
generate_8888_image(int w, int h, MTdata d)
{
    // Compute the size in size_t so the multiplication cannot overflow int.
    size_t count = (size_t)w * (size_t)h * 4;
    unsigned char *ptr = (unsigned char*)malloc(count);
    size_t i;

    // Do not write through a failed allocation; let the caller see the NULL.
    if (ptr == NULL)
        return NULL;

    for (i=0; i<count; i++)
        ptr[i] = (unsigned char)genrand_int32(d);
    return ptr;
}
// Compares the kernel output against a host-computed sum of num_images 8-bit source images.
//
// image      - num_images buffers of w*h*4 unsigned bytes each.
// num_images - number of entries in image.
// outptr     - w*h*4 floats read back from the device.
//
// Returns 0 if the largest ULP error stays within the allowed budget, -1 otherwise.
static int
verify_multireadimage(void *image[], int num_images, float *outptr, int w, int h)
{
    // ULP error of 1.5 for each read_imagef plus 0.5 for each addition.
    float ulp_budget = (float)(num_images*1.5+0.5*(num_images-1));
    float worst_ulp = 0.0f;
    int idx = 0;

    while (idx < w*h*4)
    {
        float expected = 0.0f;
        float this_ulp;
        int img;

        // Add the normalised component of every image, in image order.
        for (img=0; img<num_images; img++)
        {
            expected += ((float)((unsigned char *)image[img])[idx] / 255.0f);
        }

        this_ulp = Ulp_Error(outptr[idx], expected);
        if (this_ulp > worst_ulp)
            worst_ulp = this_ulp;

        idx++;
    }

    if (worst_ulp > ulp_budget)
    {
        log_error("READ_MULTIREADIMAGE_RGBA8888 test failed. Max ULP err = %g\n", worst_ulp);
        return -1;
    }

    log_info("READ_MULTIREADIMAGE_RGBA8888 test passed. Max ULP err = %g\n", worst_ulp);
    return 0;
}
// Reads seven 512x512 RGBA UNORM_INT8 images in one kernel, sums them per pixel and checks the
// result with verify_multireadimage().
//
// device, context, queue - OpenCL objects supplied by the test harness.
// num_elements           - unused; the image size is fixed.
//
// Returns the result of verify_multireadimage() (0 on success), or -1 when any allocation or
// OpenCL call fails. All host memory and OpenCL objects are released on every exit path.
int test_multireadimageonefmt(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // Everything is declared and initialised up front so that "goto cleanup" never jumps over
    // an initialisation and the cleanup code can tell which resources exist.
    cl_mem streams[8] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL };
    cl_image_format img_format;
    void *input_ptr[7] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL }, *output_ptr = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_sampler sampler = NULL;
    size_t threads[2];
    int img_width = 512;
    int img_height = 512;
    int i, err;
    int result = -1;
    // Values for the kernel's int parameters n and m, which the kernel body never reads.
    // They match what was passed before (the image count and CL_SUCCESS) but no longer alias
    // the loop counter and the error accumulator.
    int unused_n = 7;
    int unused_m = 0;
    size_t origin[3] = {0, 0, 0};
    // Explicit casts: brace-initialising size_t from a non-constant int is a narrowing conversion.
    size_t region[3] = {(size_t)img_width, (size_t)img_height, 1};
    size_t length = img_width * img_height * 4 * sizeof(float);
    MTdata d = NULL;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    output_ptr = malloc(length);
    if (!output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    // Generate, create and upload the seven source images one after another.
    d = init_genrand( gRandomSeed );
    for (i=0; i<7; i++) {
        input_ptr[i] = (void *)generate_8888_image(img_width, img_height, d);
        if (!input_ptr[i])
        {
            log_error("malloc failed\n");
            goto cleanup;
        }

        img_format.image_channel_order = CL_RGBA;
        img_format.image_channel_data_type = CL_UNORM_INT8;
        streams[i] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
        if (!streams[i])
        {
            log_error("create_image_2d failed\n");
            goto cleanup;
        }

        err = clEnqueueWriteImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, input_ptr[i], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteImage failed\n");
            goto cleanup;
        }
    }
    free_mtdata(d); d = NULL;

    // streams[7] is the float4 output buffer written by the kernel.
    streams[7] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[7])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &multireadimage_kernel_code, "test_multireadimage");
    if (err)
        goto cleanup;

    sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
    if (err != CL_SUCCESS || !sampler)
    {
        log_error("clCreateSampler failed (%d)\n", err);
        goto cleanup;
    }

    // Kernel arguments: 0 = n, 1 = m, 2 = sampler, 3..9 = the images, 10 = output buffer.
    err = clSetKernelArg(kernel, 0, sizeof unused_n, &unused_n);
    err |= clSetKernelArg(kernel, 1, sizeof unused_m, &unused_m);
    err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
    for (i=0; i<8; i++)
        err |= clSetKernelArg(kernel, 3+i, sizeof streams[i], &streams[i]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (size_t)img_width;
    threads[1] = (size_t)img_height;
    err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[7], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_multireadimage(input_ptr, 7, (float *)output_ptr, img_width, img_height);

cleanup:
    // Release only what was actually created; free(NULL) is a no-op.
    if (d)
        free_mtdata(d);
    if (sampler)
        clReleaseSampler(sampler);
    for (i=0; i<8; i++)
    {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    for (i=0; i<7; i++)
        free(input_ptr[i]);
    free(output_ptr);
    return result;
}

View File

@@ -0,0 +1,710 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"
// Comparison helpers used by the numeric-constant tests below.
// Every macro expands to a braced block that expects an integer variable named `errors` in the
// enclosing scope and increments it on failure. `string_name` must be a string literal because
// it is concatenated into the format string; `name` and `value` are stringified with # for the
// log message and are also evaluated as expressions, so neither should have side effects.

// Fails when name < value; logs "name >= value" on success.
#define TEST_VALUE_POSITIVE( string_name, name, value ) \
{ \
if (name < value) { \
log_error("FAILED: " string_name ": " #name " < " #value "\n"); \
errors++;\
} else { \
log_info("\t" string_name ": " #name " >= " #value "\n"); \
} \
}
// Fails when name > value; logs "name <= value" on success.
#define TEST_VALUE_NEGATIVE( string_name, name, value ) \
{ \
if (name > value) { \
log_error("FAILED: " string_name ": " #name " > " #value "\n"); \
errors++;\
} else { \
log_info("\t" string_name ": " #name " <= " #value "\n"); \
} \
}
// Fails when name != value; the expected value is reported by its source text (#value).
#define TEST_VALUE_EQUAL_LITERAL( string_name, name, value ) \
{ \
if (name != value) { \
log_error("FAILED: " string_name ": " #name " != " #value "\n"); \
errors++;\
} else { \
log_info("\t" string_name ": " #name " = " #value "\n"); \
} \
}
// Fails when name != value; the expected value is printed numerically with %a and %17.21g,
// so `value` must be a floating-point expression. `value` is evaluated three times.
// NOTE(review): assumes log_error/log_info are printf-style variadic calls, so a float
// argument is promoted to double as %a/%g require - confirm against the harness.
#define TEST_VALUE_EQUAL( string_name, name, value ) \
{ \
if (name != value) { \
log_error("FAILED: " string_name ": " #name " != %a (%17.21g)\n", value, value); \
errors++;\
} else { \
log_info("\t" string_name ": " #name " = %a (%17.21g)\n", value, value); \
} \
}
// Checks the host-side CL_* numeric limit and math constants against their expected values.
// No OpenCL call is made; the device, context, queue and num_elements parameters are unused.
// Returns the number of constants that did not match (0 means success).
// NOTE(review): MAKE_HEX_FLOAT / MAKE_HEX_DOUBLE are defined outside this file; each use pairs a
// hex floating-point literal with an integer mantissa and a binary exponent that appear to
// describe the same value - confirm against the harness definition.
int test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
// Incremented by the TEST_VALUE_* macros on every mismatch.
int errors = 0;
// Integer limits
TEST_VALUE_EQUAL_LITERAL( "CL_CHAR_BIT", CL_CHAR_BIT, 8)
TEST_VALUE_EQUAL_LITERAL( "CL_SCHAR_MAX", CL_SCHAR_MAX, 127)
TEST_VALUE_EQUAL_LITERAL( "CL_SCHAR_MIN", CL_SCHAR_MIN, (-127-1))
TEST_VALUE_EQUAL_LITERAL( "CL_CHAR_MAX", CL_CHAR_MAX, CL_SCHAR_MAX)
TEST_VALUE_EQUAL_LITERAL( "CL_CHAR_MIN", CL_CHAR_MIN, CL_SCHAR_MIN)
TEST_VALUE_EQUAL_LITERAL( "CL_UCHAR_MAX", CL_UCHAR_MAX, 255)
TEST_VALUE_EQUAL_LITERAL( "CL_SHRT_MAX", CL_SHRT_MAX, 32767)
TEST_VALUE_EQUAL_LITERAL( "CL_SHRT_MIN", CL_SHRT_MIN, (-32767-1))
TEST_VALUE_EQUAL_LITERAL( "CL_USHRT_MAX", CL_USHRT_MAX, 65535)
TEST_VALUE_EQUAL_LITERAL( "CL_INT_MAX", CL_INT_MAX, 2147483647)
TEST_VALUE_EQUAL_LITERAL( "CL_INT_MIN", CL_INT_MIN, (-2147483647-1))
TEST_VALUE_EQUAL_LITERAL( "CL_UINT_MAX", CL_UINT_MAX, 0xffffffffU)
TEST_VALUE_EQUAL_LITERAL( "CL_LONG_MAX", CL_LONG_MAX, ((cl_long) 0x7FFFFFFFFFFFFFFFLL))
TEST_VALUE_EQUAL_LITERAL( "CL_LONG_MIN", CL_LONG_MIN, ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL))
TEST_VALUE_EQUAL_LITERAL( "CL_ULONG_MAX", CL_ULONG_MAX, ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL))
// Single-precision characteristics and limits
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_DIG", CL_FLT_DIG, 6)
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MANT_DIG", CL_FLT_MANT_DIG, 24)
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MAX_10_EXP", CL_FLT_MAX_10_EXP, +38)
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MAX_EXP", CL_FLT_MAX_EXP, +128)
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MIN_10_EXP", CL_FLT_MIN_10_EXP, -37)
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MIN_EXP", CL_FLT_MIN_EXP, -125)
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_RADIX", CL_FLT_RADIX, 2)
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MAX", CL_FLT_MAX, MAKE_HEX_FLOAT( 0x1.fffffep127f, 0x1fffffeL, 103))
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MIN", CL_FLT_MIN, MAKE_HEX_FLOAT(0x1.0p-126f, 0x1L, -126))
TEST_VALUE_EQUAL_LITERAL( "CL_FLT_EPSILON", CL_FLT_EPSILON, MAKE_HEX_FLOAT(0x1.0p-23f, 0x1L, -23))
// Double-precision characteristics and limits
TEST_VALUE_EQUAL_LITERAL( "CL_DBL_DIG", CL_DBL_DIG, 15)
TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MANT_DIG", CL_DBL_MANT_DIG, 53)
TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MAX_10_EXP", CL_DBL_MAX_10_EXP, +308)
TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MAX_EXP", CL_DBL_MAX_EXP, +1024)
TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MIN_10_EXP", CL_DBL_MIN_10_EXP, -307)
TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MIN_EXP", CL_DBL_MIN_EXP, -1021)
TEST_VALUE_EQUAL_LITERAL( "CL_DBL_RADIX", CL_DBL_RADIX, 2)
TEST_VALUE_EQUAL( "CL_DBL_MAX", CL_DBL_MAX, MAKE_HEX_DOUBLE(0x1.fffffffffffffp1023, 0x1fffffffffffffLL, 971))
TEST_VALUE_EQUAL( "CL_DBL_MIN", CL_DBL_MIN, MAKE_HEX_DOUBLE(0x1.0p-1022, 0x1LL, -1022))
TEST_VALUE_EQUAL( "CL_DBL_EPSILON", CL_DBL_EPSILON, MAKE_HEX_DOUBLE(0x1.0p-52, 0x1LL, -52))
// Double-precision math constants
TEST_VALUE_EQUAL( "CL_M_E", CL_M_E, MAKE_HEX_DOUBLE(0x1.5bf0a8b145769p+1, 0x15bf0a8b145769LL, -51) );
TEST_VALUE_EQUAL( "CL_M_LOG2E", CL_M_LOG2E, MAKE_HEX_DOUBLE(0x1.71547652b82fep+0, 0x171547652b82feLL, -52) );
TEST_VALUE_EQUAL( "CL_M_LOG10E", CL_M_LOG10E, MAKE_HEX_DOUBLE(0x1.bcb7b1526e50ep-2, 0x1bcb7b1526e50eLL, -54) );
TEST_VALUE_EQUAL( "CL_M_LN2", CL_M_LN2, MAKE_HEX_DOUBLE(0x1.62e42fefa39efp-1, 0x162e42fefa39efLL, -53) );
TEST_VALUE_EQUAL( "CL_M_LN10", CL_M_LN10, MAKE_HEX_DOUBLE(0x1.26bb1bbb55516p+1, 0x126bb1bbb55516LL, -51) );
TEST_VALUE_EQUAL( "CL_M_PI", CL_M_PI, MAKE_HEX_DOUBLE(0x1.921fb54442d18p+1, 0x1921fb54442d18LL, -51) );
TEST_VALUE_EQUAL( "CL_M_PI_2", CL_M_PI_2, MAKE_HEX_DOUBLE(0x1.921fb54442d18p+0, 0x1921fb54442d18LL, -52) );
TEST_VALUE_EQUAL( "CL_M_PI_4", CL_M_PI_4, MAKE_HEX_DOUBLE(0x1.921fb54442d18p-1, 0x1921fb54442d18LL, -53) );
TEST_VALUE_EQUAL( "CL_M_1_PI", CL_M_1_PI, MAKE_HEX_DOUBLE(0x1.45f306dc9c883p-2, 0x145f306dc9c883LL, -54) );
TEST_VALUE_EQUAL( "CL_M_2_PI", CL_M_2_PI, MAKE_HEX_DOUBLE(0x1.45f306dc9c883p-1, 0x145f306dc9c883LL, -53) );
TEST_VALUE_EQUAL( "CL_M_2_SQRTPI", CL_M_2_SQRTPI, MAKE_HEX_DOUBLE(0x1.20dd750429b6dp+0, 0x120dd750429b6dLL, -52) );
TEST_VALUE_EQUAL( "CL_M_SQRT2", CL_M_SQRT2, MAKE_HEX_DOUBLE(0x1.6a09e667f3bcdp+0, 0x16a09e667f3bcdLL, -52) );
TEST_VALUE_EQUAL( "CL_M_SQRT1_2", CL_M_SQRT1_2, MAKE_HEX_DOUBLE(0x1.6a09e667f3bcdp-1, 0x16a09e667f3bcdLL, -53) );
// Single-precision math constants
TEST_VALUE_EQUAL( "CL_M_E_F", CL_M_E_F, MAKE_HEX_FLOAT(0x1.5bf0a8p+1f, 0x15bf0a8L, -23));
TEST_VALUE_EQUAL( "CL_M_LOG2E_F", CL_M_LOG2E_F, MAKE_HEX_FLOAT(0x1.715476p+0f, 0x1715476L, -24));
TEST_VALUE_EQUAL( "CL_M_LOG10E_F", CL_M_LOG10E_F, MAKE_HEX_FLOAT(0x1.bcb7b2p-2f, 0x1bcb7b2L, -26));
TEST_VALUE_EQUAL( "CL_M_LN2_F", CL_M_LN2_F, MAKE_HEX_FLOAT(0x1.62e43p-1f, 0x162e43L, -21) );
TEST_VALUE_EQUAL( "CL_M_LN10_F", CL_M_LN10_F, MAKE_HEX_FLOAT(0x1.26bb1cp+1f, 0x126bb1cL, -23));
TEST_VALUE_EQUAL( "CL_M_PI_F", CL_M_PI_F, MAKE_HEX_FLOAT(0x1.921fb6p+1f, 0x1921fb6L, -23));
TEST_VALUE_EQUAL( "CL_M_PI_2_F", CL_M_PI_2_F, MAKE_HEX_FLOAT(0x1.921fb6p+0f, 0x1921fb6L, -24));
TEST_VALUE_EQUAL( "CL_M_PI_4_F", CL_M_PI_4_F, MAKE_HEX_FLOAT(0x1.921fb6p-1f, 0x1921fb6L, -25));
TEST_VALUE_EQUAL( "CL_M_1_PI_F", CL_M_1_PI_F, MAKE_HEX_FLOAT(0x1.45f306p-2f, 0x145f306L, -26));
TEST_VALUE_EQUAL( "CL_M_2_PI_F", CL_M_2_PI_F, MAKE_HEX_FLOAT(0x1.45f306p-1f, 0x145f306L, -25));
TEST_VALUE_EQUAL( "CL_M_2_SQRTPI_F", CL_M_2_SQRTPI_F,MAKE_HEX_FLOAT(0x1.20dd76p+0f, 0x120dd76L, -24));
TEST_VALUE_EQUAL( "CL_M_SQRT2_F", CL_M_SQRT2_F, MAKE_HEX_FLOAT(0x1.6a09e6p+0f, 0x16a09e6L, -24));
TEST_VALUE_EQUAL( "CL_M_SQRT1_2_F", CL_M_SQRT1_2_F, MAKE_HEX_FLOAT(0x1.6a09e6p-1f, 0x16a09e6L, -25));
return errors;
}
// OpenCL C source of a kernel that stores the device-side integer and single-precision
// constants into three output buffers so the host can compare them:
//   int_out[0..10]   - char/short/int limits
//   uint_out[0]      - UINT_MAX
//   int_out[11..17]  - FLT_* integer characteristics
//   int_out[18]      - __IMAGE_SUPPORT__ if defined, otherwise the sentinel 0xf00baa
//   float_out[0..2]  - FLT_MAX, FLT_MIN, FLT_EPSILON
//   float_out[3..15] - the M_*_F math constants
// The host arrays that receive these values must be at least 19 ints, 1 uint and 16 floats.
const char *kernel_int_float[] = {
"__kernel void test( __global float *float_out, __global int *int_out, __global uint *uint_out) \n"
"{\n"
" int_out[0] = CHAR_BIT;\n"
" int_out[1] = SCHAR_MAX;\n"
" int_out[2] = SCHAR_MIN;\n"
" int_out[3] = CHAR_MAX;\n"
" int_out[4] = CHAR_MIN;\n"
" int_out[5] = UCHAR_MAX;\n"
" int_out[6] = SHRT_MAX;\n"
" int_out[7] = SHRT_MIN;\n"
" int_out[8] = USHRT_MAX;\n"
" int_out[9] = INT_MAX;\n"
" int_out[10] = INT_MIN;\n"
" uint_out[0] = UINT_MAX;\n"
" int_out[11] = FLT_DIG;\n"
" int_out[12] = FLT_MANT_DIG;\n"
" int_out[13] = FLT_MAX_10_EXP;\n"
" int_out[14] = FLT_MAX_EXP;\n"
" int_out[15] = FLT_MIN_10_EXP;\n"
" int_out[16] = FLT_MIN_EXP;\n"
" int_out[17] = FLT_RADIX;\n"
"#ifdef __IMAGE_SUPPORT__\n"
" int_out[18] = __IMAGE_SUPPORT__;\n"
"#else\n"
" int_out[18] = 0xf00baa;\n"
"#endif\n"
" float_out[0] = FLT_MAX;\n"
" float_out[1] = FLT_MIN;\n"
" float_out[2] = FLT_EPSILON;\n"
" float_out[3] = M_E_F;\n"
" float_out[4] = M_LOG2E_F;\n"
" float_out[5] = M_LOG10E_F;\n"
" float_out[6] = M_LN2_F;\n"
" float_out[7] = M_LN10_F;\n"
" float_out[8] = M_PI_F;\n"
" float_out[9] = M_PI_2_F;\n"
" float_out[10] = M_PI_4_F;\n"
" float_out[11] = M_1_PI_F;\n"
" float_out[12] = M_2_PI_F;\n"
" float_out[13] = M_2_SQRTPI_F;\n"
" float_out[14] = M_SQRT2_F;\n"
" float_out[15] = M_SQRT1_2_F;\n"
"}\n"
};
// OpenCL C source of a kernel that stores the device-side 64-bit integer limits:
//   long_out[0] = LONG_MAX, long_out[1] = LONG_MIN, ulong_out[0] = ULONG_MAX.
const char *kernel_long[] = {
"__kernel void test(__global long *long_out, __global ulong *ulong_out) \n"
"{\n"
" long_out[0] = LONG_MAX;\n"
" long_out[1] = LONG_MIN;\n"
" ulong_out[0] = ULONG_MAX;\n"
"}\n"
};
// OpenCL C source of a kernel that stores the device-side double-precision constants. The
// source enables the cl_khr_fp64 extension itself.
//   long_out[0..6]    - DBL_* integer characteristics
//   double_out[0..2]  - DBL_MAX, DBL_MIN, DBL_EPSILON
//   double_out[3..15] - the M_* math constants
// The host arrays that receive these values must be at least 7 longs and 16 doubles.
const char *kernel_double[] = {
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"__kernel void test( __global double *double_out, __global long *long_out ) \n "
"{\n"
" long_out[0] = DBL_DIG;\n"
" long_out[1] = DBL_MANT_DIG;\n"
" long_out[2] = DBL_MAX_10_EXP;\n"
" long_out[3] = DBL_MAX_EXP;\n"
" long_out[4] = DBL_MIN_10_EXP;\n"
" long_out[5] = DBL_MIN_EXP;\n"
" long_out[6] = DBL_RADIX;\n"
" double_out[0] = DBL_MAX;\n"
" double_out[1] = DBL_MIN;\n"
" double_out[2] = DBL_EPSILON;\n"
" double_out[3] = M_E;\n"
" double_out[4] = M_LOG2E;\n"
" double_out[5] = M_LOG10E;\n"
" double_out[6] = M_LN2;\n"
" double_out[7] = M_LN10;\n"
" double_out[8] = M_PI;\n"
" double_out[9] = M_PI_2;\n"
" double_out[10] = M_PI_4;\n"
" double_out[11] = M_1_PI;\n"
" double_out[12] = M_2_PI;\n"
" double_out[13] = M_2_SQRTPI;\n"
" double_out[14] = M_SQRT2;\n"
" double_out[15] = M_SQRT1_2;\n"
"}\n"
};
// Verifies that the numeric limit macros and math constants visible to an
// OpenCL C kernel match the values the host expects.
//
// Three stages run in sequence, each with its own kernel:
//   1. int/float limits, M_*_F constants and __IMAGE_SUPPORT__ (always run),
//   2. 64-bit integer limits (skipped when gHasLong is false),
//   3. double limits and M_* constants (skipped without cl_khr_fp64).
//
// Returns -1 on a hard failure, otherwise the value of `errors`. `errors` is
// kept as a local because the TEST_VALUE_* macros are defined elsewhere in
// this file and may refer to it by name (NOTE(review): confirm against the
// macro definitions). `num_elements` is unused.
int test_kernel_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    // Scope guard for the raw OpenCL handles used below. Anything still live
    // when the function exits is released by the destructor, so the explicit
    // `return -1` paths (and any return performed by test_error) no longer
    // leak the program, kernel and buffers of the stage that failed.
    struct OwnedObjects
    {
        cl_program program;
        cl_kernel kernel;
        cl_mem streams[3];

        OwnedObjects() : program( NULL ), kernel( NULL )
        {
            for( int i = 0; i < 3; i++ )
                streams[i] = NULL;
        }

        ~OwnedObjects() { release(); }

        // Releases every live handle and resets it to NULL so the guard can
        // be reused by the next stage. Order matches the original code:
        // buffers, then kernel, then program.
        void release()
        {
            for( int i = 0; i < 3; i++ )
            {
                if( streams[i] != NULL )
                {
                    clReleaseMemObject( streams[i] );
                    streams[i] = NULL;
                }
            }
            if( kernel != NULL )
            {
                clReleaseKernel( kernel );
                kernel = NULL;
            }
            if( program != NULL )
            {
                clReleaseProgram( program );
                program = NULL;
            }
        }
    };

    int error, errors = 0;
    OwnedObjects owned;
    // Aliases keep the rest of the function reading exactly like the
    // original raw-handle code.
    cl_program &program = owned.program;
    cl_kernel &kernel = owned.kernel;
    cl_mem (&streams)[3] = owned.streams;
    size_t threads[] = {1,1,1};
    cl_float float_out[16];
    cl_int int_out[19];
    cl_uint uint_out[1];
    cl_long long_out[7];
    cl_ulong ulong_out[1];
    cl_double double_out[16];

    /** INTs and FLOATs **/
    // Create the kernel
    if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_int_float, "test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(float_out), NULL, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(int_out), NULL, &error);
    test_error( error, "Creating test array failed" );
    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(uint_out), NULL, &error);
    test_error( error, "Creating test array failed" );

    // Arguments are deliberately set out of index order (1, 0, 2).
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 2, sizeof( streams[2] ), &streams[2]);
    test_error( error, "Unable to set indexed kernel arguments" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    // Blocking reads: results are available on the host once each call returns.
    error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(float_out), (void*)float_out, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );
    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(int_out), (void*)int_out, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );
    error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof(uint_out), (void*)uint_out, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    TEST_VALUE_EQUAL_LITERAL( "CHAR_BIT", int_out[0], 8)
    TEST_VALUE_EQUAL_LITERAL( "SCHAR_MAX", int_out[1], 127)
    TEST_VALUE_EQUAL_LITERAL( "SCHAR_MIN", int_out[2], (-127-1))
    TEST_VALUE_EQUAL_LITERAL( "CHAR_MAX", int_out[3], CL_SCHAR_MAX)
    TEST_VALUE_EQUAL_LITERAL( "CHAR_MIN", int_out[4], CL_SCHAR_MIN)
    TEST_VALUE_EQUAL_LITERAL( "UCHAR_MAX", int_out[5], 255)
    TEST_VALUE_EQUAL_LITERAL( "SHRT_MAX", int_out[6], 32767)
    TEST_VALUE_EQUAL_LITERAL( "SHRT_MIN",int_out[7], (-32767-1))
    TEST_VALUE_EQUAL_LITERAL( "USHRT_MAX", int_out[8], 65535)
    TEST_VALUE_EQUAL_LITERAL( "INT_MAX", int_out[9], 2147483647)
    TEST_VALUE_EQUAL_LITERAL( "INT_MIN", int_out[10], (-2147483647-1))
    TEST_VALUE_EQUAL_LITERAL( "UINT_MAX", uint_out[0], 0xffffffffU)
    TEST_VALUE_EQUAL_LITERAL( "FLT_DIG", int_out[11], 6)
    TEST_VALUE_EQUAL_LITERAL( "FLT_MANT_DIG", int_out[12], 24)
    TEST_VALUE_EQUAL_LITERAL( "FLT_MAX_10_EXP", int_out[13], +38)
    TEST_VALUE_EQUAL_LITERAL( "FLT_MAX_EXP", int_out[14], +128)
    TEST_VALUE_EQUAL_LITERAL( "FLT_MIN_10_EXP", int_out[15], -37)
    TEST_VALUE_EQUAL_LITERAL( "FLT_MIN_EXP", int_out[16], -125)
    TEST_VALUE_EQUAL_LITERAL( "FLT_RADIX", int_out[17], 2)
    TEST_VALUE_EQUAL( "FLT_MAX", float_out[0], MAKE_HEX_FLOAT(0x1.fffffep127f, 0x1fffffeL, 103))
    TEST_VALUE_EQUAL( "FLT_MIN", float_out[1], MAKE_HEX_FLOAT(0x1.0p-126f, 0x1L, -126))
    TEST_VALUE_EQUAL( "FLT_EPSILON", float_out[2], MAKE_HEX_FLOAT(0x1.0p-23f, 0x1L, -23))
    TEST_VALUE_EQUAL( "M_E_F", float_out[3], CL_M_E_F )
    TEST_VALUE_EQUAL( "M_LOG2E_F", float_out[4], CL_M_LOG2E_F )
    TEST_VALUE_EQUAL( "M_LOG10E_F", float_out[5], CL_M_LOG10E_F )
    TEST_VALUE_EQUAL( "M_LN2_F", float_out[6], CL_M_LN2_F )
    TEST_VALUE_EQUAL( "M_LN10_F", float_out[7], CL_M_LN10_F )
    TEST_VALUE_EQUAL( "M_PI_F", float_out[8], CL_M_PI_F )
    TEST_VALUE_EQUAL( "M_PI_2_F", float_out[9], CL_M_PI_2_F )
    TEST_VALUE_EQUAL( "M_PI_4_F", float_out[10], CL_M_PI_4_F )
    TEST_VALUE_EQUAL( "M_1_PI_F", float_out[11], CL_M_1_PI_F )
    TEST_VALUE_EQUAL( "M_2_PI_F", float_out[12], CL_M_2_PI_F )
    TEST_VALUE_EQUAL( "M_2_SQRTPI_F", float_out[13], CL_M_2_SQRTPI_F )
    TEST_VALUE_EQUAL( "M_SQRT2_F", float_out[14], CL_M_SQRT2_F )
    TEST_VALUE_EQUAL( "M_SQRT1_2_F", float_out[15], CL_M_SQRT1_2_F )

    // We need to check these values against what we know is supported on the device.
    // The checks below compare int_out[18] against the sentinel 0xf00baa to
    // detect the "macro undefined" case.
    if( checkForImageSupport( deviceID ) == 0 )
    { // has images
        // If images are supported, the constant should have been defined to the value 1
        if( int_out[18] == 0xf00baa )
        {
            log_error( "FAILURE: __IMAGE_SUPPORT__ undefined even though images are supported\n" );
            return -1;
        }
        else if( int_out[18] != 1 )
        {
            log_error( "FAILURE: __IMAGE_SUPPORT__ defined, but to the wrong value (defined as %d, spec states it should be 1)\n", int_out[18] );
            return -1;
        }
    }
    else
    { // no images
        // If images aren't supported, the constant should be undefined
        if( int_out[18] != 0xf00baa )
        {
            log_error( "FAILURE: __IMAGE_SUPPORT__ defined to value %d even though images aren't supported\n", int_out[18] );
            return -1;
        }
    }
    log_info( "\t__IMAGE_SUPPORT__: %d\n", int_out[18]);

    // Stage 1 done: drop its objects before the next stage reuses the slots.
    owned.release();

    /** LONGs **/
    if(!gHasLong) {
        log_info("Longs not supported; skipping long tests.\n");
    }
    else
    {
        // Create the kernel
        if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_long, "test" ) != 0 )
        {
            return -1;
        }

        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(long_out), NULL, &error);
        test_error( error, "Creating test array failed" );
        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(ulong_out), NULL, &error);
        test_error( error, "Creating test array failed" );

        error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
        test_error( error, "Unable to set indexed kernel arguments" );
        error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
        test_error( error, "Unable to set indexed kernel arguments" );

        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
        test_error( error, "Kernel execution failed" );

        error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(long_out), &long_out, 0, NULL, NULL );
        test_error( error, "Unable to get result data" );
        error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(ulong_out), &ulong_out, 0, NULL, NULL );
        test_error( error, "Unable to get result data" );

        TEST_VALUE_EQUAL_LITERAL( "LONG_MAX", long_out[0], ((cl_long) 0x7FFFFFFFFFFFFFFFLL))
        TEST_VALUE_EQUAL_LITERAL( "LONG_MIN", long_out[1], ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL))
        TEST_VALUE_EQUAL_LITERAL( "ULONG_MAX", ulong_out[0], ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL))

        owned.release();
    }

    /** DOUBLEs **/
    if(!is_extension_available(deviceID, "cl_khr_fp64")) {
        log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
    }
    else
    {
        // Create the kernel
        if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_double, "test" ) != 0 )
        {
            return -1;
        }

        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(double_out), NULL, &error);
        test_error( error, "Creating test array failed" );
        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(long_out), NULL, &error);
        test_error( error, "Creating test array failed" );

        error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
        test_error( error, "Unable to set indexed kernel arguments" );
        error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
        test_error( error, "Unable to set indexed kernel arguments" );

        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
        test_error( error, "Kernel execution failed" );

        error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(double_out), &double_out, 0, NULL, NULL );
        test_error( error, "Unable to get result data" );
        error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(long_out), &long_out, 0, NULL, NULL );
        test_error( error, "Unable to get result data" );

        TEST_VALUE_EQUAL_LITERAL( "DBL_DIG", long_out[0], 15)
        TEST_VALUE_EQUAL_LITERAL( "DBL_MANT_DIG", long_out[1], 53)
        TEST_VALUE_EQUAL_LITERAL( "DBL_MAX_10_EXP", long_out[2], +308)
        TEST_VALUE_EQUAL_LITERAL( "DBL_MAX_EXP", long_out[3], +1024)
        TEST_VALUE_EQUAL_LITERAL( "DBL_MIN_10_EXP", long_out[4], -307)
        TEST_VALUE_EQUAL_LITERAL( "DBL_MIN_EXP", long_out[5], -1021)
        TEST_VALUE_EQUAL_LITERAL( "DBL_RADIX", long_out[6], 2)
        TEST_VALUE_EQUAL( "DBL_MAX", double_out[0], MAKE_HEX_DOUBLE(0x1.fffffffffffffp1023, 0x1fffffffffffffLL, 971))
        TEST_VALUE_EQUAL( "DBL_MIN", double_out[1], MAKE_HEX_DOUBLE(0x1.0p-1022, 0x1LL, -1022))
        TEST_VALUE_EQUAL( "DBL_EPSILON", double_out[2], MAKE_HEX_DOUBLE(0x1.0p-52, 0x1LL, -52))
        // NOTE(review): the M_E check was already disabled in the original
        // code with no explanation; left disabled here.
        //TEST_VALUE_EQUAL( "M_E", double_out[3], CL_M_E )
        TEST_VALUE_EQUAL( "M_LOG2E", double_out[4], CL_M_LOG2E )
        TEST_VALUE_EQUAL( "M_LOG10E", double_out[5], CL_M_LOG10E )
        TEST_VALUE_EQUAL( "M_LN2", double_out[6], CL_M_LN2 )
        TEST_VALUE_EQUAL( "M_LN10", double_out[7], CL_M_LN10 )
        TEST_VALUE_EQUAL( "M_PI", double_out[8], CL_M_PI )
        TEST_VALUE_EQUAL( "M_PI_2", double_out[9], CL_M_PI_2 )
        TEST_VALUE_EQUAL( "M_PI_4", double_out[10], CL_M_PI_4 )
        TEST_VALUE_EQUAL( "M_1_PI", double_out[11], CL_M_1_PI )
        TEST_VALUE_EQUAL( "M_2_PI", double_out[12], CL_M_2_PI )
        TEST_VALUE_EQUAL( "M_2_SQRTPI", double_out[13], CL_M_2_SQRTPI )
        TEST_VALUE_EQUAL( "M_SQRT2", double_out[14], CL_M_SQRT2 )
        TEST_VALUE_EQUAL( "M_SQRT1_2", double_out[15], CL_M_SQRT1_2 )

        owned.release();
    }

    error = clFinish(queue);
    test_error(error, "clFinish failed");

    return errors;
}
// OpenCL C source for stage 1 of test_kernel_limit_constants: evaluates a set
// of properties of MAXFLOAT on the device, stores each as 0/1 (or a size) in
// intOut, and writes the raw MAXFLOAT value to floatOut[0].
// intOut[5] is intentionally never written: the line that would fill it is
// commented out below, and the host compares floatOut[0] against CL_FLT_MAX
// itself instead.
const char *kernel_constant_limits[] = {
"__kernel void test( __global int *intOut, __global float *floatOut ) \n"
"{\n"
" intOut[0] = isinf( MAXFLOAT ) ? 1 : 0;\n"
" intOut[1] = isnormal( MAXFLOAT ) ? 1 : 0;\n"
" intOut[2] = isnan( MAXFLOAT ) ? 1 : 0;\n"
" intOut[3] = sizeof( MAXFLOAT );\n"
" intOut[4] = ( MAXFLOAT == FLT_MAX ) ? 1 : 0;\n"
// " intOut[5] = ( MAXFLOAT == CL_FLT_MAX ) ? 1 : 0;\n"
" intOut[6] = ( MAXFLOAT == MAXFLOAT ) ? 1 : 0;\n"
" intOut[7] = ( MAXFLOAT == 0x1.fffffep127f ) ? 1 : 0;\n"
" floatOut[0] = MAXFLOAT;\n"
"}\n"
};
// OpenCL C source for stage 2 of test_kernel_limit_constants: checks the
// INFINITY, HUGE_VALF and NAN macros on the device.
//   intOut[0..11]  : INFINITY properties and comparisons
//   intOut[12..13] : HUGE_VALF size and equality with INFINITY
//   intOut[14..20] : NAN properties
//   intOut[21..31] : NaN-producing arithmetic and INFINITY/NAN comparisons
//   floatOut[0..2] : raw INFINITY, HUGE_VALF and NAN values
// The host asserts the expected value for every slot after reading the
// buffers back.
const char *kernel_constant_extended_limits[] = {
"__kernel void test( __global int *intOut, __global float *floatOut ) \n"
"{\n"
" intOut[0] = ( INFINITY == HUGE_VALF ) ? 1 : 0;\n"
" intOut[1] = sizeof( INFINITY );\n"
" intOut[2] = isinf( INFINITY ) ? 1 : 0;\n"
" intOut[3] = isnormal( INFINITY ) ? 1 : 0;\n"
" intOut[4] = isnan( INFINITY ) ? 1 : 0;\n"
" intOut[5] = ( INFINITY > MAXFLOAT ) ? 1 : 0;\n"
" intOut[6] = ( -INFINITY < -MAXFLOAT ) ? 1 : 0;\n"
" intOut[7] = ( ( MAXFLOAT + MAXFLOAT ) == INFINITY ) ? 1 : 0;\n"
" intOut[8] = ( nextafter( MAXFLOAT, INFINITY ) == INFINITY ) ? 1 : 0;\n"
" intOut[9] = ( nextafter( -MAXFLOAT, -INFINITY ) == -INFINITY ) ? 1 : 0;\n"
" intOut[10] = ( INFINITY == INFINITY ) ? 1 : 0;\n"
" intOut[11] = ( as_uint( INFINITY ) == 0x7f800000 ) ? 1 : 0;\n"
" floatOut[0] = INFINITY;\n"
"\n"
" intOut[12] = sizeof( HUGE_VALF );\n"
" intOut[13] = ( HUGE_VALF == INFINITY ) ? 1 : 0;\n"
" floatOut[1] = HUGE_VALF;\n"
"\n"
" intOut[14] = ( NAN == NAN ) ? 1 : 0;\n"
" intOut[15] = ( NAN != NAN ) ? 1 : 0;\n"
" intOut[16] = isnan( NAN ) ? 1 : 0;\n"
" intOut[17] = isinf( NAN ) ? 1 : 0;\n"
" intOut[18] = isnormal( NAN ) ? 1 : 0;\n"
" intOut[19] = ( ( as_uint( NAN ) & 0x7fffffff ) > 0x7f800000 ) ? 1 : 0;\n"
" intOut[20] = sizeof( NAN );\n"
" floatOut[2] = NAN;\n"
"\n"
" intOut[21] = isnan( INFINITY / INFINITY ) ? 1 : 0;\n"
" intOut[22] = isnan( INFINITY - INFINITY ) ? 1 : 0;\n"
" intOut[23] = isnan( 0.f / 0.f ) ? 1 : 0;\n"
" intOut[24] = isnan( INFINITY * 0.f ) ? 1 : 0;\n"
" intOut[25] = ( INFINITY == NAN ); \n"
" intOut[26] = ( -INFINITY == NAN ); \n"
" intOut[27] = ( INFINITY > NAN ); \n"
" intOut[28] = ( -INFINITY < NAN ); \n"
" intOut[29] = ( INFINITY != NAN ); \n"
" intOut[30] = ( NAN > INFINITY ); \n"
" intOut[31] = ( NAN < -INFINITY ); \n"
"}\n"
};
// OpenCL C source for stage 3 of test_kernel_limit_constants: enables
// cl_khr_fp64 and records properties of the double-precision HUGE_VAL macro
// in intOut[0..6], plus the raw HUGE_VAL value in doubleOut[0].
const char *kernel_constant_double_limits[] = {
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"__kernel void test( __global int *intOut, __global double *doubleOut ) \n"
"{\n"
" intOut[0] = sizeof( HUGE_VAL );\n"
" intOut[1] = ( HUGE_VAL == INFINITY ) ? 1 : 0;\n"
" intOut[2] = isinf( HUGE_VAL ) ? 1 : 0;\n"
" intOut[3] = isnormal( HUGE_VAL ) ? 1 : 0;\n"
" intOut[4] = isnan( HUGE_VAL ) ? 1 : 0;\n"
" intOut[5] = ( HUGE_VAL == HUGE_VALF ) ? 1 : 0;\n"
" intOut[6] = ( as_ulong( HUGE_VAL ) == 0x7ff0000000000000UL ) ? 1 : 0;\n"
" doubleOut[0] = HUGE_VAL;\n"
"}\n"
};
// Assertion helpers for test_kernel_limit_constants.
//
//   a   - condition that must hold
//   msg - human-readable description of the requirement
//   f   - lvalue whose raw bits are printed when the assertion fails
//
// On failure the macro logs the message together with the bit pattern of `f`
// and makes the enclosing function return -1.
//
// The bit pattern is extracted with a size-bounded memcpy rather than a
// pointer cast. This avoids strict-aliasing violations and, for
// TEST_DOUBLE_ASSERTION, avoids reading 8 bytes from the 4-byte cl_int
// arguments that some call sites pass.
//
// The macros stay a bare `if { ... }` (not do/while(0)) because existing call
// sites invoke them without a trailing semicolon.
#define TEST_FLOAT_ASSERTION( a, msg, f ) \
    if( !( a ) ) \
    { \
        cl_uint bits_ = 0; \
        memcpy( &bits_, &( f ), sizeof( f ) < sizeof( bits_ ) ? sizeof( f ) : sizeof( bits_ ) ); \
        log_error( "ERROR: Float constant failed requirement: %s (bitwise value is 0x%8.8x)\n", msg, (unsigned int)bits_ ); \
        return -1; \
    }
#define TEST_DOUBLE_ASSERTION( a, msg, f ) \
    if( !( a ) ) \
    { \
        cl_ulong bits_ = 0; \
        memcpy( &bits_, &( f ), sizeof( f ) < sizeof( bits_ ) ? sizeof( f ) : sizeof( bits_ ) ); \
        log_error( "ERROR: Double constant failed requirement: %s (bitwise value is 0x%16.16llx)\n", msg, (unsigned long long)bits_ ); \
        return -1; \
    }
// Verifies the behaviour of the floating-point limit macros exposed to
// OpenCL C kernels.
//
//   Stage 1: MAXFLOAT properties (always run).
//   Stage 2: INFINITY / HUGE_VALF / NAN properties. Skipped on
//            EMBEDDED_PROFILE devices whose single-precision FP config lacks
//            CL_FP_INF_NAN.
//   Stage 3: HUGE_VAL (double). Skipped without cl_khr_fp64 or when the
//            double FP config lacks CL_FP_INF_NAN.
//
// Each stage runs one kernel on a single work-item, reads intOut / floatOut /
// doubleOut back with blocking reads, and asserts every slot. Returns 0 on
// success and -1 on the first failed requirement. `num_elements` is unused.
int test_kernel_limit_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    size_t threads[] = {1,1,1};
    clMemWrapper intStream, floatStream, doubleStream;
    cl_int intOut[ 32 ];
    cl_float floatOut[ 3 ];
    cl_double doubleOut[ 1 ];

    /* Create some I/O streams */
    intStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(intOut), NULL, &error );
    test_error( error, "Creating test array failed" );
    floatStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(floatOut), NULL, &error );
    test_error( error, "Creating test array failed" );

    // Stage 1: basic limits on MAXFLOAT
    {
        clProgramWrapper program;
        clKernelWrapper kernel;

        if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_constant_limits, "test" ) != 0 )
        {
            return -1;
        }

        error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream );
        test_error( error, "Unable to set indexed kernel arguments" );
        error = clSetKernelArg( kernel, 1, sizeof( floatStream ), &floatStream );
        test_error( error, "Unable to set indexed kernel arguments" );

        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
        test_error( error, "Kernel execution failed" );

        error = clEnqueueReadBuffer( queue, intStream, CL_TRUE, 0, sizeof(intOut), intOut, 0, NULL, NULL );
        test_error( error, "Unable to get result data" );
        error = clEnqueueReadBuffer( queue, floatStream, CL_TRUE, 0, sizeof(floatOut), floatOut, 0, NULL, NULL );
        test_error( error, "Unable to get result data" );

        // Test MAXFLOAT properties. intOut[5] is not produced by the kernel;
        // the CL_FLT_MAX comparison is done on the host using floatOut[0].
        TEST_FLOAT_ASSERTION( intOut[0] == 0, "isinf( MAXFLOAT ) = false", floatOut[0] )
        TEST_FLOAT_ASSERTION( intOut[1] == 1, "isnormal( MAXFLOAT ) = true", floatOut[0] )
        TEST_FLOAT_ASSERTION( intOut[2] == 0, "isnan( MAXFLOAT ) = false", floatOut[0] )
        TEST_FLOAT_ASSERTION( intOut[3] == 4, "sizeof( MAXFLOAT ) = 4", floatOut[0] )
        TEST_FLOAT_ASSERTION( intOut[4] == 1, "MAXFLOAT = FLT_MAX", floatOut[0] )
        TEST_FLOAT_ASSERTION( floatOut[0] == CL_FLT_MAX, "MAXFLOAT = CL_FLT_MAX", floatOut[0] )
        TEST_FLOAT_ASSERTION( intOut[6] == 1, "MAXFLOAT = MAXFLOAT", floatOut[0] )
        TEST_FLOAT_ASSERTION( floatOut[0] == MAKE_HEX_FLOAT( 0x1.fffffep127f, 0x1fffffeL, 103), "MAXFLOAT = 0x1.fffffep127f", floatOut[0] )
    }

    // Stage 2: INFINITY and NAN
    char profileStr[128] = "";
    error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL );
    test_error( error, "Unable to run INFINITY/NAN tests (unable to get CL_DEVICE_PROFILE)" );

    bool testInfNan = true;
    if( strcmp( profileStr, "EMBEDDED_PROFILE" ) == 0 )
    {
        // We test if we're not an embedded profile, OR if the inf/nan flag in the config is set
        cl_device_fp_config single = 0;
        error = clGetDeviceInfo( deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL );
        test_error( error, "Unable to run INFINITY/NAN tests (unable to get FP_CONFIG bits)" );

        if( ( single & CL_FP_INF_NAN ) == 0 )
        {
            log_info( "Skipping INFINITY and NAN tests on embedded device (INF/NAN not supported on this device)\n" );
            testInfNan = false;
        }
    }

    if( testInfNan )
    {
        clProgramWrapper program;
        clKernelWrapper kernel;

        if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_constant_extended_limits, "test" ) != 0 )
        {
            return -1;
        }

        error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream );
        test_error( error, "Unable to set indexed kernel arguments" );
        error = clSetKernelArg( kernel, 1, sizeof( floatStream ), &floatStream );
        test_error( error, "Unable to set indexed kernel arguments" );

        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
        test_error( error, "Kernel execution failed" );

        error = clEnqueueReadBuffer( queue, intStream, CL_TRUE, 0, sizeof(intOut), intOut, 0, NULL, NULL );
        test_error( error, "Unable to get result data" );
        error = clEnqueueReadBuffer( queue, floatStream, CL_TRUE, 0, sizeof(floatOut), floatOut, 0, NULL, NULL );
        test_error( error, "Unable to get result data" );

        // INFINITY
        TEST_FLOAT_ASSERTION( intOut[0] == 1, "INFINITY == HUGE_VALF", intOut[0] )
        TEST_FLOAT_ASSERTION( intOut[1] == 4, "sizeof( INFINITY ) == 4", intOut[1] )
        TEST_FLOAT_ASSERTION( intOut[2] == 1, "isinf( INFINITY ) == true", intOut[2] )
        TEST_FLOAT_ASSERTION( intOut[3] == 0, "isnormal( INFINITY ) == false", intOut[3] )
        TEST_FLOAT_ASSERTION( intOut[4] == 0, "isnan( INFINITY ) == false", intOut[4] )
        TEST_FLOAT_ASSERTION( intOut[5] == 1, "INFINITY > MAXFLOAT", intOut[5] )
        TEST_FLOAT_ASSERTION( intOut[6] == 1, "-INFINITY < -MAXFLOAT", intOut[6] )
        TEST_FLOAT_ASSERTION( intOut[7] == 1, "( MAXFLOAT + MAXFLOAT ) == INFINITY", intOut[7] )
        TEST_FLOAT_ASSERTION( intOut[8] == 1, "nextafter( MAXFLOAT, INFINITY ) == INFINITY", intOut[8] )
        TEST_FLOAT_ASSERTION( intOut[9] == 1, "nextafter( -MAXFLOAT, -INFINITY ) == -INFINITY", intOut[9] )
        TEST_FLOAT_ASSERTION( intOut[10] == 1, "INFINITY = INFINITY", intOut[10] )
        TEST_FLOAT_ASSERTION( intOut[11] == 1, "asuint( INFINITY ) == 0x7f800000", intOut[11] )
        TEST_FLOAT_ASSERTION( *( (uint32_t *)&floatOut[0] ) == 0x7f800000, "asuint( INFINITY ) == 0x7f800000", floatOut[0] )
        // NOTE(review): floatOut[1] holds the device's HUGE_VALF, not
        // INFINITY (that is floatOut[0]); the message may have been meant
        // for floatOut[0]. Check left as in the original.
        TEST_FLOAT_ASSERTION( floatOut[1] == INFINITY, "INFINITY == INFINITY", floatOut[1] )

        // HUGE_VALF
        TEST_FLOAT_ASSERTION( intOut[12] == 4, "sizeof( HUGE_VALF ) == 4", intOut[12] )
        TEST_FLOAT_ASSERTION( intOut[13] == 1, "HUGE_VALF == INFINITY", intOut[13] )
        TEST_FLOAT_ASSERTION( floatOut[1] == HUGE_VALF, "HUGE_VALF == HUGE_VALF", floatOut[1] )

        // NAN
        TEST_FLOAT_ASSERTION( intOut[14] == 0, "(NAN == NAN) = false", intOut[14] )
        TEST_FLOAT_ASSERTION( intOut[15] == 1, "(NAN != NAN) = true", intOut[15] )
        TEST_FLOAT_ASSERTION( intOut[16] == 1, "isnan( NAN ) = true", intOut[16] )
        TEST_FLOAT_ASSERTION( intOut[17] == 0, "isinf( NAN ) = false", intOut[17] )
        TEST_FLOAT_ASSERTION( intOut[18] == 0, "isnormal( NAN ) = false", intOut[18] )
        TEST_FLOAT_ASSERTION( intOut[19] == 1, "( as_uint( NAN ) & 0x7fffffff ) > 0x7f800000", intOut[19] )
        TEST_FLOAT_ASSERTION( intOut[20] == 4, "sizeof( NAN ) = 4", intOut[20] )
        TEST_FLOAT_ASSERTION( ( *( (uint32_t *)&floatOut[2] ) & 0x7fffffff ) > 0x7f800000, "( as_uint( NAN ) & 0x7fffffff ) > 0x7f800000", floatOut[2] )

        // NaN-producing arithmetic and INFINITY/NAN comparisons
        TEST_FLOAT_ASSERTION( intOut[ 21 ] == 1, "isnan( INFINITY / INFINITY ) = true", intOut[ 21 ] )
        TEST_FLOAT_ASSERTION( intOut[ 22 ] == 1, "isnan( INFINITY - INFINITY ) = true", intOut[ 22 ] )
        TEST_FLOAT_ASSERTION( intOut[ 23 ] == 1, "isnan( 0.f / 0.f ) = true", intOut[ 23 ] )
        TEST_FLOAT_ASSERTION( intOut[ 24 ] == 1, "isnan( INFINITY * 0.f ) = true", intOut[ 24 ] )
        TEST_FLOAT_ASSERTION( intOut[ 25 ] == 0, "( INFINITY == NAN ) = false", intOut[ 25 ] )
        TEST_FLOAT_ASSERTION( intOut[ 26 ] == 0, "(-INFINITY == NAN ) = false", intOut[ 26 ] )
        TEST_FLOAT_ASSERTION( intOut[ 27 ] == 0, "( INFINITY > NAN ) = false", intOut[ 27 ] )
        TEST_FLOAT_ASSERTION( intOut[ 28 ] == 0, "(-INFINITY < NAN ) = false", intOut[ 28 ] )
        TEST_FLOAT_ASSERTION( intOut[ 29 ] == 1, "( INFINITY != NAN ) = true", intOut[ 29 ] )
        // Messages below now name the comparisons the kernel actually
        // performs for slots 30 and 31.
        TEST_FLOAT_ASSERTION( intOut[ 30 ] == 0, "( NAN > INFINITY ) = false", intOut[ 30 ] )
        TEST_FLOAT_ASSERTION( intOut[ 31 ] == 0, "( NAN < -INFINITY ) = false", intOut[ 31 ] )
    }

    // Stage 3: limits on HUGE_VAL (double)
    if( !is_extension_available( deviceID, "cl_khr_fp64" ) )
        log_info( "Note: Skipping double HUGE_VAL tests (doubles unsupported on device)\n" );
    else
    {
        cl_device_fp_config config = 0;
        error = clGetDeviceInfo( deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof( config ), &config, NULL );
        test_error( error, "Unable to run INFINITY/NAN tests (unable to get double FP_CONFIG bits)" );

        if( ( config & CL_FP_INF_NAN ) == 0 )
            log_info( "Skipping HUGE_VAL tests (INF/NAN not supported on this device)\n" );
        else
        {
            clProgramWrapper program;
            clKernelWrapper kernel;

            if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_constant_double_limits, "test" ) != 0 )
            {
                return -1;
            }

            doubleStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(doubleOut), NULL, &error );
            test_error( error, "Creating test array failed" );

            error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream );
            test_error( error, "Unable to set indexed kernel arguments" );
            error = clSetKernelArg( kernel, 1, sizeof( doubleStream ), &doubleStream );
            test_error( error, "Unable to set indexed kernel arguments" );

            error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
            test_error( error, "Kernel execution failed" );

            error = clEnqueueReadBuffer( queue, intStream, CL_TRUE, 0, sizeof(intOut), intOut, 0, NULL, NULL );
            test_error( error, "Unable to get result data" );
            error = clEnqueueReadBuffer( queue, doubleStream, CL_TRUE, 0, sizeof(doubleOut), doubleOut, 0, NULL, NULL );
            test_error( error, "Unable to get result data" );

            TEST_DOUBLE_ASSERTION( intOut[0] == 8, "sizeof( HUGE_VAL ) = 8", intOut[0] )
            TEST_DOUBLE_ASSERTION( intOut[1] == 1, "HUGE_VAL = INFINITY", intOut[1] )
            TEST_DOUBLE_ASSERTION( intOut[2] == 1, "isinf( HUGE_VAL ) = true", intOut[2] )
            TEST_DOUBLE_ASSERTION( intOut[3] == 0, "isnormal( HUGE_VAL ) = false", intOut[3] )
            TEST_DOUBLE_ASSERTION( intOut[4] == 0, "isnan( HUGE_VAL ) = false", intOut[4] )
            // The kernel compares HUGE_VAL against HUGE_VALF for slot 5.
            TEST_DOUBLE_ASSERTION( intOut[5] == 1, "HUGE_VAL = HUGE_VALF", intOut[5] )
            TEST_DOUBLE_ASSERTION( intOut[6] == 1, "as_ulong( HUGE_VAL ) = 0x7ff0000000000000UL", intOut[6] )
            TEST_DOUBLE_ASSERTION( *( (uint64_t *)&doubleOut[0] ) == 0x7ff0000000000000ULL, "as_ulong( HUGE_VAL ) = 0x7ff0000000000000UL", doubleOut[0] )
        }
    }

    return 0;
}

View File

@@ -0,0 +1,140 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/* OpenCL C source for test_pointer_cast: each work-item reinterprets the
 * byte buffer `src` as an array of unsigned int and copies the element at its
 * global id into `dst`. */
static const char *pointer_cast_kernel_code =
"__kernel void test_pointer_cast(__global unsigned char *src, __global unsigned int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" __global unsigned int *p = (__global unsigned int *)src;\n"
"\n"
" dst[tid] = p[tid];\n"
"\n"
"}\n";
int
verify_pointer_cast(unsigned char *inptr, unsigned int *outptr, int n)
{
unsigned int *p = (unsigned int *)inptr;
int i;
cl_uint r;
for (i=0; i<n; i++)
{
r = p[i];
if (r != outptr[i])
{
log_error("POINTER_CAST test failed\n");
return -1;
}
}
log_info("POINTER_CAST test passed\n");
return 0;
}
/*
 * Tests that a kernel can cast a __global unsigned char pointer to a
 * __global unsigned int pointer and read through it.
 *
 * Fills a host buffer with random bytes, uploads it, runs the
 * test_pointer_cast kernel with one work-item per unsigned int, reads the
 * result back and compares it with the host's own reinterpretation of the
 * same bytes.
 *
 * Returns the result of verify_pointer_cast() (0 on success), or -1 if any
 * allocation or OpenCL call fails. Every exit goes through one cleanup path,
 * so host buffers, the random generator and all OpenCL objects are released
 * on failure as well as on success. `device` is unused.
 */
int test_pointer_cast(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    /* All locals are declared and initialised up front so that `goto cleanup`
     * never jumps over an initialisation (required when built as C++). */
    cl_mem streams[2] = { NULL, NULL };
    unsigned char *input_ptr = NULL;
    unsigned int *output_ptr = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    size_t threads[1];
    size_t i;
    int err;
    int result = -1;
    MTdata d = init_genrand( gRandomSeed );
    size_t length = sizeof(int) * num_elements;

    input_ptr = (unsigned char*)malloc(length);
    output_ptr = (unsigned int*)malloc(length);
    if (input_ptr == NULL || output_ptr == NULL)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    /* Fill exactly the allocated byte count instead of assuming that
     * sizeof(int) == 4. */
    for (i = 0; i < length; i++)
        input_ptr[i] = (unsigned char)genrand_int32(d);

    free_mtdata(d);
    d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &pointer_cast_kernel_code, "test_pointer_cast" );
    if (err)
        goto cleanup;

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (unsigned int)num_elements;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    /* Blocking read: output_ptr is valid as soon as the call returns. */
    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clReadArray failed\n");
        goto cleanup;
    }

    result = verify_pointer_cast(input_ptr, output_ptr, num_elements);

cleanup:
    /* Release only what was actually created; handles start out NULL. */
    if (d != NULL)
        free_mtdata(d);
    if (streams[0] != NULL)
        clReleaseMemObject(streams[0]);
    if (streams[1] != NULL)
        clReleaseMemObject(streams[1]);
    if (kernel != NULL)
        clReleaseKernel(kernel);
    if (program != NULL)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return result;
}

View File

@@ -0,0 +1,393 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"
#include <ctype.h>
// Test __FILE__, __LINE__, __OPENCL_VERSION__, __OPENCL_C_VERSION__, __ENDIAN_LITTLE__, __ROUNDING_MODE__, __IMAGE_SUPPORT__, __FAST_RELAXED_MATH__
// __kernel_exec
//
// Kernel source template for test_kernel_preprocessor_macros. It is a printf
// format string: the single %s in the leading #line directive is replaced
// with the host source file name before the program is built.
//
// For each integer macro the kernel stores its value in results[n], or the
// sentinel 0xf00baa when the macro is not defined. The expansion of __FILE__
// is copied into outFileString. outRoundingString is declared but never
// written by this kernel.
//
// The copy loop stops at index 511 so that the terminating 0 always lands
// inside the 512-byte buffer the host allocates for outFileString.
const char *preprocessor_test = {
"#line 2 \"%s\"\n"
"__kernel void test( __global int *results, __global char *outFileString, __global char *outRoundingString )\n"
"{\n"
// Integer preprocessor macros
"#ifdef __IMAGE_SUPPORT__\n"
" results[0] = __IMAGE_SUPPORT__;\n"
"#else\n"
" results[0] = 0xf00baa;\n"
"#endif\n"
"#ifdef __ENDIAN_LITTLE__\n"
" results[1] = __ENDIAN_LITTLE__;\n"
"#else\n"
" results[1] = 0xf00baa;\n"
"#endif\n"
"#ifdef __OPENCL_VERSION__\n"
" results[2] = __OPENCL_VERSION__;\n"
"#else\n"
" results[2] = 0xf00baa;\n"
"#endif\n"
"#ifdef __OPENCL_C_VERSION__\n"
" results[3] = __OPENCL_C_VERSION__;\n"
"#else\n"
" results[3] = 0xf00baa;\n"
"#endif\n"
"#ifdef __LINE__\n"
" results[4] = __LINE__;\n"
"#else\n"
" results[4] = 0xf00baa;\n"
"#endif\n"
#if 0 // Removed by Affie's request 2/24
"#ifdef __FAST_RELAXED_MATH__\n"
" results[5] = __FAST_RELAXED_MATH__;\n"
"#else\n"
" results[5] = 0xf00baa;\n"
"#endif\n"
#endif
"#ifdef __kernel_exec\n"
" results[6] = 1;\n" // By spec, we can only really evaluate that it is defined, not what it expands to
"#else\n"
" results[6] = 0xf00baa;\n"
"#endif\n"
// String preprocessor macros. Technically, there are strings in OpenCL, but not really.
"#ifdef __FILE__\n"
" int i;\n"
" constant char *f = \"\" __FILE__;\n"
" for( i = 0; f[ i ] != 0 && i < 511; i++ )\n"
" outFileString[ i ] = f[ i ];\n"
" outFileString[ i ] = 0;\n"
"#else\n"
" outFileString[ 0 ] = 0;\n"
"#endif\n"
"}\n"
};
int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[ 3 ];
int error;
size_t threads[] = {1,1,1};
cl_int results[ 7 ];
cl_char fileString[ 512 ] = "", roundingString[ 128 ] = "";
char programSource[4096];
char curFileName[512];
char *programPtr = programSource;
int i = 0;
snprintf(curFileName, 512, "%s", __FILE__);
#ifdef _WIN32
// Replace "\" with "\\"
while(curFileName[i] != '\0') {
if (curFileName[i] == '\\') {
int j = i + 1;
char prev = '\\';
while (curFileName[j - 1] != '\0') {
char tmp = curFileName[j];
curFileName[j] = prev;
prev = tmp;
j++;
}
i++;
}
i++;
}
#endif
sprintf(programSource,preprocessor_test,curFileName);
// Create the kernel
if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test" ) != 0 )
{
return -1;
}
/* Create some I/O streams */
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(results), NULL, &error);
test_error( error, "Creating test array failed" );
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(fileString), NULL, &error);
test_error( error, "Creating test array failed" );
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(roundingString), NULL, &error);
test_error( error, "Creating test array failed" );
// Set up and run
for( int i = 0; i < 3; i++ )
{
error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] );
test_error( error, "Unable to set indexed kernel arguments" );
}
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(results), results, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(fileString), fileString, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof(roundingString), roundingString, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
/////// Check the integer results
// We need to check these values against what we know is supported on the device
if( checkForImageSupport( deviceID ) == 0 )
{
// If images are supported, the constant should have been defined to the value 1
if( results[ 0 ] == 0xf00baa )
{
log_error( "ERROR: __IMAGE_SUPPORT__ undefined even though images are supported\n" );
return -1;
}
else if( results[ 0 ] != 1 )
{
log_error( "ERROR: __IMAGE_SUPPORT__ defined, but to the wrong value (defined as %d, spec states it should be 1)\n", (int)results[ 0 ] );
return -1;
}
}
else
{
// If images aren't supported, the constant should be undefined
if( results[ 0 ] != 0xf00baa )
{
log_error( "ERROR: __IMAGE_SUPPORT__ defined to value %d even though images aren't supported", (int)results[ 0 ] );
return -1;
}
}
// __ENDIAN_LITTLE__ is similar to __IMAGE_SUPPORT__: 1 if it's true, undefined if it isn't
cl_bool deviceIsLittleEndian;
error = clGetDeviceInfo( deviceID, CL_DEVICE_ENDIAN_LITTLE, sizeof( deviceIsLittleEndian ), &deviceIsLittleEndian, NULL );
test_error( error, "Unable to get endian property of device to validate against" );
if( deviceIsLittleEndian )
{
if( results[ 1 ] == 0xf00baa )
{
log_error( "ERROR: __ENDIAN_LITTLE__ undefined even though the device is little endian\n" );
return -1;
}
else if( results[ 1 ] != 1 )
{
log_error( "ERROR: __ENDIAN_LITTLE__ defined, but to the wrong value (defined as %d, spec states it should be 1)\n", (int)results[ 1 ] );
return -1;
}
}
else
{
if( results[ 1 ] != 0xf00baa )
{
log_error( "ERROR: __ENDIAN_LITTLE__ defined to value %d even though the device is not little endian (should be undefined per spec)", (int)results[ 1 ] );
return -1;
}
}
// __OPENCL_VERSION__
if( results[ 2 ] == 0xf00baa )
{
log_error( "ERROR: Kernel preprocessor __OPENCL_VERSION__ undefined!" );
return -1;
}
// The OpenCL version reported by the macro reports the feature level supported by the compiler. Since
// this doesn't directly match any property we can query, we just check to see if it's a sane value
char versionBuffer[ 128 ];
error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( versionBuffer ), versionBuffer, NULL );
test_error( error, "Unable to get device's version to validate against" );
// We need to parse to get the version number to compare against
char *p1, *p2, *p3;
for( p1 = versionBuffer; ( *p1 != 0 ) && !isdigit( *p1 ); p1++ )
;
for( p2 = p1; ( *p2 != 0 ) && ( *p2 != '.' ); p2++ )
;
for( p3 = p2; ( *p3 != 0 ) && ( *p3 != ' ' ); p3++ )
;
if( p2 == p3 )
{
log_error( "ERROR: Unable to verify OpenCL version string (platform string is incorrect format)\n" );
return -1;
}
*p2 = 0;
*p3 = 0;
int major = atoi( p1 );
int minor = atoi( p2 + 1 );
int realVersion = ( major * 100 ) + ( minor * 10 );
if( ( results[ 2 ] < 100 ) || ( results[ 2 ] > realVersion ) )
{
log_error( "ERROR: Kernel preprocessor __OPENCL_VERSION__ does not make sense w.r.t. device's version string! "
"(preprocessor states %d, real version is %d (%d.%d))\n", results[ 2 ], realVersion, major, minor );
return -1;
}
// __OPENCL_C_VERSION__
if( results[ 3 ] == 0xf00baa )
{
log_error( "ERROR: Kernel preprocessor __OPENCL_C_VERSION__ undefined!\n" );
return -1;
}
// The OpenCL C version reported by the macro reports the OpenCL C supported by the compiler for this OpenCL device.
char cVersionBuffer[ 128 ];
error = clGetDeviceInfo( deviceID, CL_DEVICE_OPENCL_C_VERSION, sizeof( cVersionBuffer ), cVersionBuffer, NULL );
test_error( error, "Unable to get device's OpenCL C version to validate against" );
// We need to parse to get the version number to compare against
for( p1 = cVersionBuffer; ( *p1 != 0 ) && !isdigit( *p1 ); p1++ )
;
for( p2 = p1; ( *p2 != 0 ) && ( *p2 != '.' ); p2++ )
;
for( p3 = p2; ( *p3 != 0 ) && ( *p3 != ' ' ); p3++ )
;
if( p2 == p3 )
{
log_error( "ERROR: Unable to verify OpenCL C version string (platform string is incorrect format)\n" );
return -1;
}
*p2 = 0;
*p3 = 0;
major = atoi( p1 );
minor = atoi( p2 + 1 );
realVersion = ( major * 100 ) + ( minor * 10 );
if( ( results[ 3 ] < 100 ) || ( results[ 3 ] > realVersion ) )
{
log_error( "ERROR: Kernel preprocessor __OPENCL_C_VERSION__ does not make sense w.r.t. device's version string! "
"(preprocessor states %d, real version is %d (%d.%d))\n", results[ 2 ], realVersion, major, minor );
return -1;
}
// __LINE__
if( results[ 4 ] == 0xf00baa )
{
log_error( "ERROR: Kernel preprocessor __LINE__ undefined!" );
return -1;
}
// This is fun--we get to search for where __LINE__ actually is so we know what line it should define to!
// Note: it shows up twice, once for the #ifdef, and the other for the actual result output
const char *linePtr = strstr( preprocessor_test, "__LINE__" );
if( linePtr == NULL )
{
log_error( "ERROR: Nonsensical NULL pointer encountered!" );
return -2;
}
linePtr = strstr( linePtr + strlen( "__LINE__" ), "__LINE__" );
if( linePtr == NULL )
{
log_error( "ERROR: Nonsensical NULL pointer encountered!" );
return -2;
}
// Now count how many carriage returns are before the string
const char *retPtr = strchr( preprocessor_test, '\n' );
int retCount = 1;
for( ; ( retPtr < linePtr ) && ( retPtr != NULL ); retPtr = strchr( retPtr + 1, '\n' ) )
retCount++;
if( retCount != results[ 4 ] )
{
log_error( "ERROR: Kernel preprocessor __LINE__ does not expand to the actual line number! (expanded to %d, but was on line %d)\n",
results[ 4 ], retCount );
return -1;
}
#if 0 // Removed by Affie's request 2/24
// __FAST_RELAXED_MATH__
// Since create_single_kernel_helper does NOT define -cl-fast-relaxed-math, this should be undefined
if( results[ 5 ] != 0xf00baa )
{
log_error( "ERROR: Kernel preprocessor __FAST_RELAXED_MATH__ defined even though build option was not used (should be undefined)\n" );
return -1;
}
#endif
// __kernel_exec
// We can ONLY check to verify that it is defined
if( results[ 6 ] == 0xf00baa )
{
log_error( "ERROR: Kernel preprocessor __kernel_exec must be defined\n" );
return -1;
}
//// String preprocessors
// Since we provided the program directly, __FILE__ should compile to "<program source>".
if( fileString[ 0 ] == 0 )
{
log_error( "ERROR: Kernel preprocessor __FILE__ undefined!\n" );
return -1;
}
else if( strncmp( (char *)fileString, __FILE__, 512 ) != 0 )
{
log_info( "WARNING: __FILE__ defined, but to an unexpected value (%s)\n\tShould be: \"%s\"", fileString, __FILE__ );
return -1;
}
#if 0 // Removed by Affie's request 2/24
// One more try through: try with -cl-fast-relaxed-math to make sure the appropriate preprocessor gets defined
clProgramWrapper programB = clCreateProgramWithSource( context, 1, preprocessor_test, NULL, &error );
test_error( error, "Unable to create test program" );
// Try compiling
error = clBuildProgram( programB, 1, &deviceID, "-cl-fast-relaxed-math", NULL, NULL );
test_error( error, "Unable to build program" );
// Create a kernel again to run against
clKernelWrapper kernelB = clCreateKernel( programB, "test", &error );
test_error( error, "Unable to create testing kernel" );
// Set up and run
for( int i = 0; i < 3; i++ )
{
error = clSetKernelArg( kernelB, i, sizeof( streams[i] ), &streams[i] );
test_error( error, "Unable to set indexed kernel arguments" );
}
error = clEnqueueNDRangeKernel( queue, kernelB, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
// Only need the one read
error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(results), results, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
// We only need to check the one result this time
if( results[ 5 ] == 0xf00baa )
{
log_error( "ERROR: Kernel preprocessor __FAST_RELAXED_MATH__ not defined!\n" );
return -1;
}
else if( results[ 5 ] != 1 )
{
log_error( "ERROR: Kernel preprocessor __FAST_RELAXED_MATH__ not defined to 1 (was %d)\n", results[ 5 ] );
return -1;
}
#endif
return 0;
}

View File

@@ -0,0 +1,244 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the BGRA variant of the 2D read test. Each work-item
// samples srcimg with read_imagef at (get_global_id(0), get_global_id(1)),
// scales the float4 result by 255.0f, reorders it with .zyxw (x and z
// exchanged) and stores convert_uchar4_rte of that value at
// dst[tid_y * get_image_width(srcimg) + tid_x].
static const char *bgra8888_kernel_code =
"\n"
"__kernel void test_bgra8888(read_only image2d_t srcimg, __global uchar4 *dst, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
" float4 color;\n"
"\n"
" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y)) * 255.0f;\n"
" dst[indx] = convert_uchar4_rte(color.zyxw);\n"
"\n"
"}\n";
// OpenCL C source for the RGBA variant of the 2D read test. Identical to the
// BGRA kernel except that the float4 returned by read_imagef (scaled by
// 255.0f) is converted with convert_uchar4_rte and stored without any channel
// reordering.
static const char *rgba8888_kernel_code =
"\n"
"__kernel void test_rgba8888(read_only image2d_t srcimg, __global uchar4 *dst, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
" float4 color;\n"
"\n"
" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y)) * 255.0f;\n"
" dst[indx] = convert_uchar4_rte(color);\n"
"\n"
"}\n";
// Allocates a w x h image with four one-byte channels per pixel and fills
// every byte with genrand_int32(d) cast to unsigned char.
//
// Returns the malloc'ed buffer (the caller frees it), or NULL when the
// allocation fails.
static unsigned char *
generate_8888_image(int w, int h, MTdata d)
{
    unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
    int i;

    // Fix: the original wrote through the malloc result without checking it.
    if (ptr == NULL)
    {
        log_error("generate_8888_image: unable to allocate image data\n");
        return NULL;
    }

    for (i=0; i<w*h*4; i++)
        ptr[i] = (unsigned char)genrand_int32( d);

    return ptr;
}
// Compares the bytes read back from the BGRA kernel (outptr) against the host
// data that was uploaded to the image (image). Both buffers hold w*h pixels
// of four bytes each.
//
// Returns 0 and logs a "passed" message when every byte matches; logs a
// failure and returns -1 at the first mismatch.
static int
verify_bgra8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    int i;

    // Fix: the original loop bound was w*h, which examined only the first
    // quarter of the w*h*4 bytes. Every channel of every pixel is now
    // compared, as verify_rgba8888_image already does.
    for (i=0; i<w*h*4; i++)
    {
        if (outptr[i] != image[i])
        {
            log_error("READ_IMAGE_BGRA_UNORM_INT8 test failed\n");
            return -1;
        }
    }

    log_info("READ_IMAGE_BGRA_UNORM_INT8 test passed\n");
    return 0;
}
// Byte-wise comparison of the RGBA kernel output (outptr) with the uploaded
// host data (image) over all w*h*4 bytes. Logs the outcome and returns 0 on a
// full match, -1 on the first difference.
static int
verify_rgba8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    const int total = w * h * 4;
    int idx = 0;

    while (idx < total)
    {
        if (image[idx] != outptr[idx])
        {
            log_error("READ_IMAGE_RGBA_UNORM_INT8 test failed\n");
            return -1;
        }
        idx++;
    }

    log_info("READ_IMAGE_RGBA_UNORM_INT8 test passed\n");
    return 0;
}
// Reads two 512x512 CL_UNORM_INT8 images (one CL_BGRA, one CL_RGBA) through
// read_imagef kernels and checks that the bytes written to a buffer match the
// random host data that was uploaded.
//
// Returns 0 when both formats verify (or when PASSIVE_REQUIRE_IMAGE_SUPPORT
// skips the test), and a non-zero value on any failure.
//
// Fix: the original returned directly from every error path and leaked the
// host buffers and any CL objects created so far. All handles now start out
// NULL and every failure funnels through one cleanup section. The host
// allocations are also checked before use.
int test_readimage(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[3] = { NULL, NULL, NULL };
    cl_program program[2] = { NULL, NULL };
    cl_kernel kernel[2] = { NULL, NULL };
    cl_sampler sampler = NULL;
    cl_image_format img_format;
    unsigned char *input_ptr[2] = { NULL, NULL };
    unsigned char *output_ptr = NULL;
    size_t threads[2];
    int img_width = 512;
    int img_height = 512;
    int i, err;
    int result = -1;   // pessimistic default; set to the verify result on completion
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {img_width, img_height, 1};
    size_t length = img_width * img_height * 4 * sizeof(unsigned char);
    MTdata d;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    d = init_genrand( gRandomSeed );
    input_ptr[0] = generate_8888_image(img_width, img_height, d);
    input_ptr[1] = generate_8888_image(img_width, img_height, d);
    free_mtdata(d); d = NULL;
    output_ptr = (unsigned char*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !output_ptr)
    {
        log_error("Unable to allocate host buffers\n");
        goto cleanup;
    }

    img_format.image_channel_order = CL_BGRA;
    img_format.image_channel_data_type = CL_UNORM_INT8;
    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[0])
    {
        log_error("create_image_2d failed\n");
        goto cleanup;
    }

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT8;
    streams[1] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[1])
    {
        log_error("create_image_2d failed\n");
        goto cleanup;
    }

    // Shared output buffer; each kernel overwrites it in turn.
    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[2])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr[0], 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteImage failed\n");
        goto cleanup;
    }
    err = clEnqueueWriteImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, input_ptr[1], 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteImage failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &bgra8888_kernel_code, "test_bgra8888" );
    if (err)
        goto cleanup;
    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &rgba8888_kernel_code, "test_rgba8888" );
    if (err)
        goto cleanup;

    sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
    if (err != CL_SUCCESS || !sampler)
    {
        log_error("clCreateSampler failed (error %d)\n", err);
        goto cleanup;
    }

    err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel[0], 1, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArg failed\n");
        goto cleanup;
    }
    err = clSetKernelArg(kernel[1], 0, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel[1], 1, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel[1], 2, sizeof sampler, &sampler);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArg failed\n");
        goto cleanup;
    }

    threads[0] = (unsigned int)img_width;
    threads[1] = (unsigned int)img_height;

    // Run kernel i, read the shared buffer back, and verify it against the
    // data uploaded to image i. Stop at the first verification failure.
    for (i=0; i<2; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("%s clEnqueueNDRangeKernel failed\n", __FUNCTION__);
            goto cleanup;
        }
        err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueReadBuffer failed\n");
            goto cleanup;
        }
        switch (i)
        {
            case 0:
                err = verify_bgra8888_image(input_ptr[i], output_ptr, img_width, img_height);
                break;
            case 1:
                err = verify_rgba8888_image(input_ptr[i], output_ptr, img_width, img_height);
                break;
        }
        if (err)
            break;
    }
    result = err;

cleanup:
    // Release only what was actually created; free(NULL) is a no-op.
    if (sampler)
        clReleaseSampler(sampler);
    for (i=0; i<3; i++)
    {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    for (i=0; i<2; i++)
    {
        if (kernel[i])
            clReleaseKernel(kernel[i]);
        if (program[i])
            clReleaseProgram(program[i]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(output_ptr);
    return result;
}

View File

@@ -0,0 +1,230 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the BGRA variant of the 3D read test. Each work-item
// samples srcimg with read_imagef at its 3D global id and stores the float4
// at dst[(z * height + y) * width + x] with the x and z components exchanged
// (dst.x = color.z, dst.z = color.x); y and w are copied unchanged.
static const char *bgra8888_kernel_code =
"\n"
"__kernel void test_bgra8888(read_only image3d_t srcimg, __global float4 *dst, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int tid_z = get_global_id(2);\n"
" int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n"
" float4 color;\n"
"\n"
" color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n"
" dst[indx].x = color.z;\n"
" dst[indx].y = color.y;\n"
" dst[indx].z = color.x;\n"
" dst[indx].w = color.w;\n"
"\n"
"}\n";
// OpenCL C source for the RGBA variant of the 3D read test. Each work-item
// samples srcimg with read_imagef at its 3D global id and copies the four
// components of the result, in order, into dst[(z * height + y) * width + x].
static const char *rgba8888_kernel_code =
"\n"
"__kernel void test_rgba8888(read_only image3d_t srcimg, __global float4 *dst, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int tid_z = get_global_id(2);\n"
" int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n"
" float4 color;\n"
"\n"
" color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n"
" //indx *= 4;\n"
" dst[indx].x = color.x;\n"
" dst[indx].y = color.y;\n"
" dst[indx].z = color.z;\n"
" dst[indx].w = color.w;\n"
"\n"
"}\n";
// Allocates a w x h x d volume with four one-byte channels per texel and
// fills every byte with genrand_int32(data) cast to unsigned char.
//
// Returns the malloc'ed buffer (the caller frees it), or NULL when the
// allocation fails.
static unsigned char *
generate_3d_image8(int w, int h, int d, MTdata data)
{
    unsigned char *ptr = (unsigned char*)malloc(w * h * d * 4);
    int i;

    // Fix: the original wrote through the malloc result without checking it.
    if (ptr == NULL)
    {
        log_error("generate_3d_image8: unable to allocate image data\n");
        return NULL;
    }

    for (i=0; i<w*h*d*4; i++)
        ptr[i] = (unsigned char)genrand_int32(data);

    return ptr;
}
// Checks each of the w*h*d*4 floats in outptr against the double-precision
// reference in image. A sample is accepted when it equals the reference
// rounded to float, or when the magnitude of Ulp_Error() for the pair is
// below 1.5. Logs the first rejected sample and returns -1; returns 0 when
// every sample is accepted.
static int
verify_3d_image8(double *image, float *outptr, int w, int h, int d)
{
    const int count = w * h * d * 4;
    int n;

    for (n = 0; n < count; n++)
    {
        float ulps;

        if (outptr[n] == (float)image[n])
            continue;

        ulps = Ulp_Error( outptr[n], image[n]);
        if (fabsf(ulps) < 1.5f)
            continue;

        log_error( "ERROR: Data sample %d does not validate! Expected (%a), got (%a), ulp %f\n",
                   (int)n, image[n], outptr[ n ], ulps );
        return -1;
    }

    return 0;
}
// Builds the double-precision reference for a w x h x d, four-channel 8-bit
// image: each output element is the corresponding input byte divided by 255.
//
// Returns a malloc'ed array of w*h*d*4 doubles (the caller frees it), or NULL
// when input_ptr is NULL or the allocation fails.
static double *
prepare_reference(unsigned char * input_ptr, int w, int h, int d)
{
    double *ptr;
    int i;

    // Fix: the original read through input_ptr and wrote through the malloc
    // result without checking either pointer.
    if (input_ptr == NULL)
        return NULL;

    ptr = (double*)malloc(w * h * d * 4 * sizeof(double));
    if (ptr == NULL)
    {
        log_error("prepare_reference: unable to allocate reference data\n");
        return NULL;
    }

    for (i=0; i<w*h*d*4; i++)
        ptr[i] = ((double)input_ptr[i]/255);

    return ptr;
}
// Reads two 64x64x64 CL_UNORM_INT8 3D images (one CL_BGRA, one CL_RGBA)
// through read_imagef kernels that store float4 results, and checks the
// output against a host reference (input byte / 255) using verify_3d_image8.
//
// Returns 0 when both formats verify and a non-zero value otherwise.
//
// Fixes relative to the original:
//  - The "test passed" messages were guarded by `err != 0`, so they were
//    printed only when verification had FAILED. They are now printed on
//    success (err == 0).
//  - Host allocations are checked before they are used.
//
// NOTE(review): test_error presumably returns from the function on failure
// (its definition is not in this file); on those paths the host buffers and
// CL objects created so far are still not released.
int test_readimage3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[3];
    cl_program program[2];
    cl_kernel kernel[2];
    cl_image_format img_format;
    unsigned char *input_ptr[2];
    float *output_ptr;
    double *ref_ptr[2] = { NULL, NULL };
    size_t threads[3];
    int img_width = 64;
    int img_height = 64;
    int img_depth = 64;
    int i, err;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {img_width, img_height, img_depth};
    size_t length = img_width * img_height * img_depth * 4 * sizeof(float);

    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )

    MTdata d = init_genrand( gRandomSeed );
    input_ptr[0] = generate_3d_image8(img_width, img_height, img_depth, d);
    input_ptr[1] = generate_3d_image8(img_width, img_height, img_depth, d);
    free_mtdata(d); d = NULL;

    // Only derive the references when both inputs were actually allocated.
    if (input_ptr[0] && input_ptr[1])
    {
        ref_ptr[0] = prepare_reference(input_ptr[0], img_width, img_height, img_depth);
        ref_ptr[1] = prepare_reference(input_ptr[1], img_width, img_height, img_depth);
    }
    output_ptr = (float*)malloc(length);

    if (!ref_ptr[0] || !ref_ptr[1] || !output_ptr)
    {
        log_error("Unable to allocate host buffers\n");
        free(input_ptr[0]);
        free(input_ptr[1]);
        free(ref_ptr[0]);
        free(ref_ptr[1]);
        free(output_ptr);
        return -1;
    }

    img_format.image_channel_order = CL_BGRA;
    img_format.image_channel_data_type = CL_UNORM_INT8;
    streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
    test_error(err, "create_image_3d failed");

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT8;
    streams[1] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
    test_error(err, "create_image_3d failed");

    // Shared output buffer; each kernel overwrites it in turn.
    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
    test_error(err, "clCreateBuffer failed");

    err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr[0], 0, NULL, NULL);
    test_error(err, "clEnqueueWriteImage failed");
    err = clEnqueueWriteImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, input_ptr[1], 0, NULL, NULL);
    test_error(err, "clEnqueueWriteImage failed");

    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &bgra8888_kernel_code, "test_bgra8888" );
    if (err)
        return -1;
    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &rgba8888_kernel_code, "test_rgba8888" );
    if (err)
        return -1;

    cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
    test_error(err, "clCreateSampler failed");

    err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel[0], 1, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler);
    test_error(err, "clSetKernelArg failed");
    err = clSetKernelArg(kernel[1], 0, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel[1], 1, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel[1], 2, sizeof sampler, &sampler);
    test_error(err, "clSetKernelArg failed");

    threads[0] = (unsigned int)img_width;
    threads[1] = (unsigned int)img_height;
    threads[2] = (unsigned int)img_depth;

    for (i=0; i<2; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 3, NULL, threads, NULL, 0, NULL, NULL);
        test_error(err, "clEnqueueNDRangeKernel failed");
        err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        test_error(err, "clEnqueueReadBuffer failed");

        err = verify_3d_image8(ref_ptr[i], output_ptr, img_width, img_height, img_depth);
        if (err)
            break;   // verify_3d_image8 has already logged the mismatch

        // Report success for the format that was just verified.
        if (i == 0)
            log_info("READ_IMAGE3D_BGRA_UNORM_INT8 test passed\n");
        else
            log_info("READ_IMAGE3D_RGBA_UNORM_INT8 test passed\n");
    }

    // cleanup
    clReleaseSampler(sampler);
    clReleaseMemObject(streams[0]);
    clReleaseMemObject(streams[1]);
    clReleaseMemObject(streams[2]);
    for (i=0; i<2; i++)
    {
        clReleaseKernel(kernel[i]);
        clReleaseProgram(program[i]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(output_ptr);
    free(ref_ptr[0]);
    free(ref_ptr[1]);
    return err;
}

View File

@@ -0,0 +1,147 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the 3D float read test. Each work-item samples srcimg
// with read_imagef at its 3D global id and writes the four components of the
// result to four consecutive floats of dst, starting at
// 4 * ((z * height + y) * width + x).
static const char *rgbaFFFF_kernel_code =
"__kernel void test_rgbaFFFF(read_only image3d_t srcimg, __global float *dst, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int tid_z = get_global_id(2);\n"
" int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n"
" float4 color;\n"
"\n"
" color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n"
" indx *= 4;\n"
" dst[indx+0] = color.x;\n"
" dst[indx+1] = color.y;\n"
" dst[indx+2] = color.z;\n"
" dst[indx+3] = color.w;\n"
"\n"
"}\n";
// Allocates a w x h x d volume with four float channels per texel and fills
// every element with get_random_float(-0x40000000, 0x40000000, data).
//
// Returns the malloc'ed buffer (the caller frees it), or NULL when the
// allocation fails.
static float *
generate_float_image(int w, int h, int d, MTdata data)
{
    float *ptr = (float*)malloc(w * h * d * 4 * sizeof(float));
    int i;

    // Fix: the original wrote through the malloc result without checking it.
    if (ptr == NULL)
    {
        log_error("generate_float_image: unable to allocate image data\n");
        return NULL;
    }

    for (i=0; i<w*h*d*4; i++)
        ptr[i] = get_random_float(-0x40000000, 0x40000000, data);

    return ptr;
}
// Requires the w*h*d*4 floats in outptr to compare equal, element by element,
// to those in image. Logs the outcome; returns 0 on a full match and -1 at
// the first difference.
static int
verify_float_image(float *image, float *outptr, int w, int h, int d)
{
    const float *expected = image;
    const float *actual = outptr;
    int remaining;

    for (remaining = w*h*d*4; remaining > 0; remaining--, expected++, actual++)
    {
        if (*actual != *expected)
        {
            log_error("READ_IMAGE3D_RGBA_FLOAT test failed\n");
            return -1;
        }
    }

    log_info("READ_IMAGE3D_RGBA_FLOAT test passed\n");
    return 0;
}
// Uploads random float data to a 64x64x64 CL_RGBA / CL_FLOAT 3D image, runs
// the test_rgbaFFFF kernel over the whole volume, reads the output buffer
// back and returns the result of verify_float_image (0 on an exact match).
int test_readimage3d_fp32(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    int width = 64;
    int height = 64;
    int depth = 64;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {width, height, depth};
    size_t num_bytes = width * height * depth * 4 * sizeof(float);
    size_t global_size[3];
    cl_image_format format;
    cl_mem src_image;
    cl_mem dst_buffer;
    cl_program program;
    cl_kernel kernel;
    float *host_in;
    float *host_out;
    int err;

    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )

    // Host-side data: random input plus a destination for the read-back.
    MTdata rng = init_genrand( gRandomSeed );
    host_in = generate_float_image(width, height, depth, rng);
    free_mtdata(rng); rng = NULL;
    host_out = (float*)malloc(num_bytes);

    // Device-side objects.
    format.image_channel_data_type = CL_FLOAT;
    format.image_channel_order = CL_RGBA;
    src_image = create_image_3d(context, CL_MEM_READ_ONLY, &format, width, height, depth, 0, 0, NULL, &err);
    test_error(err, "create_image_3d failed");

    dst_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, num_bytes, NULL, &err);
    test_error(err, "clCreateBuffer failed");

    err = clEnqueueWriteImage(queue, src_image, CL_TRUE, origin, region, 0, 0, host_in, 0, NULL, NULL);
    test_error(err, "clEnqueueWriteImage failed");

    err = create_single_kernel_helper(context, &program, &kernel, 1, &rgbaFFFF_kernel_code, "test_rgbaFFFF" );
    if (err)
        return -1;

    cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
    test_error(err, "clCreateSampler failed");

    err = clSetKernelArg(kernel, 0, sizeof src_image, &src_image);
    err |= clSetKernelArg(kernel, 1, sizeof dst_buffer, &dst_buffer);
    err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
    test_error(err, "clSetKernelArg failed");

    // One work-item per texel.
    global_size[0] = (unsigned int)width;
    global_size[1] = (unsigned int)height;
    global_size[2] = (unsigned int)depth;
    err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_size, NULL, 0, NULL, NULL);
    test_error(err, "clEnqueueNDRangeKernel failed");

    err = clEnqueueReadBuffer(queue, dst_buffer, CL_TRUE, 0, num_bytes, host_out, 0, NULL, NULL);
    test_error(err, "clEnqueueReadBuffer failed");

    err = verify_float_image(host_in, host_out, width, height, depth);

    // Tear everything down before reporting the verification result.
    clReleaseSampler(sampler);
    clReleaseMemObject(src_image);
    clReleaseMemObject(dst_buffer);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    free(host_in);
    free(host_out);

    return err;
}

View File

@@ -0,0 +1,146 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the 3D 16-bit read test. Each work-item samples srcimg
// with read_imagef at its 3D global id, multiplies each component by 65535.0f,
// converts it with convert_ushort_rte and stores the resulting ushort4 at
// dst[(z * height + y) * width + x].
static const char *rgba16_kernel_code =
"__kernel void test_rgba16(read_only image3d_t srcimg, __global ushort4 *dst, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int tid_z = get_global_id(2);\n"
" int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n"
" float4 color;\n"
"\n"
" color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n"
" ushort4 dst_write;\n"
" dst_write.x = convert_ushort_rte(color.x * 65535.0f);\n"
" dst_write.y = convert_ushort_rte(color.y * 65535.0f);\n"
" dst_write.z = convert_ushort_rte(color.z * 65535.0f);\n"
" dst_write.w = convert_ushort_rte(color.w * 65535.0f);\n"
" dst[indx] = dst_write;\n"
"\n"
"}\n";
// Allocates a w x h x d volume with four cl_ushort channels per texel and
// fills every element with genrand_int32(data) cast to cl_ushort.
//
// Returns the malloc'ed buffer (the caller frees it), or NULL when the
// allocation fails.
static unsigned short *
generate_16bit_image(int w, int h, int d, MTdata data)
{
    unsigned short *ptr = (cl_ushort*)malloc(w * h * d * 4 * sizeof(cl_ushort));
    int i;

    // Fix: the original wrote through the malloc result without checking it.
    if (ptr == NULL)
    {
        log_error("generate_16bit_image: unable to allocate image data\n");
        return NULL;
    }

    for (i=0; i<w*h*d*4; i++)
        ptr[i] = (cl_ushort)genrand_int32(data);

    return ptr;
}
// Requires the w*h*d*4 cl_ushort values in outptr to equal those in image.
// Logs the outcome; returns 0 on a full match and -1 at the first difference.
static int
verify_16bit_image(cl_ushort *image, cl_ushort *outptr, int w, int h, int d)
{
    const int total = w * h * d * 4;
    int idx = 0;

    while (idx < total)
    {
        if (image[idx] != outptr[idx])
        {
            log_error("READ_IMAGE3D_RGBA_UNORM_INT16 test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("READ_IMAGE3D_RGBA_UNORM_INT16 test passed\n");
    return 0;
}
// Uploads random 16-bit data to a 64x64x64 CL_RGBA / CL_UNORM_INT16 3D image,
// runs the test_rgba16 kernel over the whole volume, reads the output buffer
// back and returns the result of verify_16bit_image (0 on an exact match).
int test_readimage3d_int16(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    int width = 64;
    int height = 64;
    int depth = 64;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {width, height, depth};
    size_t num_bytes = width * height * depth * 4 * sizeof(cl_ushort);
    size_t global_size[3];
    cl_image_format format;
    cl_mem src_image;
    cl_mem dst_buffer;
    cl_program program;
    cl_kernel kernel;
    cl_ushort *host_in;
    cl_ushort *host_out;
    int err;

    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )

    // Host-side data: random input plus a destination for the read-back.
    MTdata rng = init_genrand( gRandomSeed );
    host_in = generate_16bit_image(width, height, depth, rng);
    free_mtdata(rng); rng = NULL;
    host_out = (cl_ushort*)malloc(num_bytes);

    // Device-side objects.
    format.image_channel_data_type = CL_UNORM_INT16;
    format.image_channel_order = CL_RGBA;
    src_image = create_image_3d(context, CL_MEM_READ_ONLY, &format, width, height, depth, 0, 0, NULL, &err);
    test_error(err, "create_image_3d failed");

    dst_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, num_bytes, NULL, &err);
    test_error(err, "clCreateBuffer failed");

    err = clEnqueueWriteImage(queue, src_image, CL_TRUE, origin, region, 0, 0, host_in, 0, NULL, NULL);
    test_error(err, "clEnqueueWriteImage failed");

    err = create_single_kernel_helper(context, &program, &kernel, 1, &rgba16_kernel_code, "test_rgba16" );
    if (err)
        return -1;

    cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
    test_error(err, "clCreateSampler failed");

    err = clSetKernelArg(kernel, 0, sizeof src_image, &src_image);
    err |= clSetKernelArg(kernel, 1, sizeof dst_buffer, &dst_buffer);
    err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
    test_error(err, "clSetKernelArg failed");

    // One work-item per texel.
    global_size[0] = (unsigned int)width;
    global_size[1] = (unsigned int)height;
    global_size[2] = (unsigned int)depth;
    err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_size, NULL, 0, NULL, NULL);
    test_error(err, "clEnqueueNDRangeKernel failed");

    err = clEnqueueReadBuffer(queue, dst_buffer, CL_TRUE, 0, num_bytes, host_out, 0, NULL, NULL);
    test_error(err, "clEnqueueReadBuffer failed");

    err = verify_16bit_image(host_in, host_out, width, height, depth);

    // Tear everything down before reporting the verification result.
    clReleaseSampler(sampler);
    clReleaseMemObject(src_image);
    clReleaseMemObject(dst_buffer);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    free(host_in);
    free(host_out);

    return err;
}

View File

@@ -0,0 +1,167 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the 2D float read test. Each work-item samples srcimg
// with read_imagef at (get_global_id(0), get_global_id(1)) and writes the four
// components of the result to four consecutive floats of dst, starting at
// 4 * (tid_y * get_image_width(srcimg) + tid_x).
static const char *rgbaFFFF_kernel_code =
"__kernel void test_rgbaFFFF(read_only image2d_t srcimg, __global float *dst, sampler_t smp)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
" float4 color;\n"
"\n"
" color = read_imagef(srcimg, smp, (int2)(tid_x, tid_y));\n"
" indx *= 4;\n"
" dst[indx+0] = color.x;\n"
" dst[indx+1] = color.y;\n"
" dst[indx+2] = color.z;\n"
" dst[indx+3] = color.w;\n"
"\n"
"}\n";
// Allocates a w x h image with four float channels per pixel and fills every
// element with get_random_float(-0x40000000, 0x40000000, d).
//
// Returns the malloc'ed buffer (the caller frees it), or NULL when the
// allocation fails.
static float *
generate_float_image(int w, int h, MTdata d)
{
    float *ptr = (float*)malloc(w * h * 4 * sizeof(float));
    int i;

    // Fix: the original wrote through the malloc result without checking it.
    if (ptr == NULL)
    {
        log_error("generate_float_image: unable to allocate image data\n");
        return NULL;
    }

    for (i=0; i<w*h*4; i++)
        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);

    return ptr;
}
// Requires the w*h*4 floats in outptr to compare equal, element by element,
// to those in image. Logs the outcome; returns 0 on a full match and -1 at
// the first difference.
static int
verify_float_image(float *image, float *outptr, int w, int h)
{
    const float *expected = image;
    const float *actual = outptr;
    int remaining;

    for (remaining = w*h*4; remaining > 0; remaining--, expected++, actual++)
    {
        if (*actual != *expected)
        {
            log_error("READ_IMAGE_RGBA_FLOAT test failed\n");
            return -1;
        }
    }

    log_info("READ_IMAGE_RGBA_FLOAT test passed\n");
    return 0;
}
// Uploads random float data to a 512x512 CL_RGBA / CL_FLOAT 2D image, runs
// the test_rgbaFFFF kernel over it, reads the output buffer back and checks
// it with verify_float_image.
//
// Returns 0 on an exact match (or when PASSIVE_REQUIRE_IMAGE_SUPPORT skips the
// test) and a non-zero value on any failure.
//
// Fixes relative to the original:
//  - Error messages named functions that are not the ones being called
//    ("clCreateArray", "clWriteImage", "clSetKernelArgs"); they now name the
//    actual API entry points.
//  - Every early return leaked the host buffers and any CL objects created
//    so far. All handles now start out NULL and failures funnel through one
//    cleanup section. Host allocations are checked before use.
int test_readimage_fp32(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[2] = { NULL, NULL };
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_sampler sampler = NULL;
    cl_image_format img_format;
    float *input_ptr = NULL;
    float *output_ptr = NULL;
    size_t threads[2];
    int img_width = 512;
    int img_height = 512;
    int err;
    int result = -1;   // pessimistic default; set to the verify result on completion
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {img_width, img_height, 1};
    size_t length = img_width * img_height * 4 * sizeof(float);
    MTdata d;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    d = init_genrand( gRandomSeed );
    input_ptr = generate_float_image(img_width, img_height, d);
    free_mtdata(d); d = NULL;
    output_ptr = (float*)malloc(length);
    if (!input_ptr || !output_ptr)
    {
        log_error("Unable to allocate host buffers\n");
        goto cleanup;
    }

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_FLOAT;
    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[0])
    {
        log_error("create_image_2d failed\n");
        goto cleanup;
    }

    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteImage failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &rgbaFFFF_kernel_code, "test_rgbaFFFF" );
    if (err)
        goto cleanup;

    sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
    if (err != CL_SUCCESS || !sampler)
    {
        log_error("clCreateSampler failed (error %d)\n", err);
        goto cleanup;
    }

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArg failed\n");
        goto cleanup;
    }

    // One work-item per pixel.
    threads[0] = (unsigned int)img_width;
    threads[1] = (unsigned int)img_height;
    err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("%s clEnqueueNDRangeKernel failed\n", __FUNCTION__);
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_float_image(input_ptr, output_ptr, img_width, img_height);

cleanup:
    // Release only what was actually created; free(NULL) is a no-op.
    if (sampler)
        clReleaseSampler(sampler);
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);
    return result;
}

View File

@@ -0,0 +1,166 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source of the device kernel used by test_readimage_int16.
// Each work-item reads the pixel at its (x, y) global id from srcimg with
// read_imagef, multiplies every channel by 65535.0f, converts it to ushort
// with round-to-nearest-even (convert_ushort_rte) and stores the ushort4 in
// dst at index y * get_image_width(srcimg) + x.
static const char *rgba16_kernel_code =
"__kernel void test_rgba16(read_only image2d_t srcimg, __global ushort4 *dst, sampler_t smp)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
" float4 color;\n"
"\n"
" color = read_imagef(srcimg, smp, (int2)(tid_x, tid_y));\n"
" ushort4 dst_write;\n"
" dst_write.x = convert_ushort_rte(color.x * 65535.0f);\n"
" dst_write.y = convert_ushort_rte(color.y * 65535.0f);\n"
" dst_write.z = convert_ushort_rte(color.z * 65535.0f);\n"
" dst_write.w = convert_ushort_rte(color.w * 65535.0f);\n"
" dst[indx] = dst_write;\n"
"\n"
"}\n";
// Allocates a w x h image with four 16-bit channels per pixel and fills every
// channel with the low 16 bits of a value drawn from the generator d.
// Returns the malloc'ed buffer (the caller frees it), or NULL when the
// allocation fails.
static unsigned short *
generate_16bit_image(int w, int h, MTdata d)
{
    // Do the size arithmetic in size_t so it cannot overflow int.
    size_t count = (size_t)w * (size_t)h * 4;
    cl_ushort *ptr = (cl_ushort*)malloc(count * sizeof(cl_ushort));
    size_t i;

    if (ptr == NULL)
        return NULL;

    for (i = 0; i < count; i++)
        ptr[i] = (cl_ushort)genrand_int32(d);
    return ptr;
}
// Compares the w x h x 4 channel values read back from the device (outptr)
// against the reference data (image). Logs and returns -1 at the first
// differing channel; logs success and returns 0 when all channels match.
static int
verify_16bit_image(cl_ushort *image, cl_ushort *outptr, int w, int h)
{
    const int channel_count = w * h * 4;
    int idx = 0;

    while (idx < channel_count)
    {
        if (image[idx] != outptr[idx])
        {
            log_error("READ_IMAGE_RGBA_UNORM_INT16 test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("READ_IMAGE_RGBA_UNORM_INT16 test passed\n");
    return 0;
}
// Round-trip test for CL_RGBA / CL_UNORM_INT16 2D images.
//
// Writes a 512x512 image of random 16-bit channel values to the device, runs
// the test_rgba16 kernel (read_imagef -> scale by 65535 -> ushort4 store into
// a buffer), reads the buffer back and compares it with the input.
//
// Returns 0 on success, -1 on most failures, or the OpenCL error code when
// sampler creation fails. Every exit after the image-support check goes
// through the single cleanup path, so no host buffer or CL object is leaked.
int test_readimage_int16(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // All handles start out NULL so cleanup can tell what was really created.
    // Every local is declared before the first goto so that no jump crosses an
    // initialization (which would be ill-formed when this file is built as C++).
    cl_mem streams[2] = { NULL, NULL };
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_sampler sampler = NULL;
    cl_image_format img_format;
    cl_ushort *input_ptr = NULL, *output_ptr = NULL;
    size_t threads[2];
    int img_width = 512;
    int img_height = 512;
    int err;
    int result = -1;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {(size_t)img_width, (size_t)img_height, 1};
    size_t length = (size_t)img_width * (size_t)img_height * 4 * sizeof(cl_ushort);
    MTdata d;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    d = init_genrand( gRandomSeed );
    input_ptr = generate_16bit_image(img_width, img_height, d);
    free_mtdata(d); d = NULL;
    output_ptr = (cl_ushort*)malloc(length);
    if (!input_ptr || !output_ptr)
    {
        log_error("Failed to allocate host memory for image data\n");
        goto cleanup;
    }

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT16;
    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[0])
    {
        log_error("create_image_2d failed\n");
        goto cleanup;
    }

    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteImage failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &rgba16_kernel_code, "test_rgba16" );
    if (err)
        goto cleanup;

    sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
    if (err != CL_SUCCESS)
    {
        log_error("clCreateSampler failed (error %d)\n", err);
        // Keep the historical contract: this path reports the CL error code.
        result = err;
        goto cleanup;
    }

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    // One work-item per pixel.
    threads[0] = (unsigned int)img_width;
    threads[1] = (unsigned int)img_height;
    err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("%s clEnqueueNDRangeKernel failed\n", __FUNCTION__);
        goto cleanup;
    }

    // Blocking read: the data is in output_ptr when the call returns.
    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_16bit_image(input_ptr, output_ptr, img_width, img_height);

cleanup:
    if (sampler)
        clReleaseSampler(sampler);
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);
    return result;
}

View File

@@ -0,0 +1,396 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Asks the device compiler for sizeof(type).
//
// Builds a one-work-item kernel that stores (uint)sizeof(type) into a buffer,
// runs it with clEnqueueTask and reads the value back.
//
//   type - OpenCL C type name or expression spliced into "sizeof( ... )".
//          Names starting with "double" / "half" get the matching
//          cl_khr_fp64 / cl_khr_fp16 pragma prepended.
//   size - in: value used to pre-fill the device buffer (CL_MEM_COPY_HOST_PTR);
//          out: the size reported by the device, written only on success.
//
// Returns CL_SUCCESS, or the error code of the first failing call.
cl_int get_type_size( cl_context context, cl_command_queue queue, const char *type, cl_ulong *size )
{
    const char *sizeof_kernel_code[4] =
    {
        "", /* optional pragma string */
        "__kernel __attribute__((reqd_work_group_size(1,1,1))) void test_sizeof(__global uint *dst) \n"
        "{\n"
        " dst[0] = (uint) sizeof( ", type, " );\n"
        "}\n"
    };

    cl_program p;
    cl_kernel k;
    cl_mem m;
    cl_uint temp = 0;

    if (!strncmp(type, "double", 6))
    {
        sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
    }
    else if (!strncmp(type, "half", 4))
    {
        sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
    }

    cl_int err = create_single_kernel_helper( context, &p, &k, 4, sizeof_kernel_code, "test_sizeof" );
    if( err )
        return err;

    m = clCreateBuffer( context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof( cl_ulong ), size, &err );
    if( NULL == m )
    {
        clReleaseProgram( p );
        clReleaseKernel( k );
        log_error("\nclCreateBuffer FAILED\n");
        return err;
    }

    err = clSetKernelArg( k, 0, sizeof( cl_mem ), &m );
    if( err )
    {
        clReleaseProgram( p );
        clReleaseKernel( k );
        clReleaseMemObject( m );
        log_error("\nclSetKernelArg FAILED\n");
        return err;
    }

    err = clEnqueueTask( queue, k, 0, NULL, NULL );
    clReleaseProgram( p );
    clReleaseKernel( k );
    if( err )
    {
        clReleaseMemObject( m );
        log_error( "\nclEnqueueTask FAILED\n" );
        return err;
    }

    // The kernel stores a 32-bit uint, so only sizeof(cl_uint) bytes are read.
    err = clEnqueueReadBuffer( queue, m, CL_TRUE, 0, sizeof( cl_uint ), &temp, 0, NULL, NULL );
    clReleaseMemObject( m );
    if( err )
    {
        // Leave *size untouched: temp holds no device data on failure.
        log_error( "\nclEnqueueReadBuffer FAILED\n" );
        return err;
    }

    *size = (cl_ulong) temp;
    return CL_SUCCESS;
}
// One row of the expected-size tables checked by test_sizeof.
typedef struct size_table
{
// OpenCL C type name as spelled in kernel source.
const char *name;
// Size in bytes the device must report for the type.
cl_ulong size;
// sizeof() of the corresponding host-side cl_ type.
cl_ulong cl_size;
}size_table;
// Scalar types checked by test_sizeof: device sizeof must equal both the fixed
// size and the size of the host cl_ type. The "long" rows are skipped by
// test_sizeof when gHasLong is false.
const size_table scalar_table[] =
{
// Fixed size entries from table 6.1
{ "char", 1, sizeof( cl_char ) },
{ "uchar", 1, sizeof( cl_uchar) },
{ "unsigned char", 1, sizeof( cl_uchar) },
{ "short", 2, sizeof( cl_short) },
{ "ushort", 2, sizeof( cl_ushort) },
{ "unsigned short", 2, sizeof( cl_ushort) },
{ "int", 4, sizeof( cl_int ) },
{ "uint", 4, sizeof( cl_uint) },
{ "unsigned int", 4, sizeof( cl_uint) },
{ "float", 4, sizeof( cl_float) },
{ "long", 8, sizeof( cl_long ) },
{ "ulong", 8, sizeof( cl_ulong) },
{ "unsigned long", 8, sizeof( cl_ulong) }
};
// Element types used to build vector type names in test_sizeof: the width
// (2, 4, 8, 16) is appended to the name and the expected size is the
// per-element size times that width.
const size_table vector_table[] =
{
// Fixed size entries from table 6.1
{ "char", 1, sizeof( cl_char ) },
{ "uchar", 1, sizeof( cl_uchar) },
{ "short", 2, sizeof( cl_short) },
{ "ushort", 2, sizeof( cl_ushort) },
{ "int", 4, sizeof( cl_int ) },
{ "uint", 4, sizeof( cl_uint) },
{ "float", 4, sizeof( cl_float) },
{ "long", 8, sizeof( cl_long ) },
{ "ulong", 8, sizeof( cl_ulong) }
};
// Types/expressions whose device sizeof must equal CL_DEVICE_ADDRESS_BITS / 8
// (checked in test_sizeof).
const char *ptr_table[] =
{
"void*",
"size_t",
"sizeof(int)", // check return type of sizeof
"ptrdiff_t"
};
// Types for which test_sizeof only requires the size to be a power of two.
// The image and sampler entries are skipped there when the device lacks the
// corresponding image support.
const char *other_types[] =
{
"event_t",
"image2d_t",
"image3d_t",
"sampler_t"
};
// Returns non-zero when x is an exact power of two. Zero is rejected: without
// the explicit test, 0 & (0 - 1) == 0 would let a reported size of 0 pass.
static int IsPowerOfTwo( cl_ulong x ){ return x != 0 && 0 == (x & (x-1)); }
// Verifies the sizes the device compiler reports for the built-in types.
//
// Checks, in order: scalar types and their 2/4/8/16-wide vectors against the
// fixed sizes and the host cl_ types; pointer-sized types against
// CL_DEVICE_ADDRESS_BITS / 8; intptr_t / uintptr_t (at least pointer sized and
// a power of two); event/image/sampler types (power of two); and, when the
// extensions are available, double and half with their vectors.
//
// Returns 0 on success, -1 / -2 on a size mismatch, or the error code from
// get_type_size.
//
// Values passed to the printf-style calls are cast to the exact type named by
// the conversion specifier ("%lld" -> long long, "%ld" -> long); size_t and
// cl_ulong do not match those types on every ABI (e.g. LLP64).
int test_sizeof(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    size_t i, j;
    cl_ulong test;
    cl_uint ptr_size = CL_UINT_MAX;
    cl_int err = CL_SUCCESS;

    // Check address space size
    err = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(ptr_size), &ptr_size, NULL);
    if( err || ptr_size > 64)
    {
        log_error( "FAILED: Unable to get CL_DEVICE_ADDRESS_BITS for device %p\n", device );
        return -1;
    }
    log_info( "\tCL_DEVICE_ADDRESS_BITS = %u\n", ptr_size );
    ptr_size /= 8;   // bits -> bytes

    // Test standard scalar sizes
    for( i = 0; i < sizeof( scalar_table ) / sizeof( scalar_table[0] ); i++ )
    {
        if( ! gHasLong &&
            (0 == strcmp(scalar_table[i].name, "long") ||
             0 == strcmp(scalar_table[i].name, "ulong") ||
             0 == strcmp(scalar_table[i].name, "unsigned long")))
        {
            log_info("\nLongs are not supported by this device. Skipping test.\t");
            continue;
        }

        test = CL_ULONG_MAX;
        err = get_type_size( context, queue, scalar_table[i].name, &test );
        if( err )
            return err;
        if( test != scalar_table[i].size )
        {
            log_error( "\nFAILED: Type %s has size %lld, but expected size %lld!\n", scalar_table[i].name, (long long)test, (long long)scalar_table[i].size );
            return -1;
        }
        if( test != scalar_table[i].cl_size )
        {
            log_error( "\nFAILED: Type %s has size %lld, but cl_ size is %lld!\n", scalar_table[i].name, (long long)test, (long long)scalar_table[i].cl_size );
            return -2;
        }
        log_info( "%16s", scalar_table[i].name );
    }
    log_info( "\n" );

    // Test standard vector sizes
    for( j = 2; j <= 16; j *= 2 )
    {
        // For each vector size, iterate through types
        for( i = 0; i < sizeof( vector_table ) / sizeof( vector_table[0] ); i++ )
        {
            if( !gHasLong &&
                (0 == strcmp(vector_table[i].name, "long") ||
                 0 == strcmp(vector_table[i].name, "ulong")))
            {
                log_info("\nLongs are not supported by this device. Skipping test.\t");
                continue;
            }

            char name[32];
            sprintf( name, "%s%ld", vector_table[i].name, (long)j );

            test = CL_ULONG_MAX;
            err = get_type_size( context, queue, name, &test );
            if( err )
                return err;
            if( test != j * vector_table[i].size )
            {
                log_error( "\nFAILED: Type %s has size %lld, but expected size %lld!\n", name, (long long)test, (long long)(j * vector_table[i].size) );
                return -1;
            }
            if( test != j * vector_table[i].cl_size )
            {
                log_error( "\nFAILED: Type %s has size %lld, but cl_ size is %lld!\n", name, (long long)test, (long long)(j * vector_table[i].cl_size) );
                return -2;
            }
            log_info( "%16s", name );
        }
        log_info( "\n" );
    }

    //Check that pointer sizes are correct
    for( i = 0; i < sizeof( ptr_table ) / sizeof( ptr_table[0] ); i++ )
    {
        test = CL_ULONG_MAX;
        err = get_type_size( context, queue, ptr_table[i], &test );
        if( err )
            return err;
        if( test != ptr_size )
        {
            log_error( "\nFAILED: Type %s has size %lld, but expected size %u!\n", ptr_table[i], (long long)test, ptr_size );
            return -1;
        }
        log_info( "%16s", ptr_table[i] );
    }

    // Check that intptr_t is large enough
    test = CL_ULONG_MAX;
    err = get_type_size( context, queue, "intptr_t", &test );
    if( err )
        return err;
    if( test < ptr_size )
    {
        log_error( "\nFAILED: intptr_t has size %lld, but must be at least %u!\n", (long long)test, ptr_size );
        return -1;
    }
    if( ! IsPowerOfTwo( test ) )
    {
        log_error( "\nFAILED: sizeof(intptr_t) is %lld, but must be a power of two!\n", (long long)test );
        return -2;
    }
    log_info( "%16s", "intptr_t" );

    // Check that uintptr_t is large enough
    test = CL_ULONG_MAX;
    err = get_type_size( context, queue, "uintptr_t", &test );
    if( err )
        return err;
    if( test < ptr_size )
    {
        log_error( "\nFAILED: uintptr_t has size %lld, but must be at least %u!\n", (long long)test, ptr_size );
        return -1;
    }
    if( ! IsPowerOfTwo( test ) )
    {
        log_error( "\nFAILED: sizeof(uintptr_t) is %lld, but must be a power of two!\n", (long long)test );
        return -2;
    }
    log_info( "%16s\n", "uintptr_t" );

    //Check that other types are powers of two
    for( i = 0; i < sizeof( other_types ) / sizeof( other_types[0] ); i++ )
    {
        if( 0 == strcmp(other_types[i], "image2d_t") &&
            checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED)
        {
            log_info("\nimages are not supported by this device. Skipping test.\t");
            continue;
        }

        if( gIsEmbedded &&
            0 == strcmp(other_types[i], "image3d_t") &&
            checkFor3DImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED)
        {
            log_info("\n3D images are not supported by this device. Skipping test.\t");
            continue;
        }

        if( 0 == strcmp(other_types[i], "sampler_t") &&
            checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED)
        {
            log_info("\nimages are not supported by this device. Skipping test.\t");
            continue;
        }

        test = CL_ULONG_MAX;
        err = get_type_size( context, queue, other_types[i], &test );
        if( err )
            return err;
        if( ! IsPowerOfTwo( test ) )
        {
            log_error( "\nFAILED: Type %s has size %lld, which is not a power of two (section 6.1.5)!\n", other_types[i], (long long)test );
            return -1;
        }
        log_info( "%16s", other_types[i] );
    }
    log_info( "\n" );

    //Check double
    if( is_extension_available( device, "cl_khr_fp64" ) )
    {
        log_info( "\tcl_khr_fp64:" );
        test = CL_ULONG_MAX;
        err = get_type_size( context, queue, "double", &test );
        if( err )
            return err;
        if( test != 8 )
        {
            log_error( "\nFAILED: double has size %lld, but must be 8!\n", (long long)test );
            return -1;
        }
        log_info( "%16s", "double" );

        // Test standard vector sizes
        for( j = 2; j <= 16; j *= 2 )
        {
            char name[32];
            sprintf( name, "double%ld", (long)j );

            test = CL_ULONG_MAX;
            err = get_type_size( context, queue, name, &test );
            if( err )
                return err;
            if( test != 8*j )
            {
                log_error( "\nFAILED: %s has size %lld, but must be %ld!\n", name, (long long)test, (long)(8 * j));
                return -1;
            }
            log_info( "%16s", name );
        }
        log_info( "\n" );
    }

    //Check half
    if( is_extension_available( device, "cl_khr_fp16" ) )
    {
        log_info( "\tcl_khr_fp16:" );
        test = CL_ULONG_MAX;
        err = get_type_size( context, queue, "half", &test );
        if( err )
            return err;
        if( test != 2 )
        {
            log_error( "\nFAILED: half has size %lld, but must be 2!\n", (long long)test );
            return -1;
        }
        log_info( "%16s", "half" );

        // Test standard vector sizes
        for( j = 2; j <= 16; j *= 2 )
        {
            char name[32];
            sprintf( name, "half%ld", (long)j );

            test = CL_ULONG_MAX;
            err = get_type_size( context, queue, name, &test );
            if( err )
                return err;
            if( test != 2*j )
            {
                log_error( "\nFAILED: %s has size %lld, but must be %ld!\n", name, (long long)test, (long)(2 * j));
                return -1;
            }
            log_info( "%16s", name );
        }
        log_info( "\n" );
    }

    return err;
}

View File

@@ -0,0 +1,97 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/typeWrappers.h"
// printf-style template for the kernel built by test_vec_type_hint.
// Substitutions, in order: an optional pragma line, the element type name and
// the vector-size suffix that together form the vec_type_hint argument.
// The kernel itself just copies src[tid] to dst[tid].
static const char *sample_kernel = {
"%s\n" // optional pragma string
"__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global int *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" dst[tid] = src[tid];\n"
"\n"
"}\n"
};
// Builds and runs a trivial copy kernel once for every combination of element
// type and vector width used as __attribute__((vec_type_hint(...))), checking
// only that the program builds and the kernel executes without error.
// Double variants are skipped when cl_khr_fp64 is not available.
//
// Returns 0 on success or the first error code encountered.
//
// The kernel source lives in a fixed-size local buffer rather than on the
// heap, so the early error returns (including those inside test_error) cannot
// leak it and there is no allocation that can fail.
int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    int vec_type_index, vec_size_index;
    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
    const char *size_names[] = {"", "2", "4", "8", "16"};
    const int num_types = (int)(sizeof(vecType) / sizeof(vecType[0]));
    const int num_sizes = (int)(sizeof(size_names) / sizeof(size_names[0]));

    // The expanded template is a few hundred bytes; 4096 leaves ample room.
    char program_source[4096];
    const char *program_source_ptr = program_source;

    for (vec_type_index=0; vec_type_index<num_types; vec_type_index++) {
        if (vecType[vec_type_index] == kDouble) {
            if (!is_extension_available(deviceID, "cl_khr_fp64")) {
                log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
                continue;
            }
            log_info("Testing doubles.\n");
        }

        for (vec_size_index=0; vec_size_index<num_sizes; vec_size_index++) {
            // Wrappers release their CL objects when they go out of scope.
            clProgramWrapper program;
            clKernelWrapper kernel;
            clMemWrapper in, out;
            size_t global[] = {1,1,1};

            log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]);

            program_source[0] = '\0';
            sprintf(program_source, sample_kernel,
                    (vecType[vec_type_index] == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
                    get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]);

            error = create_single_kernel_helper( context, &program, &kernel, 1, &program_source_ptr, "sample_test" );
            if( error != 0 )
                return error;

            in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*10, NULL, &error);
            test_error(error, "clCreateBuffer failed");
            out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*10, NULL, &error);
            test_error(error, "clCreateBuffer failed");

            error = clSetKernelArg(kernel, 0, sizeof(in), &in);
            test_error(error, "clSetKernelArg failed");
            error = clSetKernelArg(kernel, 1, sizeof(out), &out);
            test_error(error, "clSetKernelArg failed");

            error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, NULL);
            test_error(error, "clEnqueueNDRangeKernel failed");

            error = clFinish(queue);
            test_error(error, "clFinish failed");
        }
    }

    return 0;
}

View File

@@ -0,0 +1,406 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/errorHelpers.h"
// Verbosity of create_kernel's diagnostic logging: non-zero logs each generated
// line and a summary, values above 1 also log the enumeration state.
#define DEBUG 0
// Maximum number of components combined to build one vector (length of the
// position array in create_kernel).
#define DEPTH 16
// Limit the maximum code size for any given kernel.
#define MAX_CODE_SIZE (1024*32)
// Component widths selectable at each position, indexed in parallel with
// size_names. create_kernel only selects among the first number_of_sizes
// entries; the trailing -1 / "!!x" entries are padding.
const int sizes[] = {1, 2, 3, 4, 8, 16, -1, -1, -1, -1};
const char *size_names[] = {"", "2", "3", "4", "8", "16" , "!!a", "!!b", "!!c", "!!d"};
// Creates a kernel by enumerating all possible ways of building the vector out of vloads
// skip_to_results will skip results up to a given number. If the amount of code generated
// is greater than MAX_CODE_SIZE, this function will return the number of results used,
// which can then be used as the skip_to_result value to continue where it left off.
//
// Parameters:
//   type              - element type of the vector being constructed.
//   output_size       - component count of the result vector (1, 2, 3, 4, 8 or 16).
//   program           - caller-provided buffer that receives the kernel source.
//   number_of_results - out: number of result lines emitted into this program.
//   skip_to_result    - number of leading combinations to enumerate without
//                       emitting code; negative values are treated as 0.
//
// Returns -1 for an unsupported output_size, 0 when the enumeration ran to
// completion, or the total number of combinations enumerated so far when
// generation stopped because MAX_CODE_SIZE was exceeded.
int create_kernel(ExplicitType type, int output_size, char *program, int *number_of_results, int skip_to_result) {
// Number of leading entries of sizes[] that are no larger than output_size,
// i.e. the component widths that may be used at each position.
int number_of_sizes;
switch (output_size) {
case 1:
number_of_sizes = 1;
break;
case 2:
number_of_sizes = 2;
break;
case 3:
number_of_sizes = 3;
break;
case 4:
number_of_sizes = 4;
break;
case 8:
number_of_sizes = 5;
break;
case 16:
number_of_sizes = 6;
break;
default:
log_error("Invalid size: %d\n", output_size);
return -1;
}
// total_results counts every valid combination found (including skipped
// ones); current_result counts only those emitted into this program.
int total_results = 0;
int current_result = 0;
int total_vloads = 0;
int total_program_length = 0;
int aborted_due_to_size = 0;
if (skip_to_result < 0)
skip_to_result = 0;
// The line of code for the vector creation
char line[1024];
// Keep track of what size vector we are using in each position so we can iterate through all of them
int pos[DEPTH];
int max_size = output_size;
if (DEBUG > 1) log_info("max_size: %d\n", max_size);
program[0] = '\0';
// Kernel header. For 3-component results the output pointer uses the scalar
// element type because the stores below go through vstore3.
sprintf(program, "%s\n__kernel void test_vector_creation(__global %s *src, __global %s%s *result) {\n",
type == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name(type), get_explicit_type_name(type), ( number_of_sizes == 3 ) ? "" : size_names[number_of_sizes-1]);
total_program_length += (int)strlen(program);
char storePrefix[ 128 ], storeSuffix[ 128 ];
// Start out trying sizes 1,1,1,1,1...
for (int i=0; i<DEPTH; i++)
pos[i] = 0;
int done = 0;
while (!done) {
if (DEBUG > 1) {
log_info("pos size[] = [");
for (int k=0; k<DEPTH; k++)
log_info(" %d ", pos[k]);
log_info("]\n");
}
// Go through the selected vector sizes and see if the first n of them fit the
// required size exactly.
int size_so_far = 0;
int vloads;
for ( vloads=0; vloads<DEPTH; vloads++) {
if (size_so_far + sizes[pos[vloads]] <= max_size) {
size_so_far += sizes[pos[vloads]];
} else {
break;
}
}
if (DEBUG > 1) log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far);
// If they did not fit the required size exactly it is too long, so there is no point in checking any other combinations
// of the sizes to the right. Prune them from the search.
if (size_so_far != max_size) {
// Zero all the sizes to the right
for (int k=vloads+1; k<DEPTH; k++) {
pos[k] = 0;
}
// Increment this current size and propagate the values up if needed
for (int d=vloads; d>=0; d--) {
pos[d]++;
if (pos[d] >= number_of_sizes) {
pos[d] = 0;
if (d == 0) {
// If we rolled over then we are done
done = 1;
break;
}
} else {
break;
}
}
// Go on to the next size since this one (and all others "under" it) didn't fit
continue;
}
// Generate the actual load line if we are building this part
line[0]= '\0';
if (skip_to_result == 0 || total_results >= skip_to_result) {
// 3-component results are written with vstore3; all other widths are
// stored by direct assignment to result[n].
if( number_of_sizes == 3 )
{
sprintf( storePrefix, "vstore3( " );
sprintf( storeSuffix, ", %d, result )", current_result );
}
else
{
sprintf( storePrefix, "result[%d] = ", current_result );
storeSuffix[ 0 ] = 0;
}
sprintf(line, "\t%s(%s%d)(", storePrefix, get_explicit_type_name(type), output_size);
current_result++;
// offset is the index into src of the next unread element.
int offset = 0;
for (int i=0; i<vloads; i++) {
if (pos[i] == 0)
sprintf(line + strlen(line), "src[%d]", offset);
else
sprintf(line + strlen(line), "vload%s(0,src+%d)", size_names[pos[i]], offset);
offset += sizes[pos[i]];
if (i<(vloads-1))
sprintf(line + strlen(line), ",");
}
sprintf(line + strlen(line), ")%s;\n", storeSuffix);
strcat(program, line);
total_vloads += vloads;
}
total_results++;
// line is empty for skipped combinations, so they add nothing to the length.
total_program_length += (int)strlen(line);
if (total_program_length > MAX_CODE_SIZE) {
aborted_due_to_size = 1;
done = 1;
}
if (DEBUG) log_info("line is: %s", line);
// If we did not use all of them, then we ignore any changes further to the right.
// We do this by causing those loops to skip on the next iteration.
if (vloads < DEPTH) {
if (DEBUG > 1) log_info("done with this depth\n");
for (int k=vloads; k<DEPTH; k++)
pos[k] = number_of_sizes;
}
// Increment the far right size by 1, rolling over as needed
for (int d=DEPTH-1; d>=0; d--) {
pos[d]++;
if (pos[d] >= number_of_sizes) {
pos[d] = 0;
if (d == 0) {
// If we rolled over at the far-left then we are done
done = 1;
break;
}
} else {
break;
}
}
if (done)
break;
// Continue until we are done.
}
strcat(program, "}\n\n"); //log_info("%s\n", program);
total_program_length += 3;
if (DEBUG) log_info("\t\t(Program for vector type %s%s contains %d vector creations, of total program length %gkB, with a total of %d vloads.)\n",
get_explicit_type_name(type), size_names[number_of_sizes-1], total_results, total_program_length/1024.0, total_vloads);
*number_of_results = current_result;
// A non-zero return tells the caller where to resume with the next kernel.
if (aborted_due_to_size)
return total_results;
return 0;
}
// Tests vector construction from every combination of scalars and vloads.
//
// For each element type and each vector width 2, 3, 4 and 8, create_kernel
// generates one or more kernels (split when the source exceeds MAX_CODE_SIZE)
// that build the vector in every possible way from src[0..15] = 0..15. Each
// result must be byte-identical to the first N converted input elements.
//
// Returns the number of errors encountered (0 on success), or -1 when the
// host work buffers cannot be allocated.
int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16};
    const int num_types = (int)(sizeof(vecType) / sizeof(vecType[0]));

    char *program_source;
    int error;
    int total_errors = 0;

    cl_int input_data_int[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    cl_double input_data_double[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    void *input_data_converted;
    void *output_data;

    int number_of_results;

    input_data_converted = malloc(sizeof(cl_double)*16);
    program_source = (char*)malloc(sizeof(char)*1024*1024*4);
    if (input_data_converted == NULL || program_source == NULL) {
        log_error("Failed to allocate memory for input data or program source.\n");
        free(input_data_converted);
        free(program_source);
        return -1;
    }

    // Iterate over all the types
    for (int type_index=0; type_index<num_types; type_index++) {
        const ExplicitType type = vecType[type_index];
        const size_t type_size = get_explicit_type_size(type);

        if(!gHasLong && ((type == kLong) || (type == kULong)))
        {
            log_info("Long/ULong data type not supported on this device\n");
            continue;
        }

        clMemWrapper input;

        if (type == kDouble) {
            if (!is_extension_available(deviceID, "cl_khr_fp64")) {
                log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
                continue;
            }
            log_info("Testing doubles.\n");
        }

        // Convert the data to the right format for the test.
        memset(input_data_converted, 0xff, sizeof(cl_double)*16);
        if (type != kDouble) {
            for (int j=0; j<16; j++) {
                convert_explicit_value(&input_data_int[j], ((char*)input_data_converted)+type_size*j,
                                       kInt, 0, kRoundToEven, type);
            }
        } else {
            memcpy(input_data_converted, &input_data_double, sizeof(cl_double)*16);
        }

        input = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, type_size*16,
                               (type != kDouble) ? input_data_converted : input_data_double, &error);
        if (error) {
            print_error(error, "clCreateBuffer failed");
            total_errors++;
            continue;
        }

        // Iterate over all the vector sizes.
        for (int size_index=1; size_index< 5; size_index++) {
            const unsigned int vec_size = vecSizes[size_index];
            // Bytes occupied by one result vector.
            const size_t vec_bytes = type_size * vec_size;
            size_t global[] = {1,1,1};
            int number_generated = -1;
            int previous_number_generated = 0;

            log_info("Testing %s%s...\n", get_explicit_type_name(type), size_names[size_index]);
            while (number_generated != 0) {
                clMemWrapper output;
                clKernelWrapper kernel;
                clProgramWrapper program;

                number_generated = create_kernel(type, vec_size, program_source, &number_of_results, number_generated);
                if (number_generated < 0) {
                    // create_kernel wrote no program; retrying would never terminate.
                    log_error("create_kernel failed.\n");
                    total_errors++;
                    break;
                }
                if (number_generated != 0) {
                    if (previous_number_generated == 0)
                        log_info("Code size greater than %gkB; splitting test into multiple kernels.\n", MAX_CODE_SIZE/1024.0);
                    log_info("\tExecuting vector permutations %d to %d...\n", previous_number_generated, number_generated-1);
                }

                error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&program_source, "test_vector_creation");
                if (error) {
                    log_error("create_single_kernel_helper failed.\n");
                    total_errors++;
                    break;
                }

                const size_t output_bytes = number_of_results * vec_bytes;

                output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, output_bytes, NULL, &error);
                if (error) {
                    print_error(error, "clCreateBuffer failed");
                    total_errors++;
                    break;
                }

                error = clSetKernelArg(kernel, 0, sizeof(input), &input);
                error |= clSetKernelArg(kernel, 1, sizeof(output), &output);
                if (error) {
                    print_error(error, "clSetKernelArg failed");
                    total_errors++;
                    break;
                }

                error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
                if (error) {
                    print_error(error, "clEnqueueNDRangeKernel failed");
                    total_errors++;
                    break;
                }

                error = clFinish(queue);
                if (error) {
                    print_error(error, "clFinish failed");
                    total_errors++;
                    break;
                }

                output_data = malloc(output_bytes);
                if (output_data == NULL) {
                    log_error("Failed to allocate memory for output data.\n");
                    total_errors++;
                    break;
                }
                memset(output_data, 0xff, output_bytes);
                error = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, output_bytes,
                                            output_data, 0, NULL, NULL);
                if (error) {
                    print_error(error, "clEnqueueReadBuffer failed");
                    total_errors++;
                    free(output_data);
                    break;
                }

                // Check the results
                char *res = (char *)output_data;
                char *exp = (char *)input_data_converted;
                for (int i=0; i<number_of_results; i++) {
                    // If they do not match, then print out why
                    if (memcmp(input_data_converted, res + i*vec_bytes, vec_bytes)) {
                        log_error("Data failed to validate for result %d\n", i);

                        // Find the line in the program that failed. This is ugly.
                        char search[32];
                        char found_line[1024];
                        char *start_loc = NULL;
                        found_line[0]='\0';
                        search[0]='\0';
                        if (vec_size == 3) {
                            // 3-component results are stored with
                            // "vstore3( ..., <i>, result );", so match the tail
                            // of the call and walk back to the start of its line.
                            sprintf(search, ", %d, result )", i);
                            char *hit = strstr(program_source, search);
                            if (hit != NULL) {
                                start_loc = hit;
                                while (start_loc > program_source && start_loc[-1] != '\n')
                                    start_loc--;
                            }
                        } else {
                            sprintf(search, "result[%d] = (", i);
                            start_loc = strstr(program_source, search);
                        }
                        if (start_loc == NULL)
                            log_error("Failed to find program source for failure for %s in \n%s", search, program_source);
                        else {
                            // Copy at most one line, never more than found_line holds.
                            char *end_loc = strstr(start_loc, "\n");
                            size_t line_len = (end_loc != NULL) ? (size_t)(end_loc - start_loc) : strlen(start_loc);
                            if (line_len >= sizeof(found_line))
                                line_len = sizeof(found_line) - 1;
                            memcpy(found_line, start_loc, line_len);
                            found_line[line_len]='\0';
                            log_error("Failed vector line: %s\n", found_line);
                        }

                        for (int j=0; j<(int)vec_size; j++) {
                            char expected_value[64];
                            char returned_value[64];
                            expected_value[0]='\0';
                            returned_value[0]='\0';
                            print_type_to_string(type, (void*)(res+type_size*(i*vec_size+j)), returned_value);
                            print_type_to_string(type, (void*)(exp+type_size*j), expected_value);
                            log_error("index [%d, component %d]: got: %s expected: %s\n", i, j,
                                      returned_value, expected_value);
                        }

                        total_errors++;
                    }
                }
                free(output_data);
                previous_number_generated = number_generated;
            } // number_generated != 0

        } // vector sizes
    } // vector types

    free(input_data_converted);
    free(program_source);

    return total_errors;
}

View File

@@ -0,0 +1,985 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/errorHelpers.h"
// Outputs debug information for stores
#define DEBUG 0
// Forces stores/loads to be done with offsets = tid
#define LINEAR_OFFSETS 0
// Number of loads performed per test when DEBUG is 0 (test_vload uses 16
// instead when DEBUG is set).
#define NUM_LOADS 512
// Pragma line that enables the cl_khr_fp64 extension in generated kernel source.
static const char *doubleExtensionPragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
#pragma mark -------------------- vload harness --------------------------
// Callback that writes the source of a test program into destBuffer.
// test_vload calls it with the element count of the input buffer, the element
// type, and the input and output vector sizes.
// NOTE(review): destBuffer capacity is not passed in; test_vload supplies a
// 10240-byte array, so implementations presumably stay below that. Confirm.
typedef void (*create_vload_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize );
// Builds and runs one vload kernel for a single element type and vector width, then
// compares every loaded vector against a reference computed on the host.
//
//   device      - not referenced by this function
//   context     - context used to build the program and create the buffers
//   queue       - command queue used to run the kernel and read the results back
//   type        - element type under test
//   vecSize     - vector width under test
//   createFn    - generator that writes the kernel source for the address space being tested
//   bufferSize  - size in bytes of the source buffer; rounded down to a whole number of
//                 vectors (and capped at 128 bytes when DEBUG is set)
//   d           - random number generator state used for the input data and the offsets
//
// Returns 0 when every sample matches and 1 on the first mismatch. OpenCL failures go
// through test_error (defined outside this chunk; presumably it returns the error code).
//
// NOTE(review): on the MSVC path the two _malloca blocks are never passed to _freea on any
// exit path - confirm whether that is acceptable for the sizes used here.
int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize,
                create_vload_program_fn createFn, size_t bufferSize, MTdata d )
{
    int error;

    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 4 ];
    const size_t numLoads = (DEBUG) ? 16 : NUM_LOADS;

    if (DEBUG) bufferSize = (bufferSize < 128) ? bufferSize : 128;

    size_t threads[ 1 ], localThreads[ 1 ];
    clProtectedArray inBuffer( bufferSize );
    char programSrc[ 10240 ];
    cl_uint offsets[ numLoads ], alignmentOffsets[ numLoads ];
    size_t numElements, typeSize, i;
    unsigned int outVectorSize;

    typeSize = get_explicit_type_size( type );
    numElements = bufferSize / ( typeSize * vecSize );
    bufferSize = numElements * typeSize * vecSize;  // To account for rounding

    if (DEBUG) log_info("Testing: numLoads: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numLoads, (int)typeSize, vecSize, (int)numElements, (int)bufferSize);

    // Create some random input data and random offsets to load from
    generate_random_data( type, numElements * vecSize, d, (void *)inBuffer );
    for( i = 0; i < numLoads; i++ )
    {
        offsets[ i ] = (cl_uint)random_in_range( 0, (int)numElements - 1, d );

        // Only vectors at least two slots away from the end get a non-zero element offset,
        // so the shifted load stays inside the buffer. The bound is written as an addition
        // (rather than "numElements - 2") so that it cannot wrap around when numElements < 2.
        if( (size_t)offsets[ i ] + 2 < numElements )
            alignmentOffsets[ i ] = (cl_uint)random_in_range( 0, (int)vecSize - 1, d );
        else
            alignmentOffsets[ i ] = 0;

        if (LINEAR_OFFSETS) offsets[i] = (cl_uint)i;
    }
    if (LINEAR_OFFSETS) log_info("Offsets set to thread IDs to simplify output.\n");

    // 32-bit fixup
    outVectorSize = vecSize;

    // Declare output buffers now
#if !(defined(_WIN32) && defined(_MSC_VER))
    char outBuffer[ numLoads * typeSize * outVectorSize ];
    char referenceBuffer[ numLoads * typeSize * vecSize ];
#else
    char* outBuffer = (char*)_malloca(numLoads * typeSize * outVectorSize * sizeof(cl_char));
    char* referenceBuffer = (char*)_malloca(numLoads * typeSize * vecSize * sizeof(cl_char));
#endif

    // Create the program
    createFn( programSrc, numElements, type, vecSize, outVectorSize);

    // Create our kernel
    const char *ptr = programSrc;
    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
    test_error( error, "Unable to create testing kernel" );
    if (DEBUG) log_info("Kernel: \n%s\n", programSrc);

    // Get the number of args to differentiate the kernels with local storage. (They have 5)
    cl_uint numArgs;
    error = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL);
    test_error( error, "clGetKernelInfo failed");

    // Set up parameters
    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, bufferSize, (void *)inBuffer, &error );
    test_error( error, "Unable to create kernel stream" );
    streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(offsets[0]), offsets, &error );
    test_error( error, "Unable to create kernel stream" );
    streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(alignmentOffsets[0]), alignmentOffsets, &error );
    test_error( error, "Unable to create kernel stream" );
    streams[ 3 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*typeSize*outVectorSize, (void *)outBuffer, &error );
    test_error( error, "Unable to create kernel stream" );

    // Set parameters and run
    if (numArgs == 5) {
        // We need to set the size of the local storage
        error = clSetKernelArg(kernel, 0, bufferSize, NULL);
        test_error( error, "clSetKernelArg for buffer failed");
        for( i = 0; i < 4; i++ )
        {
            error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] );
            test_error( error, "Unable to set kernel argument" );
        }
    } else {
        // No local storage
        for( i = 0; i < 4; i++ )
        {
            error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] );
            test_error( error, "Unable to set kernel argument" );
        }
    }

    threads[ 0 ] = numLoads;
    error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
    test_error( error, "Unable to get local thread size" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Unable to exec kernel" );

    // Get the results
    error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, numLoads * typeSize * outVectorSize * sizeof(cl_char), (void *)outBuffer, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    // Create the reference results: load i copies one vector starting at element
    // offsets[i] * vecSize + alignmentOffsets[i] of the input buffer.
    memset( referenceBuffer, 0, numLoads * typeSize * vecSize * sizeof(cl_char));
    for( i = 0; i < numLoads; i++ )
    {
        memcpy( referenceBuffer + i * typeSize * vecSize, ( (char *)(void *)inBuffer ) + ( ( offsets[ i ] * vecSize ) + alignmentOffsets[ i ] ) * typeSize,
                typeSize * vecSize );
    }

    // Validate the results now
    char *expected = referenceBuffer;
    char *actual = outBuffer;
    char *in = (char *)(void *)inBuffer;

    if (DEBUG) {
        log_info("Memory contents:\n");
        for (i=0; i<numElements; i++) {
            char inString[1024];
            if (i < numLoads) {
                char expectedString[ 1024 ], actualString[ 1024 ];
                log_info("buffer %3d: input: %s expected: %s got: %s (load offset %3d, alignment offset %3d)", (int)i, GetDataVectorString( &(in[i*typeSize*vecSize]), typeSize, vecSize, inString ),
                         GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
                         GetDataVectorString( &(actual[i*typeSize*outVectorSize]), typeSize, vecSize, actualString ),
                         offsets[i], alignmentOffsets[i]);
                if (memcmp(&(expected[i*typeSize*vecSize]), &(actual[i*typeSize*outVectorSize]), typeSize * vecSize) != 0)
                    log_error(" << ERROR\n");
                else
                    log_info("\n");
            } else {
                // The reference and output buffers only hold numLoads vectors, so beyond that
                // point only the input can be shown without reading past their ends.
                log_info("buffer %3d: input: %s\n", (int)i, GetDataVectorString( &(in[i*typeSize*vecSize]), typeSize, vecSize, inString ));
            }
        }
    }

    for( i = 0; i < numLoads; i++ )
    {
        if( memcmp( expected, actual, typeSize * vecSize ) != 0 )
        {
            char expectedString[ 1024 ], actualString[ 1024 ];
            log_error( "ERROR: Data sample %d for vload of %s%d did not validate (expected {%s}, got {%s}, loaded from offset %d)\n",
                       (int)i, get_explicit_type_name( type ), vecSize, GetDataVectorString( expected, typeSize, vecSize, expectedString ),
                       GetDataVectorString( actual, typeSize, vecSize, actualString ), (int)offsets[ i ] );
            return 1;
        }
        expected += typeSize * vecSize;
        actual += typeSize * outVectorSize;
    }

    return 0;
}
// Runs test_vload() for every element type / vector width combination.
//
//   device, context, queue - OpenCL objects forwarded to test_vload(); device is also used
//                            to query for the cl_khr_fp64 extension
//   createFn               - kernel-source generator for the address space being tested
//   bufferSize             - source buffer size in bytes, forwarded to test_vload()
//
// Double is skipped when cl_khr_fp64 is not reported, and long/ulong are skipped when
// gHasLong is false. After a failure the remaining widths of that type are skipped.
// Returns the sum of the non-zero results returned by test_vload() (0 when all pass).
int test_vloadset(cl_device_id device, cl_context context, cl_command_queue queue, create_vload_program_fn createFn, size_t bufferSize )
{
    // Both tables are terminated by a sentinel entry.
    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int vecSizes[] = { 2, 3, 4, 8, 16, 0 };
    const char *size_names[] = { "2", "3", "4", "8", "16"};
    unsigned int typeIdx, sizeIdx;

    int error = 0;
    MTdata mtData = init_genrand( gRandomSeed );

    log_info("Testing with buffer size of %d.\n", (int)bufferSize);

    for( typeIdx = 0; vecType[ typeIdx ] != kNumExplicitTypes; typeIdx++ )
    {
        if( vecType[ typeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
            continue;

        if(( vecType[ typeIdx ] == kLong || vecType[ typeIdx ] == kULong ) && !gHasLong )
            continue;

        for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ )
        {
            log_info("Testing %s%s...\n", get_explicit_type_name(vecType[typeIdx]), size_names[sizeIdx]);

            int error_this_type = test_vload( device, context, queue, vecType[ typeIdx ], vecSizes[ sizeIdx ], createFn, bufferSize, mtData );
            if (error_this_type) {
                error += error_this_type;
                // Terminated with a newline so the next log line starts on its own line,
                // matching the same message in test_vstoreset().
                log_error("Failure; skipping further sizes for this type.\n");
                break;
            }
        }
    }

    free_mtdata(mtData);

    return error;
}
#pragma mark -------------------- vload test cases --------------------------
// Writes into destBuffer the source of a kernel that vloads from a __global buffer.
// Width 3 gets its own template that writes the three components to a scalar results
// array; every other width stores the loaded vector directly.
// inBufferSize is not used by this generator.
void create_global_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
{
    const char *elemName = get_explicit_type_name(type);
    const char *fp64Prefix = ( type == kDouble ) ? doubleExtensionPragma : "";

    if( inVectorSize == 3 )
    {
        const char *vec3Template =
            "%s%s"
            "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
            "{\n"
            " int tid = get_global_id( 0 );\n"
            " %s3 tmp = vload3( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n"
            " results[ 3*tid ] = tmp.s0;\n"
            " results[ 3*tid+1 ] = tmp.s1;\n"
            " results[ 3*tid+2 ] = tmp.s2;\n"
            "}\n";

        sprintf( destBuffer, vec3Template, fp64Prefix, "",
                 elemName, elemName, elemName, elemName );
        return;
    }

    const char *genericTemplate =
        "%s%s"
        "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
        "{\n"
        " int tid = get_global_id( 0 );\n"
        " %s%d tmp = vload%d( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n"
        " results[ tid ] = tmp;\n"
        "}\n";

    const int loadWidth = (int)inVectorSize;
    const int resultWidth = (int)outVectorSize;

    sprintf( destBuffer, genericTemplate, fp64Prefix, "",
             elemName, elemName, resultWidth,
             elemName, loadWidth, loadWidth, elemName );
}
// vload test against __global memory: runs the whole type/width matrix with a
// fixed 10240-byte source buffer. n_elems is not consulted.
int test_vload_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const size_t globalBufferBytes = 10240;

    return test_vloadset( device, context, queue, create_global_load_code, globalBufferBytes );
}
// Writes into destBuffer the source of a kernel that vloads from __local memory.
// The generated kernel takes the local buffer as its first argument; the work-item with
// local id 0 copies inBufferSize vectors from the global source into it, and after a
// barrier every work-item performs its vload from the local copy.
// Width 3 uses a scalar-typed variant that copies and stores three components per vector.
void create_local_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
{
    const char *elemName = get_explicit_type_name(type);
    const char *fp64Prefix = ( type == kDouble ) ? doubleExtensionPragma : "";
    const int vectorCount = (int)inBufferSize;

    if( inVectorSize == 3 )
    {
        const char *vec3Template =
            "%s%s"
            "__kernel void test_fn(__local %s *sSharedStorage, __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
            "{\n"
            " int tid = get_global_id( 0 );\n"
            " int lid = get_local_id( 0 );\n"
            "\n"
            " if( lid == 0 )\n"
            " {\n"
            " for( int i = 0; i < %d; i++ ) {\n"
            " sSharedStorage[ 3*i ] = src[ 3*i ];\n"
            " sSharedStorage[ 3*i +1] = src[ 3*i +1];\n"
            " sSharedStorage[ 3*i +2] = src[ 3*i +2];\n"
            " }\n"
            " }\n"
            // Only the first work-item of each group runs the copy; the barrier below keeps
            // the others from reading the shared storage before it has been filled.
            " barrier( CLK_LOCAL_MEM_FENCE );\n"
            "\n"
            " %s3 tmp = vload3( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n"
            " results[ 3*tid ] = tmp.s0;\n"
            " results[ 3*tid +1] = tmp.s1;\n"
            " results[ 3*tid +2] = tmp.s2;\n"
            "}\n";

        sprintf( destBuffer, vec3Template, fp64Prefix, "",
                 elemName, elemName, elemName,
                 vectorCount,
                 elemName, elemName );
        return;
    }

    const char *genericTemplate =
        "%s%s"
        "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
        "{\n"
        " int tid = get_global_id( 0 );\n"
        " int lid = get_local_id( 0 );\n"
        "\n"
        " if( lid == 0 )\n"
        " {\n"
        " for( int i = 0; i < %d; i++ )\n"
        " sSharedStorage[ i ] = src[ i ];\n"
        " }\n"
        // Only the first work-item of each group runs the copy; the barrier below keeps
        // the others from reading the shared storage before it has been filled.
        " barrier( CLK_LOCAL_MEM_FENCE );\n"
        "\n"
        " %s%d tmp = vload%d( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n"
        " results[ tid ] = tmp;\n"
        "}\n";

    const int loadWidth = (int)inVectorSize;
    const int resultWidth = (int)outVectorSize;

    sprintf( destBuffer, genericTemplate, fp64Prefix, "",
             elemName, loadWidth,
             elemName, loadWidth, elemName, resultWidth,
             vectorCount,
             elemName, loadWidth, loadWidth, elemName );
}
// vload test against __local memory. The source buffer size is derived from the device's
// reported local memory size: capped at 10240 bytes, then reduced by 2048 bytes when the
// capped value exceeds 4096 and halved otherwise. n_elems is not consulted.
int test_vload_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    cl_ulong reportedBytes;
    int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( reportedBytes ), &reportedBytes, NULL );
    test_error( error, "Unable to get max size of local memory buffer" );

    const cl_ulong cappedBytes = ( reportedBytes > 10240 ) ? 10240 : reportedBytes;
    const cl_ulong testBytes = ( cappedBytes > 4096 ) ? ( cappedBytes - 2048 ) : ( cappedBytes / 2 );

    return test_vloadset( device, context, queue, create_local_load_code, (size_t)testBytes );
}
// Writes into destBuffer the source of a kernel that vloads from a __constant buffer.
// Width 3 gets its own template that writes the three components to a scalar results
// array; every other width stores the loaded vector directly.
// inBufferSize is not used by this generator.
void create_constant_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
{
    const char *elemName = get_explicit_type_name(type);
    const char *fp64Prefix = ( type == kDouble ) ? doubleExtensionPragma : "";

    if( inVectorSize == 3 )
    {
        const char *vec3Template =
            "%s%s"
            "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
            "{\n"
            " int tid = get_global_id( 0 );\n"
            " %s3 tmp = vload3( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n"
            " results[ 3*tid ] = tmp.s0;\n"
            " results[ 3*tid+1 ] = tmp.s1;\n"
            " results[ 3*tid+2 ] = tmp.s2;\n"
            "}\n";

        sprintf( destBuffer, vec3Template, fp64Prefix, "",
                 elemName, elemName, elemName, elemName );
        return;
    }

    const char *genericTemplate =
        "%s%s"
        "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
        "{\n"
        " int tid = get_global_id( 0 );\n"
        " %s%d tmp = vload%d( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n"
        " results[ tid ] = tmp;\n"
        "}\n";

    const int loadWidth = (int)inVectorSize;
    const int resultWidth = (int)outVectorSize;

    sprintf( destBuffer, genericTemplate, fp64Prefix, "",
             elemName, elemName, resultWidth,
             elemName, loadWidth, loadWidth, elemName );
}
// vload test against __constant memory. The source buffer size is derived from the device's
// reported maximum constant buffer size: capped at 10240 bytes, then reduced by 2048 bytes
// when the capped value exceeds 4096 and halved otherwise. n_elems is not consulted.
int test_vload_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    cl_ulong reportedBytes;
    int error = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( reportedBytes ), &reportedBytes, NULL );
    test_error( error, "Unable to get max size of constant memory buffer" );

    const cl_ulong cappedBytes = ( reportedBytes > 10240 ) ? 10240 : reportedBytes;
    const cl_ulong testBytes = ( cappedBytes > 4096 ) ? ( cappedBytes - 2048 ) : ( cappedBytes / 2 );

    return test_vloadset( device, context, queue, create_constant_load_code, (size_t)testBytes );
}
// Writes into destBuffer the source of a kernel that vloads from __private memory.
// The generated kernel declares a private array, copies the source buffer into it in every
// work-item, and then performs the vload from that private copy.
// Width 3 uses a scalar-typed private array with three times as many entries.
void create_private_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
{
    const char *elemName = get_explicit_type_name(type);
    const char *fp64Prefix = ( type == kDouble ) ? doubleExtensionPragma : "";
    const int vectorCount = (int)inBufferSize;

    if( inVectorSize == 3 )
    {
        const char *vec3Template =
            "%s%s"
            // Private storage exists once per work-item, so each work-item has to fill its
            // own copy of the test data before loading from it.
            "#define PRIV_TYPE %s\n"
            "#define PRIV_SIZE %d\n"
            "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
            "{\n"
            " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n"
            " int tid = get_global_id( 0 );\n"
            "\n"
            " for( int i = 0; i < PRIV_SIZE; i++ )\n"
            " {\n"
            " sPrivateStorage[ i ] = src[ i ];\n"
            " }\n"
            // No barrier: the copy above is private to the work-item that ran it.
            "\n"
            " %s3 tmp = vload3( offsets[ tid ], ( sPrivateStorage ) + alignmentOffsets[ tid ] );\n"
            " results[ 3*tid ] = tmp.s0;\n"
            " results[ 3*tid+1 ] = tmp.s1;\n"
            " results[ 3*tid+2 ] = tmp.s2;\n"
            "}\n";

        sprintf( destBuffer, vec3Template, fp64Prefix, "",
                 elemName, 3 * vectorCount,
                 elemName, elemName,
                 elemName );
        return;
    }

    const char *genericTemplate =
        "%s%s"
        // Private storage exists once per work-item, so each work-item has to fill its
        // own copy of the test data before loading from it.
        "#define PRIV_TYPE %s%d\n"
        "#define PRIV_SIZE %d\n"
        "__kernel void test_fn( __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
        "{\n"
        " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n"
        " int tid = get_global_id( 0 );\n"
        "\n"
        " for( int i = 0; i < %d; i++ )\n"
        " sPrivateStorage[ i ] = src[ i ];\n"
        // No barrier: the copy above is private to the work-item that ran it.
        "\n"
        " %s%d tmp = vload%d( offsets[ tid ], ( (__private %s *) sPrivateStorage ) + alignmentOffsets[ tid ] );\n"
        " results[ tid ] = tmp;\n"
        "}\n";

    const int loadWidth = (int)inVectorSize;
    const int resultWidth = (int)outVectorSize;

    sprintf( destBuffer, genericTemplate, fp64Prefix, "",
             elemName, loadWidth, vectorCount,
             elemName, loadWidth, elemName, resultWidth,
             vectorCount,
             elemName, loadWidth, loadWidth, elemName );
}
// vload test against __private memory. The amount of private storage a device offers is
// not queried here, so a fixed 256-byte buffer is used: enough for two 16-element vectors
// of 8-byte elements (2 * 8 * 16). n_elems is not consulted.
int test_vload_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const size_t privateBufferBytes = 256;

    return test_vloadset( device, context, queue, create_private_load_code, privateBufferBytes );
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#pragma mark -------------------- vstore harness --------------------------
// Signature of a vstore kernel-source generator as invoked by test_vstore():
//   destBuffer   - receives the NUL-terminated kernel source (test_vstore passes a 10240-byte array)
//   inBufferSize - number of whole vectors in the destination buffer (test_vstore passes numElements)
//   type         - element type under test
//   inVectorSize - vector width being stored
typedef void (*create_vstore_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize );
// Builds and runs one vstore kernel for a single element type and vector width, once per
// address offset in [0, vecSize), and compares the whole destination buffer against a
// reference computed on the host (untouched slots must still be zero).
//
//   device      - not referenced by this function
//   context     - context used to build the program and create the buffers
//   queue       - command queue used to run the kernel and transfer the buffers
//   type        - element type under test
//   vecSize     - vector width under test
//   createFn    - generator that writes the kernel source for the address space being tested
//   bufferSize  - size in bytes of the destination buffer; rounded down to a whole number of
//                 vectors (and capped at 128 bytes when DEBUG is set)
//   d           - random number generator state used for the input data and the offsets
//
// Returns 0 when every pass validates and 1 on the first mismatch. OpenCL failures go
// through test_error (defined outside this chunk; presumably it returns the error code).
//
// NOTE(review): on the MSVC path the _malloca blocks are never passed to _freea on any
// exit path - confirm whether that is acceptable for the sizes used here.
int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize,
                 create_vstore_program_fn createFn, size_t bufferSize, MTdata d )
{
    int error;

    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 3 ];

    size_t threads[ 1 ], localThreads[ 1 ];

    size_t numElements, typeSize, numStores = (DEBUG) ? 16 : NUM_LOADS;

    if (DEBUG)
        bufferSize = (bufferSize < 128) ? bufferSize : 128;

    typeSize = get_explicit_type_size( type );
    numElements = bufferSize / ( typeSize * vecSize );
    bufferSize = numElements * typeSize * vecSize;  // To account for rounding
    if( numStores > numElements * 2 / 3 )
    {
        // Note: unlike load, we have to restrict the # of stores here, since all offsets must be unique for our test
        // (Plus, we leave some room for extra values to make sure didn't get written)
        numStores = numElements * 2 / 3;
        if( numStores < 1 )
            numStores = 1;
    }
    if (DEBUG)
        log_info("Testing: numStores: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numStores, (int)typeSize, vecSize, (int)numElements, (int)bufferSize);

#if !(defined(_WIN32) && defined(_MSC_VER))
    cl_uint offsets[ numStores ];
#else
    cl_uint* offsets = (cl_uint*)_malloca(numStores * sizeof(cl_uint));
#endif
    char programSrc[ 10240 ];
    size_t i;

#if !(defined(_WIN32) && defined(_MSC_VER))
    char inBuffer[ numStores * typeSize * vecSize ];
#else
    char* inBuffer = (char*)_malloca( numStores * typeSize * vecSize * sizeof(cl_char));
#endif
    clProtectedArray outBuffer( numElements * typeSize * vecSize );
#if !(defined(_WIN32) && defined(_MSC_VER))
    char referenceBuffer[ numElements * typeSize * vecSize ];
#else
    char* referenceBuffer = (char*)_malloca(numElements * typeSize * vecSize * sizeof(cl_char));
#endif

    // Create some random input data and random offsets to load from
    generate_random_data( type, numStores * vecSize, d, (void *)inBuffer );

    // Note: make sure no two offsets are the same, otherwise the output would depend on
    // the order that threads ran in, and that would be next to impossible to verify
#if !(defined(_WIN32) && defined(_MSC_VER))
    char flags[ numElements ];
#else
    char* flags = (char*)_malloca( numElements * sizeof(char));
#endif

    memset( flags, 0, numElements * sizeof(char) );
    for( i = 0; i < numStores; i++ )
    {
        do
        {
            offsets[ i ] = (cl_uint)random_in_range( 0, (int)numElements - 2, d );  // Note: keep it one vec below the end for offset testing
        } while( flags[ offsets[ i ] ] != 0 );
        flags[ offsets[ i ] ] = -1;
        if (LINEAR_OFFSETS)
            offsets[i] = (int)i;
    }
    if (LINEAR_OFFSETS)
        log_info("Offsets set to thread IDs to simplify output.\n");

    createFn( programSrc, numElements, type, vecSize );

    // Create our kernel
    const char *ptr = programSrc;
    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
    test_error( error, "Unable to create testing kernel" );
    if (DEBUG) log_info("Kernel: \n%s\n", programSrc);

    // Get the number of args to differentiate the kernels with local storage. (They have 5)
    cl_uint numArgs;
    error = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL);
    test_error( error, "clGetKernelInfo failed");

    // Set up parameters
    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * typeSize * vecSize * sizeof(cl_char), (void *)inBuffer, &error );
    test_error( error, "Unable to create kernel stream" );
    streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * sizeof(cl_uint), offsets, &error );
    test_error( error, "Unable to create kernel stream" );
    streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numElements * typeSize * vecSize, (void *)outBuffer, &error );
    test_error( error, "Unable to create kernel stream" );

    // Set parameters and run
    if (numArgs == 5)
    {
        // We need to set the size of the local storage
        error = clSetKernelArg(kernel, 0, bufferSize, NULL);
        test_error( error, "clSetKernelArg for buffer failed");
        for( i = 0; i < 3; i++ )
        {
            error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] );
            test_error( error, "Unable to set kernel argument" );
        }
    }
    else
    {
        // No local storage
        for( i = 0; i < 3; i++ )
        {
            error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] );
            if (error)
                log_info("%s\n", programSrc);
            test_error( error, "Unable to set kernel argument" );
        }
    }

    threads[ 0 ] = numStores;
    error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
    test_error( error, "Unable to get local thread size" );

    // Run in a loop, changing the address offset from 0 to ( vecSize - 1 ) each time, since
    // otherwise stores might overlap each other, and it'd be a nightmare to test!
    for( cl_uint addressOffset = 0; addressOffset < vecSize; addressOffset++ )
    {
        if (DEBUG)
            log_info("\tstore addressOffset is %d, executing with threads %d\n", addressOffset, (int)threads[0]);

        // Clear the results first
        memset( outBuffer, 0, numElements * typeSize * vecSize );
        error = clEnqueueWriteBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL );
        test_error( error, "Unable to erase result stream" );

        // Set up the new offset and run
        if (numArgs == 5)
            error = clSetKernelArg( kernel, 3+1, sizeof( cl_uint ), &addressOffset );
        else
            error = clSetKernelArg( kernel, 3, sizeof( cl_uint ), &addressOffset );
        test_error( error, "Unable to set address offset argument" );

        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
        test_error( error, "Unable to exec kernel" );

        // Get the results
        error = clEnqueueReadBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL );
        test_error( error, "Unable to read results" );

        // Create the reference results: store i lands at element
        // offsets[i] * vecSize + addressOffset of an otherwise zeroed buffer.
        memset( referenceBuffer, 0, numElements * typeSize * vecSize * sizeof(cl_char) );
        for( i = 0; i < numStores; i++ )
        {
            memcpy( referenceBuffer + ( ( offsets[ i ] * vecSize ) + addressOffset ) * typeSize, inBuffer + i * typeSize * vecSize, typeSize * vecSize );
        }

        // Validate the results now
        char *expected = referenceBuffer;
        char *actual = (char *)(void *)outBuffer;

        if (DEBUG)
        {
            log_info("Memory contents:\n");
            for (i=0; i<numElements; i++)
            {
                char expectedString[ 1024 ], actualString[ 1024 ];
                if (i < numStores)
                {
                    char inString[1024];
                    log_info("buffer %3d: input: %s expected: %s got: %s (store offset %3d)", (int)i, GetDataVectorString( &(inBuffer[i*typeSize*vecSize]), typeSize, vecSize, inString ),
                             GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
                             GetDataVectorString( &(actual[i*typeSize*vecSize]), typeSize, vecSize, actualString ),
                             offsets[i]);
                    if (memcmp(&(expected[i*typeSize*vecSize]), &(actual[i*typeSize*vecSize]), typeSize * vecSize) != 0)
                        log_error(" << ERROR\n");
                    else
                        log_info("\n");
                }
                else
                {
                    // inBuffer only holds numStores vectors, so beyond that point only the
                    // expected and actual contents can be shown without reading past its end.
                    log_info("buffer %3d: expected: %s got: %s\n", (int)i,
                             GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
                             GetDataVectorString( &(actual[i*typeSize*vecSize]), typeSize, vecSize, actualString ));
                }
            }
        }

        for( i = 0; i < numElements; i++ )
        {
            if( memcmp( expected, actual, typeSize * vecSize ) != 0 )
            {
                char expectedString[ 1024 ], actualString[ 1024 ];
                log_error( "ERROR: Data sample %d for vstore of %s%d did not validate (expected {%s}, got {%s}",
                           (int)i, get_explicit_type_name( type ), vecSize, GetDataVectorString( expected, typeSize, vecSize, expectedString ),
                           GetDataVectorString( actual, typeSize, vecSize, actualString ) );

                // Report which store (if any) targeted this slot; otherwise the slot
                // should have kept its cleared value.
                size_t j;
                for( j = 0; j < numStores; j++ )
                {
                    if( offsets[ j ] == (cl_uint)i )
                    {
                        log_error( ", stored from store #%d (of %d, offset = %d) with address offset of %d", (int)j, (int)numStores, offsets[j], (int)addressOffset );
                        break;
                    }
                }
                if( j == numStores )
                    log_error( ", supposed to be canary value" );
                log_error( ")\n" );
                return 1;
            }
            expected += typeSize * vecSize;
            actual += typeSize * vecSize;
        }
    }

    return 0;
}
// Runs test_vstore() for every element type / vector width combination.
// Double is skipped when cl_khr_fp64 is not reported, and long/ulong are skipped when
// gHasLong is false. After a failure the remaining widths of that type are skipped.
// Returns the sum of the non-zero results returned by test_vstore() (0 when all pass).
int test_vstoreset(cl_device_id device, cl_context context, cl_command_queue queue, create_vstore_program_fn createFn, size_t bufferSize )
{
    // Both tables are terminated by a sentinel entry.
    ExplicitType typesToTest[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int widths[] = { 2, 3, 4, 8, 16, 0 };
    const char *widthLabels[] = { "2", "3", "4", "8", "16"};

    int failures = 0;
    MTdata d = init_genrand( gRandomSeed );

    log_info("Testing with buffer size of %d.\n", (int)bufferSize);

    for( unsigned int t = 0; typesToTest[ t ] != kNumExplicitTypes; t++ )
    {
        const ExplicitType current = typesToTest[ t ];

        if( current == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
            continue;

        const bool is64BitInteger = ( current == kLong || current == kULong );
        if( is64BitInteger && !gHasLong )
            continue;

        for( unsigned int w = 0; widths[ w ] != 0; w++ )
        {
            log_info("Testing %s%s...\n", get_explicit_type_name(current), widthLabels[w]);

            const int result = test_vstore( device, context, queue, current, widths[ w ], createFn, bufferSize, d );
            if( result == 0 )
                continue;

            log_error("Failure; skipping further sizes for this type.\n");
            failures += result;
            break;
        }
    }

    free_mtdata(d);
    return failures;
}
#pragma mark -------------------- vstore test cases --------------------------
// Writes into destBuffer the source of a kernel that vstores into a __global buffer.
// For width 3 the generated kernel reads its source value either by indexing the 3-vector
// array (work-items whose id is a multiple of 4) or through vload3 (all others).
// inBufferSize is not used by this generator.
void create_global_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
{
    const char *elemName = get_explicit_type_name(type);
    const char *fp64Prefix = ( type == kDouble ) ? doubleExtensionPragma : "";

    if( inVectorSize == 3 )
    {
        const char *vec3Template =
            "%s"
            "__kernel void test_fn( __global %s3 *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
            "{\n"
            " int tid = get_global_id( 0 );\n"
            " if((tid&3) == 0) { // if \"tid\" is a multiple of 4 \n"
            " vstore3( srcValues[ 3*(tid>>2) ], offsets[ tid ], destBuffer + alignmentOffset );\n"
            " } else {\n"
            " vstore3( vload3(tid, (__global %s *)srcValues), offsets[ tid ], destBuffer + alignmentOffset );\n"
            " }\n"
            "}\n";

        sprintf( destBuffer, vec3Template, fp64Prefix,
                 elemName, elemName, elemName );
        return;
    }

    const char *genericTemplate =
        "%s"
        "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
        "{\n"
        " int tid = get_global_id( 0 );\n"
        " vstore%d( srcValues[ tid ], offsets[ tid ], destBuffer + alignmentOffset );\n"
        "}\n";

    const int storeWidth = (int)inVectorSize;

    sprintf( destBuffer, genericTemplate, fp64Prefix,
             elemName, storeWidth, elemName, storeWidth );
}
// vstore test against __global memory: runs the whole type/width matrix with a
// fixed 10240-byte destination buffer. n_elems is not consulted.
int test_vstore_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const size_t globalBufferBytes = 10240;

    return test_vstoreset( device, context, queue, create_global_store_code, globalBufferBytes );
}
// Writes into destBuffer the source of a kernel that vstores into __local memory.
// The generated kernel takes the local buffer as its first argument. Each work-item zeroes
// the local slots its store can touch, waits on a barrier, performs the vstore, waits on a
// second barrier, and then copies its own stored vector from local memory to the global
// destination buffer. Width 3 uses a scalar-typed variant.
// inBufferSize is not used by this generator.
//
// The width-3 sprintf call passes exactly one argument per conversion in its template
// (the original call passed one surplus type name, which sprintf ignored); the generated
// source text is unchanged.
void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
{
    const char *pattern =
        "%s"
        "\n"
        "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n"
        "{\n"
        " int tid = get_global_id( 0 );\n"
        // We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
        " sSharedStorage[ offsets[tid] ] = (%s%d)(%s)0;\n"
        " sSharedStorage[ offsets[tid] +1 ] = sSharedStorage[ offsets[tid] ];\n"
        " barrier( CLK_LOCAL_MEM_FENCE );\n"
        "\n"
        " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__local %s *)sSharedStorage ) + alignmentOffset );\n"
        "\n"
        // Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output
        // buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy
        " barrier( CLK_LOCAL_MEM_FENCE );\n"
        "\n"
        // Note: we only copy the relevant portion of our local storage over to the dest buffer, because
        // otherwise, local threads would be overwriting results from other local threads
        " int i;\n"
        " __local %s *sp = (__local %s*) (sSharedStorage + offsets[tid]) + alignmentOffset;\n"
        " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n"
        " for( i = 0; (size_t)i < sizeof( sSharedStorage[0]) / sizeof( *sp ); i++ ) \n"
        " dp[i] = sp[i];\n"
        "}\n";

    const char *patternV3 =
        "%s"
        "\n"
        "__kernel void test_fn(__local %s *sSharedStorage, __global %s *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
        "{\n"
        " int tid = get_global_id( 0 );\n"
        // We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
        " sSharedStorage[ 3*offsets[tid] ] = (%s)0;\n"
        " sSharedStorage[ 3*offsets[tid] +1 ] = \n"
        " sSharedStorage[ 3*offsets[tid] ];\n"
        " sSharedStorage[ 3*offsets[tid] +2 ] = \n"
        " sSharedStorage[ 3*offsets[tid]];\n"
        " sSharedStorage[ 3*offsets[tid] +3 ] = \n"
        " sSharedStorage[ 3*offsets[tid]];\n"
        " sSharedStorage[ 3*offsets[tid] +4 ] = \n"
        " sSharedStorage[ 3*offsets[tid] ];\n"
        " sSharedStorage[ 3*offsets[tid] +5 ] = \n"
        " sSharedStorage[ 3*offsets[tid]];\n"
        " barrier( CLK_LOCAL_MEM_FENCE );\n"
        "\n"
        " vstore3( vload3(tid,srcValues), offsets[ tid ], sSharedStorage + alignmentOffset );\n"
        "\n"
        // Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output
        // buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy
        " barrier( CLK_LOCAL_MEM_FENCE );\n"
        "\n"
        // Note: we only copy the relevant portion of our local storage over to the dest buffer, because
        // otherwise, local threads would be overwriting results from other local threads
        " int i;\n"
        " __local %s *sp = (sSharedStorage + 3*offsets[tid]) + alignmentOffset;\n"
        " __global %s *dp = (destBuffer + 3*offsets[tid]) + alignmentOffset;\n"
        " for( i = 0; i < 3; i++ ) \n"
        " dp[i] = sp[i];\n"
        "}\n";

    const char *typeName = get_explicit_type_name(type);
    if(inVectorSize == 3) {
        sprintf( destBuffer, patternV3,
                 type == kDouble ? doubleExtensionPragma : "",
                 typeName, typeName, typeName,   // kernel signature: local, source and destination pointers
                 typeName,                       // zero-initialisation cast
                 typeName,                       // sp declaration
                 typeName );                     // dp declaration
    } else {
        sprintf( destBuffer, pattern,
                 type == kDouble ? doubleExtensionPragma : "",
                 typeName, (int)inVectorSize,
                 typeName, (int)inVectorSize, typeName, (int)inVectorSize,
                 typeName, (int)inVectorSize, typeName,
                 (int)inVectorSize, typeName, typeName,
                 typeName, typeName, typeName );
    }
}
// vstore test against __local memory. The destination buffer size is derived from the
// device's reported local memory size: capped at 10240 bytes, then reduced by 2048 bytes
// when the capped value exceeds 4096 and halved otherwise. n_elems is not consulted.
int test_vstore_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    cl_ulong reportedBytes;
    int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( reportedBytes ), &reportedBytes, NULL );
    test_error( error, "Unable to get max size of local memory buffer" );

    const cl_ulong cappedBytes = ( reportedBytes > 10240 ) ? 10240 : reportedBytes;
    const cl_ulong testBytes = ( cappedBytes > 4096 ) ? ( cappedBytes - 2048 ) : ( cappedBytes / 2 );

    return test_vstoreset( device, context, queue, create_local_store_code, (size_t)testBytes );
}
// Writes into destBuffer the source of a kernel that vstores into __private memory.
// The generated kernel declares a private array of inBufferSize vectors, zeroes the entry
// indexed by the work-item id, performs the vstore into the private array, and then copies
// the stored vector from private memory to the global destination buffer.
// Width 3 uses a variant that reads its source through vload3 and copies three scalars.
void create_private_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
{
    const char *elemName = get_explicit_type_name(type);
    const char *fp64Prefix = ( type == kDouble ) ? doubleExtensionPragma : "";
    const int vectorCount = (int)inBufferSize;

    if( inVectorSize == 3 )
    {
        const char *vec3Template =
            "%s"
            // Private storage exists once per work-item, so every work-item works on its
            // own private array.
            "\n"
            "__kernel void test_fn( __global %s *srcValues, __global uint *offsets, __global %s3 *destBuffer, uint alignmentOffset )\n"
            "{\n"
            " __private %s3 sPrivateStorage[ %d ];\n"  // array length comes from inBufferSize
            " int tid = get_global_id( 0 );\n"
            // Clear the entry at the work-item's own index before storing.
            " sPrivateStorage[tid] = (%s3)(%s)0;\n"
            "\n"
            " vstore3( vload3(tid,srcValues), offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n"
            "\n"
            // Copy back only the three scalars this work-item stored.
            " uint i;\n"
            " __private %s *sp = ((__private %s*) sPrivateStorage) + 3*offsets[tid] + alignmentOffset;\n"
            " __global %s *dp = ((__global %s*) destBuffer) + 3*offsets[tid] + alignmentOffset;\n"
            " for( i = 0; i < 3; i++ ) \n"
            " dp[i] = sp[i];\n"
            "}\n";

        sprintf( destBuffer, vec3Template, fp64Prefix,
                 elemName, elemName,
                 elemName, vectorCount,
                 elemName, elemName,
                 elemName,
                 elemName, elemName,
                 elemName, elemName );
        return;
    }

    const char *genericTemplate =
        "%s"
        // Private storage exists once per work-item, so every work-item works on its
        // own private array.
        "\n"
        "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n"
        "{\n"
        " __private %s%d sPrivateStorage[ %d ];\n"
        " int tid = get_global_id( 0 );\n"
        // Clear the entry at the work-item's own index before storing.
        " sPrivateStorage[tid] = (%s%d)(%s)0;\n"
        "\n"
        " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n"
        "\n"
        // Copy back only the one vector this work-item stored, element by element.
        " uint i;\n"
        " __private %s *sp = (__private %s*) (sPrivateStorage + offsets[tid]) + alignmentOffset;\n"
        " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n"
        " for( i = 0; i < sizeof( sPrivateStorage[0]) / sizeof( *sp ); i++ ) \n"
        " dp[i] = sp[i];\n"
        "}\n";

    const int storeWidth = (int)inVectorSize;

    sprintf( destBuffer, genericTemplate, fp64Prefix,
             elemName, storeWidth, elemName, storeWidth,
             elemName, storeWidth, vectorCount,
             elemName, storeWidth, elemName,
             storeWidth, elemName,
             elemName, elemName,
             elemName, elemName );
}
// Runs the vstore test set against __private storage using a fixed buffer size.
int test_vstore_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    // The amount of private storage actually available is unknown, so a modest
    // fixed size is used: room for two 16-element long vectors,
    // i.e. 2 * 8 bytes * 16 = 256 bytes.
    const size_t privateTestBytes = 256;

    return test_vstoreset( device, context, queue, create_private_store_code, privateTestBytes );
}

View File

@@ -0,0 +1,177 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/typeWrappers.h"
// Host-side record of the work-item query results written by one work-item.
// The field order and sizes must stay in step with the work_item_data struct
// declared inside workItemKernelCode, since the device writes this layout directly.
// The typedef now carries a declarator: without one the 'typedef' keyword is
// ignored (compilers warn) and the bare name is not a type in C.
typedef struct work_item_data
{
    cl_uint workDim;            // get_work_dim()
    cl_uint globalSize[ 3 ];    // get_global_size( i )
    cl_uint globalID[ 3 ];      // get_global_id( i )
    cl_uint localSize[ 3 ];     // get_local_size( i )
    cl_uint localID[ 3 ];       // get_local_id( i )
    cl_uint numGroups[ 3 ];     // get_num_groups( i )
    cl_uint groupID[ 3 ];       // get_group_id( i )
} work_item_data;
// OpenCL C source for the work-item function test.
// Each work-item stores the results of the work-item query built-ins into
// outData[ get_global_id(0) ]; only the first get_work_dim() entries of each
// per-dimension array are written. Work-items that share the same dimension-0
// global id write to the same outData element.
// The struct declared here must match the host-side work_item_data layout.
static const char *workItemKernelCode =
"typedef struct {\n"
" uint workDim;\n"
" uint globalSize[ 3 ];\n"
" uint globalID[ 3 ];\n"
" uint localSize[ 3 ];\n"
" uint localID[ 3 ];\n"
" uint numGroups[ 3 ];\n"
" uint groupID[ 3 ];\n"
" } work_item_data;\n"
"\n"
"__kernel void sample_kernel( __global work_item_data *outData )\n"
"{\n"
" int id = get_global_id(0);\n"
" outData[ id ].workDim = (uint)get_work_dim();\n"
" for( uint i = 0; i < get_work_dim(); i++ )\n"
" {\n"
" outData[ id ].globalSize[ i ] = (uint)get_global_size( i );\n"
" outData[ id ].globalID[ i ] = (uint)get_global_id( i );\n"
" outData[ id ].localSize[ i ] = (uint)get_local_size( i );\n"
" outData[ id ].localID[ i ] = (uint)get_local_id( i );\n"
" outData[ id ].numGroups[ i ] = (uint)get_num_groups( i );\n"
" outData[ id ].groupID[ i ] = (uint)get_group_id( i );\n"
" }\n"
"}";
// Number of random launch configurations tried per dimensionality.
#define NUM_TESTS 1

// Launches sample_kernel with 1, 2 and 3 dimensions using random global sizes
// in [1, 20] and a local size of 1, reads the per-work-item records back and
// checks that every work-item query built-in reported a sane value.
//
// Returns 0 on success and -1 on the first validation failure (after logging
// it); OpenCL API failures are reported through test_error.
//
// NOTE(review): testData is a large automatic array (10240 records); it is
// kept as in the original, but it consumes a substantial amount of stack.
int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper outData;
    work_item_data testData[ 10240 ];
    size_t threads[3], localThreads[3];
    MTdata d;

    error = create_single_kernel_helper( context, &program, &kernel, 1, &workItemKernelCode, "sample_kernel" );
    test_error( error, "Unable to create testing kernel" );

    outData = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( testData ), NULL, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( outData ), &outData );
    test_error( error, "Unable to set kernel arg" );

    d = init_genrand( gRandomSeed );
    for( size_t dim = 1; dim <= 3; dim++ )
    {
        for( int i = 0; i < NUM_TESTS; i++ )
        {
            size_t numItems = 1;
            for( size_t j = 0; j < dim; j++ )
            {
                // All of our thread sizes should be within the max local sizes, since they're all <= 20
                threads[ j ] = (size_t)random_in_range( 1, 20, d );
                // The random local size is still computed so that the random
                // number sequence is consumed exactly as before, even though
                // the value is overridden below.
                localThreads[ j ] = threads[ j ] / (size_t)random_in_range( 1, (int)threads[ j ], d );
                while( localThreads[ j ] > 1 && ( threads[ j ] % localThreads[ j ] != 0 ) )
                    localThreads[ j ]--;

                numItems *= threads[ j ];

                // Hack for now: localThreads > 1 are iffy
                localThreads[ j ] = 1;
            }

            error = clEnqueueNDRangeKernel( queue, kernel, (cl_uint)dim, NULL, threads, localThreads, 0, NULL, NULL );
            test_error( error, "Unable to run kernel" );

            error = clEnqueueReadBuffer( queue, outData, CL_TRUE, 0, sizeof( testData ), testData, 0, NULL, NULL );
            test_error( error, "Unable to read results" );

            // Validate. The kernel indexes its output by get_global_id(0), so
            // only the first threads[0] records are populated.
            for( size_t q = 0; q < threads[0]; q++ )
            {
                // We can't really validate the actual value of each one, but we can validate that they're within a sane range
                if( testData[ q ].workDim != (cl_uint)dim )
                {
                    log_error( "ERROR: get_work_dim() did not return proper value for %d dimensions (expected %d, got %d)\n", (int)dim, (int)dim, (int)testData[ q ].workDim );
                    free_mtdata(d);
                    return -1;
                }
                for( size_t j = 0; j < dim; j++ )
                {
                    if( testData[ q ].globalSize[ j ] != (cl_uint)threads[ j ] )
                    {
                        log_error( "ERROR: get_global_size(%d) did not return proper value for %d dimensions (expected %d, got %d)\n",
                                  (int)j, (int)dim, (int)threads[ j ], (int)testData[ q ].globalSize[ j ] );
                        free_mtdata(d);
                        return -1;
                    }
                    // The recorded ids are unsigned, so only the upper bound needs checking.
                    if( testData[ q ].globalID[ j ] >= (cl_uint)threads[ j ] )
                    {
                        log_error( "ERROR: get_global_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n",
                                  (int)j, (int)dim, (int)threads[ j ], (int)testData[ q ].globalID[ j ] );
                        free_mtdata(d);
                        return -1;
                    }
                    if( testData[ q ].localSize[ j ] != (cl_uint)localThreads[ j ] )
                    {
                        log_error( "ERROR: get_local_size(%d) did not return proper value for %d dimensions (expected %d, got %d)\n",
                                  (int)j, (int)dim, (int)localThreads[ j ], (int)testData[ q ].localSize[ j ] );
                        free_mtdata(d);
                        return -1;
                    }
                    // Previously written as "< 0 && >= max", which can never be
                    // true for an unsigned value, so the local id was never
                    // actually validated.
                    if( testData[ q ].localID[ j ] >= (cl_uint)localThreads[ j ] )
                    {
                        log_error( "ERROR: get_local_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n",
                                  (int)j, (int)dim, (int)localThreads[ j ], (int)testData[ q ].localID[ j ] );
                        free_mtdata(d);
                        return -1;
                    }
                    size_t groupCount = ( threads[ j ] + localThreads[ j ] - 1 ) / localThreads[ j ];
                    if( testData[ q ].numGroups[ j ] != (cl_uint)groupCount )
                    {
                        log_error( "ERROR: get_num_groups(%d) did not return proper value for %d dimensions (expected %d with global dim %d and local dim %d, got %d)\n",
                                  (int)j, (int)dim, (int)groupCount, (int)threads[ j ], (int)localThreads[ j ], (int)testData[ q ].numGroups[ j ] );
                        free_mtdata(d);
                        return -1;
                    }
                    if( testData[ q ].groupID[ j ] >= (cl_uint)groupCount )
                    {
                        log_error( "ERROR: get_group_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n",
                                  (int)j, (int)dim, (int)groupCount, (int)testData[ q ].groupID[ j ] );
                        free_mtdata(d);
                        return -1;
                    }
                }
            }
        }
    }

    free_mtdata(d);
    return 0;
}

View File

@@ -0,0 +1,300 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source: each work-item reads the four bytes of its pixel from src
// (pixel index = y * image width + x), reorders them as
// (src[+2], src[+1], src[+0], src[+3]), scales by 1/255 and stores the result
// with write_imagef at (x, y).
static const char *bgra8888_write_kernel_code =
"\n"
"__kernel void test_bgra8888_write(__global unsigned char *src, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
" float4 color;\n"
"\n"
" indx *= 4;\n"
" color = (float4)((float)src[indx+2], (float)src[indx+1], (float)src[indx+0], (float)src[indx+3]);\n"
" color /= (float4)(255.0f, 255.0f, 255.0f, 255.0f);\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n";
// OpenCL C source: each work-item reads the four bytes of its pixel from src
// (pixel index = y * image width + x) in stored order, scales them by 1/255
// and stores the result with write_imagef at (x, y).
static const char *rgba8888_write_kernel_code =
"\n"
"__kernel void test_rgba8888_write(__global unsigned char *src, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
" float4 color;\n"
"\n"
" indx *= 4;\n"
" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
" color /= (float4)(255.0f, 255.0f, 255.0f, 255.0f);\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n";
// Allocates a w x h image with four 8-bit channels per pixel and fills every
// byte from the random generator d.
// Returns a malloc'ed buffer the caller must free(), or NULL when the
// allocation fails (previously a failed allocation was written through).
static unsigned char *
generate_8888_image(int w, int h, MTdata d)
{
    const int num_bytes = w * h * 4;
    cl_uchar *ptr = (cl_uchar *)malloc(num_bytes);
    int i;

    if (ptr == NULL)
        return NULL;

    for (i=0; i<num_bytes; i++)
        ptr[i] = (cl_uchar)genrand_int32(d);

    return ptr;
}
// Compares the w*h*4 bytes read back from the BGRA image (outptr) with the
// reference data (image). Logs the outcome; returns 0 when every byte matches
// and -1 at the first mismatch.
static int
verify_bgra8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    const int byte_count = w * h * 4;
    int pos = 0;

    while (pos < byte_count)
    {
        if (image[pos] != outptr[pos])
        {
            log_error("WRITE_IMAGE_BGRA_UNORM_INT8 test failed\n");
            return -1;
        }
        ++pos;
    }

    log_info("WRITE_IMAGE_BGRA_UNORM_INT8 test passed\n");
    return 0;
}
// Compares the w*h*4 bytes read back from the RGBA image (outptr) with the
// reference data (image). Logs the outcome; returns 0 when every byte matches
// and -1 at the first mismatch.
static int
verify_rgba8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    const int byte_count = w * h * 4;
    int pos = 0;

    while (pos < byte_count)
    {
        if (image[pos] != outptr[pos])
        {
            log_error("WRITE_IMAGE_RGBA_UNORM_INT8 test failed\n");
            return -1;
        }
        ++pos;
    }

    log_info("WRITE_IMAGE_RGBA_UNORM_INT8 test passed\n");
    return 0;
}
int test_writeimage(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem streams[6];
cl_program program[2];
cl_kernel kernel[4];
unsigned char *input_ptr[2], *output_ptr;
cl_image_format img_format;
size_t threads[2];
int img_width = 512;
int img_height = 512;
int i, err, any_err = 0;
size_t origin[3] = {0, 0, 0};
size_t region[3] = {img_width, img_height, 1};
size_t length = img_width * img_height * 4 * sizeof(unsigned char);
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
MTdata d = init_genrand( gRandomSeed );
input_ptr[0] = generate_8888_image(img_width, img_height, d);
input_ptr[1] = generate_8888_image(img_width, img_height, d);
free_mtdata(d); d = NULL;
output_ptr = (unsigned char*)malloc(length);
img_format.image_channel_order = CL_BGRA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
if (!streams[0])
{
log_error("create_image_2d failed\n");
return -1;
}
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[1] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
if (!streams[1])
{
log_error("create_image_2d failed\n");
return -1;
}
img_format.image_channel_order = CL_BGRA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[2] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
if (!streams[2])
{
log_error("create_image_2d failed\n");
return -1;
}
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8;
streams[3] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
if (!streams[3])
{
log_error("create_image_2d failed\n");
return -1;
}
streams[4] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
if (!streams[4])
{
log_error("clCreateBuffer failed\n");
return -1;
}
streams[5] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
if (!streams[5])
{
log_error("clCreateBuffer failed\n");
return -1;
}
err = clEnqueueWriteBuffer(queue, streams[4], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueWriteBuffer failed\n");
return -1;
}
err = clEnqueueWriteBuffer(queue, streams[5], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueWriteBuffer failed\n");
return -1;
}
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &bgra8888_write_kernel_code, "test_bgra8888_write" );
if (err)
return -1;
kernel[2] = clCreateKernel(program[0], "test_bgra8888_write", NULL);
if (!kernel[2])
{
log_error("clCreateKernel failed\n");
return -1;
}
err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &rgba8888_write_kernel_code, "test_rgba8888_write" );
if (err)
return -1;
kernel[3] = clCreateKernel(program[1], "test_rgba8888_write", NULL);
if (!kernel[3])
{
log_error("clCreateKernel failed\n");
return -1;
}
err = clSetKernelArg(kernel[0], 0, sizeof streams[4], &streams[4]);
err |= clSetKernelArg(kernel[0], 1, sizeof streams[0], &streams[0]);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
return -1;
}
err = clSetKernelArg(kernel[1], 0, sizeof streams[5], &streams[5]);
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
return -1;
}
err = clSetKernelArg(kernel[2], 0, sizeof streams[4], &streams[4]);
err |= clSetKernelArg(kernel[2], 1, sizeof streams[2], &streams[2]);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
return -1;
}
err = clSetKernelArg(kernel[3], 0, sizeof streams[5], &streams[5]);
err |= clSetKernelArg(kernel[3], 1, sizeof streams[3], &streams[3]);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
return -1;
}
threads[0] = (unsigned int)img_width;
threads[1] = (unsigned int)img_height;
for (i=0; i<4; i++)
{
err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueNDRangeKernel failed\n");
return -1;
}
err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clReadImage failed\n");
return -1;
}
switch (i)
{
case 0:
case 2:
err = verify_bgra8888_image(input_ptr[i&0x01], output_ptr, img_width, img_height);
break;
case 1:
case 3:
err = verify_rgba8888_image(input_ptr[i&0x01], output_ptr, img_width, img_height);
break;
}
//if (err)
//break;
any_err |= err;
}
// cleanup
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[1]);
clReleaseMemObject(streams[2]);
clReleaseMemObject(streams[3]);
clReleaseMemObject(streams[4]);
clReleaseMemObject(streams[5]);
for (i=0; i<2; i++)
{
clReleaseKernel(kernel[i]);
clReleaseKernel(kernel[i+2]);
clReleaseProgram(program[i]);
}
free(input_ptr[0]);
free(input_ptr[1]);
free(output_ptr);
return any_err;
}

View File

@@ -0,0 +1,188 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source: each work-item reads the four floats of its pixel from src
// (pixel index = y * image width + x) and stores them unchanged with
// write_imagef at (x, y).
static const char *rgbaFFFF_write_kernel_code =
"__kernel void test_rgbaFFFF_write(__global float *src, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
" float4 color;\n"
"\n"
" indx *= 4;\n"
" color = (float4)(src[indx+0], src[indx+1], src[indx+2], src[indx+3]);\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n";
// Allocates a w x h image with four float channels per pixel and fills every
// channel with get_random_float(-0x40000000, 0x40000000, d).
// Returns a malloc'ed buffer the caller must free(), or NULL when the
// allocation fails (previously a failed allocation was written through).
static float *
generate_float_image(int w, int h, MTdata d)
{
    const int num_values = w * h * 4;
    float *ptr = (float*)malloc(w * h * 4 * sizeof(float));
    int i;

    if (ptr == NULL)
        return NULL;

    for (i=0; i<num_values; i++)
        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);

    return ptr;
}
// Compares the w*h*4 floats read back from the image (outptr) with the
// reference data (image) for exact equality. Logs "<string> passed" or
// "<string> failed"; returns 0 when all values match and -1 at the first
// mismatch.
static int
verify_float_image(const char *string, float *image, float *outptr, int w, int h)
{
    const int value_count = w * h * 4;
    int pos = 0;

    while (pos < value_count)
    {
        if (outptr[pos] != image[pos])
        {
            log_error("%s failed\n", string);
            return -1;
        }
        ++pos;
    }

    log_info("%s passed\n", string);
    return 0;
}
int test_writeimage_fp32(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem streams[3];
cl_program program;
cl_kernel kernel[2];
cl_image_format img_format;
float *input_ptr, *output_ptr;
size_t threads[2];
int img_width = 512;
int img_height = 512;
int i, err, any_err = 0;
size_t origin[3] = {0, 0, 0};
size_t region[3] = {img_width, img_height, 1};
size_t length = img_width * img_height * 4 * sizeof(float);
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
d = init_genrand( gRandomSeed );
input_ptr = generate_float_image(img_width, img_height, d);
free_mtdata(d); d = NULL;
output_ptr = (float*)malloc(length);
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_FLOAT;
streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
if (!streams[0])
{
log_error("create_image_2d failed\n");
return -1;
}
streams[1] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
if (!streams[1])
{
log_error("create_image_2d failed\n");
return -1;
}
streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
if (!streams[2])
{
log_error("clCreateArray failed\n");
return -1;
}
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueWriteBuffer failed\n");
return -1;
}
err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgbaFFFF_write_kernel_code, "test_rgbaFFFF_write" );
if (err)
return -1;
kernel[1] = clCreateKernel(program, "test_rgbaFFFF_write", NULL);
if (!kernel[1])
{
log_error("clCreateKernel failed\n");
return -1;
}
err = clSetKernelArg(kernel[0], 0, sizeof streams[2], &streams[2]);
err |= clSetKernelArg(kernel[0], 1, sizeof streams[0], &streams[0]);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
return -1;
}
err = clSetKernelArg(kernel[1], 0, sizeof streams[2], &streams[2]);
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
return -1;
}
threads[0] = (unsigned int)img_width;
threads[1] = (unsigned int)img_height;
for (i=0; i<2; i++)
{
err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clExecuteKernel failed\n");
return -1;
}
err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clReadImage failed\n");
return -1;
}
err = verify_float_image((i == 0) ? "WRITE_IMAGE_RGBA_FLOAT test with memflags = CL_MEM_READ_WRITE" :
"WRITE_IMAGE_RGBA_FLOAT test with memflags = CL_MEM_WRITE_ONLY",
input_ptr, output_ptr, img_width, img_height);
any_err |= err;
}
// cleanup
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[1]);
clReleaseMemObject(streams[2]);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseProgram(program);
free(input_ptr);
free(output_ptr);
return any_err;
}

View File

@@ -0,0 +1,194 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source: each work-item reads the four 16-bit values of its pixel
// from src (pixel index = y * image width + x), converts them to float,
// divides by 65535 and stores the result with write_imagef at (x, y).
static const char *rgba16_write_kernel_code =
"__kernel void test_rgba16_write(__global unsigned short *src, write_only image2d_t dstimg)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
" float4 color;\n"
"\n"
" indx *= 4;\n"
" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
" color /= 65535.0f;\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n";
// Allocates a w x h image with four 16-bit channels per pixel and fills every
// channel from the random generator d.
// Returns a malloc'ed buffer the caller must free(), or NULL when the
// allocation fails (previously a failed allocation was written through).
static unsigned short *
generate_16bit_image(int w, int h, MTdata d)
{
    const int num_values = w * h * 4;
    cl_ushort *ptr = (cl_ushort*)malloc(w * h * 4 * sizeof(cl_ushort));
    int i;

    if (ptr == NULL)
        return NULL;

    for (i=0; i<num_values; i++)
        ptr[i] = (cl_ushort)genrand_int32(d);

    return ptr;
}
// Normalized 16-bit values are divided by 65535 in the kernel and scaled back
// up when stored in the image, so allow a difference of one unit per channel.
#define MAX_ERR 1

// Compares the w*h*4 16-bit values read back from the image (outptr) with the
// reference data (image), tolerating an absolute difference of up to MAX_ERR.
// Logs "<string> passed" or "<string> failed"; returns 0 on success and -1 at
// the first value outside the tolerance.
static int
verify_16bit_image(const char *string, cl_ushort *image, cl_ushort *outptr, int w, int h)
{
    const int value_count = w * h * 4;
    int pos;

    for (pos = 0; pos < value_count; ++pos)
    {
        int delta = (int)outptr[pos] - (int)image[pos];

        if (delta < 0)
            delta = -delta;

        if (delta > MAX_ERR)
        {
            log_error("%s failed\n", string);
            return -1;
        }
    }

    log_info("%s passed\n", string);
    return 0;
}
int test_writeimage_int16(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem streams[3];
cl_program program;
cl_kernel kernel[2];
cl_image_format img_format;
cl_ushort *input_ptr, *output_ptr;
size_t threads[2];
int img_width = 512;
int img_height = 512;
int i, err, any_err = 0;
size_t origin[3] = {0, 0, 0};
size_t region[3] = {img_width, img_height, 1};
size_t length = img_width * img_height * 4 * sizeof(cl_ushort);
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
MTdata d = init_genrand( gRandomSeed );
input_ptr = generate_16bit_image(img_width, img_height, d);
free_mtdata(d); d = NULL;
output_ptr = (cl_ushort*)malloc(length);
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT16;
streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
if (!streams[0])
{
log_error("create_image_2d failed\n");
return -1;
}
img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT16;
streams[1] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
if (!streams[1])
{
log_error("create_image_2d failed\n");
return -1;
}
streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
if (!streams[2])
{
log_error("clCreateArray failed\n");
return -1;
}
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueWriteBuffer failed\n");
return -1;
}
err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgba16_write_kernel_code, "test_rgba16_write" );
if (err)
return -1;
kernel[1] = clCreateKernel(program, "test_rgba16_write", NULL);
if (!kernel[1])
{
log_error("clCreateKernel failed\n");
return -1;
}
err = clSetKernelArg(kernel[0], 0, sizeof streams[2], &streams[2]);
err |= clSetKernelArg(kernel[0], 1, sizeof streams[0], &streams[0]);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
return -1;
}
err = clSetKernelArg(kernel[1], 0, sizeof streams[2], &streams[2]);
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
return -1;
}
threads[0] = (unsigned int)img_width;
threads[1] = (unsigned int)img_height;
for (i=0; i<2; i++)
{
err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clExecuteKernel failed\n");
return -1;
}
err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clReadImage failed\n");
return -1;
}
err = verify_16bit_image((i == 0) ? "WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_READ_WRITE" :
"WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_WRITE_ONLY",
input_ptr, output_ptr, img_width, img_height);
any_err |= err;
}
// cleanup
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[1]);
clReleaseMemObject(streams[2]);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseProgram(program);
free(input_ptr);
free(output_ptr);
return any_err;
}

Some files were not shown because too many files have changed in this diff Show More