mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-20 22:39:03 +00:00
Initial open source release of OpenCL 2.0 CTS.
This commit is contained in:
82
test_conformance/basic/CMakeLists.txt
Normal file
82
test_conformance/basic/CMakeLists.txt
Normal file
@@ -0,0 +1,82 @@
|
||||
set(MODULE_NAME BASIC)
|
||||
|
||||
set(${MODULE_NAME}_SOURCES
|
||||
main.c
|
||||
test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c
|
||||
test_intmath_int.c test_intmath_int2.c test_intmath_int4.c
|
||||
test_intmath_long.c test_intmath_long2.c test_intmath_long4.c
|
||||
test_hiloeo.c test_local.c test_pointercast.c
|
||||
test_if.c test_loop.c
|
||||
test_readimage.c test_readimage_int16.c test_readimage_fp32.c
|
||||
test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c
|
||||
test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c
|
||||
test_multireadimageonefmt.c test_multireadimagemultifmt.c
|
||||
test_imagedim.c
|
||||
test_vloadstore.c
|
||||
test_int2float.c test_float2int.c
|
||||
test_createkernelsinprogram.c
|
||||
test_hostptr.c
|
||||
test_explicit_s2v.cpp
|
||||
test_constant.c
|
||||
test_image_multipass.c
|
||||
test_imagereadwrite.c test_imagereadwrite3d.c
|
||||
test_image_param.c
|
||||
test_imagenpot.c
|
||||
test_image_r8.c
|
||||
test_barrier.c
|
||||
test_basic_parameter_types.c
|
||||
test_arrayreadwrite.c
|
||||
test_arraycopy.c
|
||||
test_imagearraycopy.c
|
||||
test_imagearraycopy3d.c
|
||||
test_imagecopy.c
|
||||
test_imagerandomcopy.c
|
||||
test_arrayimagecopy.c
|
||||
test_arrayimagecopy3d.c
|
||||
test_imagecopy3d.c
|
||||
test_enqueue_map.cpp
|
||||
test_work_item_functions.cpp
|
||||
test_astype.cpp
|
||||
test_async_copy.cpp
|
||||
test_sizeof.c
|
||||
test_vector_creation.cpp
|
||||
test_vec_type_hint.c
|
||||
test_numeric_constants.cpp
|
||||
test_constant_source.cpp
|
||||
test_bufferreadwriterect.c
|
||||
test_async_strided_copy.cpp
|
||||
test_preprocessors.cpp
|
||||
test_kernel_memory_alignment.cpp
|
||||
test_global_work_offsets.cpp
|
||||
test_kernel_call_kernel_function.cpp
|
||||
test_local_kernel_scope.cpp
|
||||
test_progvar.cpp
|
||||
test_wg_barrier.c
|
||||
test_global_linear_id.c
|
||||
test_local_linear_id.c
|
||||
test_enqueued_local_size.c
|
||||
test_simple_image_pitch.c
|
||||
test_get_linear_ids.cpp
|
||||
test_rw_image_access_qualifier.c
|
||||
../../test_common/harness/errorHelpers.c
|
||||
../../test_common/harness/threadTesting.c
|
||||
../../test_common/harness/testHarness.c
|
||||
../../test_common/harness/kernelHelpers.c
|
||||
../../test_common/harness/typeWrappers.cpp
|
||||
../../test_common/harness/imageHelpers.cpp
|
||||
../../test_common/harness/mt19937.c
|
||||
../../test_common/harness/conversions.c
|
||||
../../test_common/harness/rounding_mode.c
|
||||
../../test_common/harness/msvc9.c
|
||||
)
|
||||
|
||||
if(APPLE)
|
||||
list(APPEND ${MODULE_NAME}_SOURCES test_queue_priority.c)
|
||||
endif(APPLE)
|
||||
|
||||
include(../CMakeCommon.txt)
|
||||
74
test_conformance/basic/Jamfile
Normal file
74
test_conformance/basic/Jamfile
Normal file
@@ -0,0 +1,74 @@
|
||||
project
|
||||
: requirements
|
||||
<toolset>gcc:<cflags>-xc++
|
||||
<toolset>msvc:<cflags>"/TP"
|
||||
;
|
||||
|
||||
exe test_basic
|
||||
: main.c
|
||||
test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c
|
||||
test_intmath_int.c test_intmath_int2.c test_intmath_int4.c
|
||||
test_intmath_long.c test_intmath_long2.c test_intmath_long4.c
|
||||
test_hiloeo.c test_local.c test_pointercast.c
|
||||
test_if.c test_sizeof.c test_loop.c
|
||||
test_readimage.c test_readimage_int16.c test_readimage_fp32.c
|
||||
test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c
|
||||
test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c
|
||||
test_multireadimageonefmt.c test_multireadimagemultifmt.c
|
||||
test_imagedim.c
|
||||
test_vloadstore.c
|
||||
test_int2float.c test_float2int.c
|
||||
test_createkernelsinprogram.c
|
||||
test_hostptr.c
|
||||
test_explicit_s2v.cpp
|
||||
test_constant.c
|
||||
test_constant_source.cpp
|
||||
test_image_multipass.c
|
||||
test_imagereadwrite.c test_imagereadwrite3d.c
|
||||
test_bufferreadwriterect.c
|
||||
test_image_param.c
|
||||
test_imagenpot.c
|
||||
test_image_r8.c
|
||||
test_barrier.c
|
||||
test_arrayreadwrite.c
|
||||
test_arraycopy.c
|
||||
test_imagearraycopy.c
|
||||
test_imagearraycopy3d.c
|
||||
test_imagecopy.c
|
||||
test_imagerandomcopy.c
|
||||
test_arrayimagecopy.c
|
||||
test_arrayimagecopy3d.c
|
||||
test_imagecopy3d.c
|
||||
test_enqueue_map.cpp
|
||||
test_work_item_functions.cpp
|
||||
test_astype.cpp
|
||||
test_async_copy.cpp
|
||||
test_async_strided_copy.cpp
|
||||
test_numeric_constants.cpp
|
||||
test_kernel_call_kernel_function.cpp
|
||||
test_basic_parameter_types.c
|
||||
test_vector_creation.cpp
|
||||
test_vec_type_hint.c
|
||||
test_preprocessors.cpp
|
||||
test_kernel_memory_alignment.cpp
|
||||
test_global_work_offsets.cpp
|
||||
test_local_kernel_scope.cpp
|
||||
test_get_linear_ids.cpp
# Remaining sources whose tests main.c registers in basefn_list.
test_wg_barrier.c test_progvar.cpp
test_global_linear_id.c test_local_linear_id.c test_enqueued_local_size.c
test_simple_image_pitch.c test_rw_image_access_qualifier.c
|
||||
../../test_common/harness/errorHelpers.c
|
||||
../../test_common/harness/threadTesting.c
|
||||
../../test_common/harness/testHarness.c
|
||||
../../test_common/harness/rounding_mode.c
|
||||
../../test_common/harness/kernelHelpers.c
|
||||
../../test_common/harness/typeWrappers.cpp
|
||||
../../test_common/harness/imageHelpers.cpp
|
||||
../../test_common/harness/mt19937.c
|
||||
../../test_common/harness/conversions.c
|
||||
: <target-os>windows:<source>../../test_common/harness/msvc9.c
|
||||
;
|
||||
|
||||
install dist
|
||||
: test_basic
|
||||
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/basic
|
||||
<variant>release:<location>$(DIST)/release/tests/test_conformance/basic
|
||||
;
|
||||
|
||||
103
test_conformance/basic/Makefile
Normal file
103
test_conformance/basic/Makefile
Normal file
@@ -0,0 +1,103 @@
|
||||
ifdef BUILD_WITH_ATF
|
||||
ATF = -framework ATF
|
||||
USE_ATF = -DUSE_ATF
|
||||
endif
|
||||
|
||||
SRCS = main.c \
|
||||
test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c \
|
||||
test_intmath_int.c test_intmath_int2.c test_intmath_int4.c \
|
||||
test_intmath_long.c test_intmath_long2.c test_intmath_long4.c \
|
||||
test_hiloeo.c test_local.c test_local_kernel_scope.cpp test_pointercast.c \
|
||||
test_if.c test_sizeof.c test_loop.c \
|
||||
test_readimage.c test_readimage_int16.c test_readimage_fp32.c \
|
||||
test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c \
|
||||
test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c \
|
||||
test_multireadimageonefmt.c test_multireadimagemultifmt.c \
|
||||
test_imagedim.c \
|
||||
test_vloadstore.c \
|
||||
test_int2float.c test_float2int.c \
|
||||
test_createkernelsinprogram.c \
|
||||
test_hostptr.c \
|
||||
test_explicit_s2v.cpp \
|
||||
test_constant.c \
|
||||
test_constant_source.cpp \
|
||||
test_image_multipass.c \
|
||||
test_imagereadwrite.c test_imagereadwrite3d.c \
|
||||
test_bufferreadwriterect.c \
|
||||
test_image_param.c \
|
||||
test_imagenpot.c \
|
||||
test_image_r8.c \
|
||||
test_barrier.c \
|
||||
test_wg_barrier.c \
|
||||
test_arrayreadwrite.c \
|
||||
test_arraycopy.c \
|
||||
test_imagearraycopy.c \
|
||||
test_imagearraycopy3d.c \
|
||||
test_imagecopy.c \
|
||||
test_imagerandomcopy.c \
|
||||
test_arrayimagecopy.c \
|
||||
test_arrayimagecopy3d.c\
|
||||
test_imagecopy3d.c \
|
||||
test_enqueue_map.cpp \
|
||||
test_work_item_functions.cpp \
|
||||
test_astype.cpp \
|
||||
test_async_copy.cpp \
|
||||
test_async_strided_copy.cpp \
|
||||
test_numeric_constants.cpp \
|
||||
test_kernel_call_kernel_function.cpp \
|
||||
test_basic_parameter_types.c \
|
||||
test_vector_creation.cpp \
|
||||
test_vec_type_hint.c \
|
||||
test_preprocessors.cpp \
|
||||
test_kernel_memory_alignment.cpp \
|
||||
test_global_work_offsets.cpp \
|
||||
test_simple_image_pitch.c \
|
||||
test_queue_priority.c \
|
||||
test_global_linear_id.c \
|
||||
test_local_linear_id.c \
|
||||
test_enqueued_local_size.c \
|
||||
test_get_linear_ids.cpp \
|
||||
test_progvar.cpp \
|
||||
test_rw_image_access_qualifier.c \
|
||||
../../test_common/harness/errorHelpers.c \
|
||||
../../test_common/harness/threadTesting.c \
|
||||
../../test_common/harness/testHarness.c \
|
||||
../../test_common/harness/rounding_mode.c \
|
||||
../../test_common/harness/kernelHelpers.c \
|
||||
../../test_common/harness/typeWrappers.cpp \
|
||||
../../test_common/harness/imageHelpers.cpp \
|
||||
../../test_common/harness/mt19937.c \
|
||||
../../test_common/harness/conversions.c
|
||||
|
||||
DEFINES =
|
||||
|
||||
SOURCES = $(abspath $(SRCS))
|
||||
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
|
||||
LIBPATH += -L.
|
||||
FRAMEWORK = $(SOURCES)
|
||||
HEADERS =
|
||||
TARGET = test_basic
|
||||
INCLUDE =
|
||||
COMPILERFLAGS = -c -Wall -g -O0 -Wshorten-64-to-32
|
||||
CC = c++
|
||||
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
|
||||
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
|
||||
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
|
||||
|
||||
OBJECTS := ${SOURCES:.c=.o}
|
||||
OBJECTS := ${OBJECTS:.cpp=.o}
|
||||
|
||||
TARGETOBJECT =
|
||||
all: $(TARGET)
|
||||
|
||||
$(TARGET): $(OBJECTS)
|
||||
$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
|
||||
|
||||
clean:
|
||||
rm -f $(TARGET) $(OBJECTS)
|
||||
|
||||
.DEFAULT:
|
||||
@echo The target \"$@\" does not exist in Makefile.
|
||||
|
||||
|
||||
|
||||
303
test_conformance/basic/main.c
Normal file
303
test_conformance/basic/main.c
Normal file
@@ -0,0 +1,303 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "procs.h"
|
||||
|
||||
// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h
|
||||
// (for example, generate_random_image_data()), the tests are required to declare
|
||||
// the following variables (<rdar://problem/11111245>):
|
||||
cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
|
||||
bool gTestRounding = false;
|
||||
|
||||
basefn basefn_list[] = {
|
||||
test_hostptr,
|
||||
test_fpmath_float,
|
||||
test_fpmath_float2,
|
||||
test_fpmath_float4,
|
||||
test_intmath_int,
|
||||
test_intmath_int2,
|
||||
test_intmath_int4,
|
||||
test_intmath_long,
|
||||
test_intmath_long2,
|
||||
test_intmath_long4,
|
||||
test_hiloeo,
|
||||
test_if,
|
||||
test_sizeof,
|
||||
test_loop,
|
||||
test_pointer_cast,
|
||||
test_local_arg_def,
|
||||
test_local_kernel_def,
|
||||
test_local_kernel_scope,
|
||||
test_constant,
|
||||
test_constant_source,
|
||||
test_readimage,
|
||||
test_readimage_int16,
|
||||
test_readimage_fp32,
|
||||
test_writeimage,
|
||||
test_writeimage_int16,
|
||||
test_writeimage_fp32,
|
||||
test_multireadimageonefmt,
|
||||
|
||||
test_multireadimagemultifmt,
|
||||
test_image_r8,
|
||||
test_barrier,
|
||||
test_wg_barrier,
|
||||
test_int2float,
|
||||
test_float2int,
|
||||
test_imagereadwrite,
|
||||
test_imagereadwrite3d,
|
||||
test_readimage3d,
|
||||
test_readimage3d_int16,
|
||||
test_readimage3d_fp32,
|
||||
test_bufferreadwriterect,
|
||||
test_arrayreadwrite,
|
||||
test_arraycopy,
|
||||
test_imagearraycopy,
|
||||
test_imagearraycopy3d,
|
||||
test_imagecopy,
|
||||
test_imagecopy3d,
|
||||
test_imagerandomcopy,
|
||||
test_arrayimagecopy,
|
||||
test_arrayimagecopy3d,
|
||||
test_imagenpot,
|
||||
|
||||
test_vload_global,
|
||||
test_vload_local,
|
||||
test_vload_constant,
|
||||
test_vload_private,
|
||||
test_vstore_global,
|
||||
test_vstore_local,
|
||||
test_vstore_private,
|
||||
|
||||
test_createkernelsinprogram,
|
||||
test_imagedim_pow2,
|
||||
test_imagedim_non_pow2,
|
||||
test_image_param,
|
||||
test_image_multipass_integer_coord,
|
||||
test_image_multipass_float_coord,
|
||||
test_explicit_s2v_bool,
|
||||
test_explicit_s2v_char,
|
||||
test_explicit_s2v_uchar,
|
||||
test_explicit_s2v_short,
|
||||
test_explicit_s2v_ushort,
|
||||
test_explicit_s2v_int,
|
||||
test_explicit_s2v_uint,
|
||||
test_explicit_s2v_long,
|
||||
test_explicit_s2v_ulong,
|
||||
test_explicit_s2v_float,
|
||||
test_explicit_s2v_double,
|
||||
|
||||
test_enqueue_map_buffer,
|
||||
test_enqueue_map_image,
|
||||
|
||||
test_work_item_functions,
|
||||
|
||||
test_astype,
|
||||
|
||||
test_async_copy_global_to_local,
|
||||
test_async_copy_local_to_global,
|
||||
test_async_strided_copy_global_to_local,
|
||||
test_async_strided_copy_local_to_global,
|
||||
test_prefetch,
|
||||
|
||||
test_kernel_call_kernel_function,
|
||||
test_host_numeric_constants,
|
||||
test_kernel_numeric_constants,
|
||||
test_kernel_limit_constants,
|
||||
test_kernel_preprocessor_macros,
|
||||
|
||||
test_basic_parameter_types,
|
||||
test_vector_creation,
|
||||
test_vec_type_hint,
|
||||
test_kernel_memory_alignment_local,
|
||||
test_kernel_memory_alignment_global,
|
||||
test_kernel_memory_alignment_constant,
|
||||
test_kernel_memory_alignment_private,
|
||||
|
||||
test_progvar_prog_scope_misc,
|
||||
test_progvar_prog_scope_uninit,
|
||||
test_progvar_prog_scope_init,
|
||||
test_progvar_func_scope,
|
||||
|
||||
test_global_work_offsets,
|
||||
test_get_global_offset,
|
||||
|
||||
test_global_linear_id,
|
||||
test_local_linear_id,
|
||||
test_enqueued_local_size,
|
||||
|
||||
test_simple_read_image_pitch,
|
||||
test_simple_write_image_pitch,
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
test_queue_priority,
|
||||
#endif
|
||||
|
||||
test_get_linear_ids,
|
||||
test_rw_image_access_qualifier
|
||||
};
|
||||
|
||||
const char *basefn_names[] = {
|
||||
"hostptr",
|
||||
"fpmath_float",
|
||||
"fpmath_float2",
|
||||
"fpmath_float4",
|
||||
"intmath_int",
|
||||
"intmath_int2",
|
||||
"intmath_int4",
|
||||
"intmath_long",
|
||||
"intmath_long2",
|
||||
"intmath_long4",
|
||||
"hiloeo",
|
||||
"if",
|
||||
"sizeof",
|
||||
"loop",
|
||||
"pointer_cast",
|
||||
"local_arg_def",
|
||||
"local_kernel_def",
|
||||
"local_kernel_scope",
|
||||
"constant",
|
||||
"constant_source",
|
||||
"readimage",
|
||||
"readimage_int16",
|
||||
"readimage_fp32",
|
||||
"writeimage",
|
||||
"writeimage_int16",
|
||||
"writeimage_fp32",
|
||||
"mri_one",
|
||||
|
||||
"mri_multiple",
|
||||
"image_r8",
|
||||
"barrier",
|
||||
"wg_barrier",
|
||||
"int2float",
|
||||
"float2int",
|
||||
"imagereadwrite",
|
||||
"imagereadwrite3d",
|
||||
"readimage3d",
|
||||
"readimage3d_int16",
|
||||
"readimage3d_fp32",
|
||||
"bufferreadwriterect",
|
||||
"arrayreadwrite",
|
||||
"arraycopy",
|
||||
"imagearraycopy",
|
||||
"imagearraycopy3d",
|
||||
"imagecopy",
|
||||
"imagecopy3d",
|
||||
"imagerandomcopy",
|
||||
"arrayimagecopy",
|
||||
"arrayimagecopy3d",
|
||||
"imagenpot",
|
||||
|
||||
"vload_global",
|
||||
"vload_local",
|
||||
"vload_constant",
|
||||
"vload_private",
|
||||
"vstore_global",
|
||||
"vstore_local",
|
||||
"vstore_private",
|
||||
|
||||
"createkernelsinprogram",
|
||||
"imagedim_pow2",
|
||||
"imagedim_non_pow2",
|
||||
"image_param",
|
||||
"image_multipass_integer_coord",
|
||||
"image_multipass_float_coord",
|
||||
"explicit_s2v_bool",
|
||||
"explicit_s2v_char",
|
||||
"explicit_s2v_uchar",
|
||||
"explicit_s2v_short",
|
||||
"explicit_s2v_ushort",
|
||||
"explicit_s2v_int",
|
||||
"explicit_s2v_uint",
|
||||
"explicit_s2v_long",
|
||||
"explicit_s2v_ulong",
|
||||
"explicit_s2v_float",
|
||||
"explicit_s2v_double",
|
||||
|
||||
"enqueue_map_buffer",
|
||||
"enqueue_map_image",
|
||||
|
||||
"work_item_functions",
|
||||
|
||||
"astype",
|
||||
|
||||
"async_copy_global_to_local",
|
||||
"async_copy_local_to_global",
|
||||
"async_strided_copy_global_to_local",
|
||||
"async_strided_copy_local_to_global",
|
||||
"prefetch",
|
||||
|
||||
"kernel_call_kernel_function",
|
||||
"host_numeric_constants",
|
||||
"kernel_numeric_constants",
|
||||
"kernel_limit_constants",
|
||||
"kernel_preprocessor_macros",
|
||||
|
||||
"parameter_types",
|
||||
|
||||
"vector_creation",
|
||||
"vec_type_hint",
|
||||
|
||||
"kernel_memory_alignment_local",
|
||||
"kernel_memory_alignment_global",
|
||||
"kernel_memory_alignment_constant",
|
||||
"kernel_memory_alignment_private",
|
||||
|
||||
"progvar_prog_scope_misc",
|
||||
"progvar_prog_scope_uninit",
|
||||
"progvar_prog_scope_init",
|
||||
"progvar_func_scope",
|
||||
|
||||
"global_work_offsets",
|
||||
"get_global_offset",
|
||||
|
||||
"global_linear_id",
|
||||
"local_linear_id",
|
||||
"enqueued_local_size",
|
||||
|
||||
"simple_read_image_pitch",
|
||||
"simple_write_image_pitch",
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
"queue_priority",
|
||||
#endif
|
||||
|
||||
"get_linear_ids",
|
||||
// NOTE(review): unlike every other entry in this table, this name keeps the
// "test_" prefix, so it is presumably selected as
// "test_rw_image_access_qualifier" rather than "rw_image_access_qualifier".
// Confirm whether that is intentional before renaming.
"test_rw_image_access_qualifier",
|
||||
};
|
||||
|
||||
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
// Number of entries in basefn_names; the ct_assert above checks that
// basefn_list has the same number of entries.
int num_fns = (int)(sizeof(basefn_names) / sizeof(basefn_names[0]));
|
||||
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
|
||||
}
|
||||
|
||||
|
||||
|
||||
160
test_conformance/basic/procs.h
Normal file
160
test_conformance/basic/procs.h
Normal file
@@ -0,0 +1,160 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/rounding_mode.h"
|
||||
|
||||
extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
|
||||
|
||||
extern int test_hostptr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_fpmath_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_fpmath_float2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_fpmath_float4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_intmath_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_intmath_int2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_intmath_int4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_intmath_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_intmath_long2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_intmath_long4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_hiloeo(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_if(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_sizeof(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_loop(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_pointer_cast(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_local_arg_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_local_kernel_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_local_kernel_scope(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_constant_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_readimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_readimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_readimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_writeimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_writeimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_writeimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_multireadimageonefmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_multireadimagemultifmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_image_r8(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_simplebarrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_wg_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_int2float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_float2int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagearraycopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagearraycopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagereadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagereadwrite3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_readimage3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_readimage3d_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_readimage3d_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_bufferreadwriterect(cl_device_id device, cl_context context, cl_command_queue queue_, int num_elements);
|
||||
extern int test_imagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagerandomcopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
|
||||
extern int test_arrayimagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_arrayimagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagenpot(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_sampler_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_sampler_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_createkernelsinprogram(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_single_large_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_multiple_max_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_arrayreadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagedim_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagedim_non_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_image_param(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_image_multipass_integer_coord(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_image_multipass_float_coord(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_vload_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vload_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vload_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vload_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vstore_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vstore_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vstore_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_astype(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_native_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_limit_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_kernel_call_kernel_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
|
||||
extern int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
extern int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
extern int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
extern int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
|
||||
extern int test_progvar_prog_scope_misc(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
extern int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
extern int test_progvar_prog_scope_init(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
extern int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
|
||||
extern int test_global_work_offsets(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
extern int test_get_global_offset(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
|
||||
extern int test_global_linear_id(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
extern int test_local_linear_id(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
extern int test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
|
||||
extern int test_simple_read_image_pitch(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements);
|
||||
extern int test_simple_write_image_pitch(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements);
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
extern int test_queue_priority(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
|
||||
#endif
|
||||
|
||||
extern int test_get_linear_ids(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements);
|
||||
extern int test_rw_image_access_qualifier(cl_device_id device_id, cl_context context, cl_command_queue commands, int num_elements);
|
||||
|
||||
3
test_conformance/basic/run_array
Normal file
3
test_conformance/basic/run_array
Normal file
@@ -0,0 +1,3 @@
|
||||
#!/bin/sh
# Run the buffer read/write/copy subset of test_basic from the script's own
# directory; any extra command-line arguments are forwarded unchanged.
cd "$(dirname "$0")" || exit 1
./test_basic arrayreadwrite arraycopy bufferreadwriterect "$@"
|
||||
3
test_conformance/basic/run_array_image_copy
Normal file
3
test_conformance/basic/run_array_image_copy
Normal file
@@ -0,0 +1,3 @@
|
||||
#!/bin/sh
# Run the buffer<->image copy subset of test_basic from the script's own
# directory; extra command-line arguments are forwarded, matching the other
# run_* launchers.
cd "$(dirname "$0")" || exit 1
./test_basic arrayimagecopy arrayimagecopy3d imagearraycopy "$@"
|
||||
17
test_conformance/basic/run_image
Normal file
17
test_conformance/basic/run_image
Normal file
@@ -0,0 +1,17 @@
|
||||
#!/bin/sh
# Run the image-related subset of test_basic from the script's own directory;
# any extra command-line arguments are forwarded unchanged.
cd "$(dirname "$0")" || exit 1
./test_basic \
    imagecopy imagerandomcopy \
    imagearraycopy imagearraycopy3d \
    image_r8 \
    readimage readimage_int16 readimage_fp32 \
    writeimage writeimage_int16 writeimage_fp32 \
    imagenpot \
    image_param \
    image_multipass_integer_coord \
    readimage3d \
    readimage3d_int16 \
    readimage3d_fp32 \
    imagereadwrite3d \
    imagereadwrite \
    "$@"
|
||||
4
test_conformance/basic/run_multi_read_image
Normal file
4
test_conformance/basic/run_multi_read_image
Normal file
@@ -0,0 +1,4 @@
|
||||
#!/bin/sh
# Run the multi-read-image subset of test_basic from the script's own
# directory; extra command-line arguments are forwarded, matching the other
# run_* launchers.
cd "$(dirname "$0")" || exit 1
./test_basic mri_one mri_multiple "$@"
|
||||
|
||||
201
test_conformance/basic/test_arraycopy.c
Normal file
201
test_conformance/basic/test_arraycopy.c
Normal file
@@ -0,0 +1,201 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * OpenCL C source for the "test_copy" kernel: each work-item copies the
 * unsigned int at its global id from src to dst.
 */
const char *copy_kernel_code =
"__kernel void test_copy(__global unsigned int *src, __global unsigned int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src[tid];\n"
"}\n";
|
||||
|
||||
int
|
||||
test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
cl_uint *input_ptr, *output_ptr;
|
||||
cl_mem streams[4], results;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
unsigned num_elements = 128 * 1024;
|
||||
cl_uint num_copies = 1;
|
||||
size_t delta_offset;
|
||||
unsigned i;
|
||||
cl_int err;
|
||||
MTdata d;
|
||||
|
||||
int error_count = 0;
|
||||
|
||||
input_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
|
||||
output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
|
||||
|
||||
// results
|
||||
results = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
/*****************************************************************************************************************************************/
|
||||
#pragma mark client backing
|
||||
|
||||
log_info("Testing CL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer\n");
|
||||
// randomize data
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
|
||||
// client backing
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
delta_offset = num_elements * sizeof(cl_uint) / num_copies;
|
||||
for (i=0; i<num_copies; i++)
|
||||
{
|
||||
size_t offset = i * delta_offset;
|
||||
err = clEnqueueCopyBuffer(queue, streams[0], results, offset, offset, delta_offset, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueCopyBuffer failed");
|
||||
}
|
||||
|
||||
// Try upload from client backing
|
||||
err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (input_ptr[i] != output_ptr[i])
|
||||
{
|
||||
err = -1;
|
||||
error_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (err)
|
||||
log_error("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer FAILED\n");
|
||||
else
|
||||
log_info("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer passed\n");
|
||||
|
||||
|
||||
|
||||
#pragma mark framework backing (no client data)
|
||||
|
||||
log_info("Testing with clEnqueueWriteBuffer and clEnqueueCopyBuffer\n");
|
||||
// randomize data
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
|
||||
// no backing
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE) , sizeof(cl_uint) * num_elements, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
for (i=0; i<num_copies; i++)
|
||||
{
|
||||
size_t offset = i * delta_offset;
|
||||
|
||||
// Copy the array up from host ptr
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_uint)*num_elements, input_ptr, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
|
||||
err = clEnqueueCopyBuffer(queue, streams[2], results, offset, offset, delta_offset, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueCopyBuffer failed");
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer( queue, results, true, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (input_ptr[i] != output_ptr[i])
|
||||
{
|
||||
err = -1;
|
||||
error_count++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (err)
|
||||
log_error("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer FAILED\n");
|
||||
else
|
||||
log_info("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer passed\n");
|
||||
|
||||
/*****************************************************************************************************************************************/
|
||||
#pragma mark kernel copy test
|
||||
|
||||
log_info("Testing CL_MEM_USE_HOST_PTR buffer with kernel copy\n");
|
||||
// randomize data
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
free_mtdata(d); d= NULL;
|
||||
|
||||
// client backing
|
||||
streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, ©_kernel_code, "test_copy" );
|
||||
test_error(err, "create_single_kernel_helper failed");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[3], &streams[3]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof results, &results);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
size_t threads[3] = {num_elements, 0, 0};
|
||||
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (input_ptr[i] != output_ptr[i])
|
||||
{
|
||||
err = -1;
|
||||
error_count++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Keep track of multiple errors.
|
||||
if (error_count != 0)
|
||||
err = error_count;
|
||||
|
||||
if (err)
|
||||
log_error("\tCL_MEM_USE_HOST_PTR buffer with kernel copy FAILED\n");
|
||||
else
|
||||
log_info("\tCL_MEM_USE_HOST_PTR buffer with kernel copy passed\n");
|
||||
|
||||
|
||||
clReleaseProgram(program);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseMemObject(results);
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
143
test_conformance/basic/test_arrayimagecopy.c
Normal file
143
test_conformance/basic/test_arrayimagecopy.c
Normal file
@@ -0,0 +1,143 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * Round-trips random bytes buffer -> 2D image -> host for one image format.
 *
 * A 512x512 image of the given format is created, a buffer of the same byte
 * size is filled with random data, copied into the image with
 * clEnqueueCopyBufferToImage, and read back with clEnqueueReadImage. The
 * read-back must be byte-identical to the data written.
 *
 * Returns 0 on success, -1 on a data mismatch or host allocation failure,
 * or the OpenCL error code of the first failing call.
 */
int test_arrayimagecopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
{
    cl_uchar *bufptr, *imgptr;
    clMemWrapper buffer, image;
    int img_width = 512;
    int img_height = 512;
    size_t elem_size;
    size_t buffer_size;
    int i;
    cl_int err;
    MTdata d;
    cl_event copyevent;

    log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));

    image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err);
    test_error(err, "create_image_2d failed");

    err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
    test_error(err, "clGetImageInfo failed");

    buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;

    buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err);
    test_error(err, "clCreateBuffer failed");

    bufptr = (cl_uchar*)malloc(buffer_size);
    imgptr = (cl_uchar*)malloc(buffer_size);
    if (bufptr == NULL || imgptr == NULL) {
        log_error("Unable to allocate host buffers\n");
        free(bufptr);
        free(imgptr);
        return -1;
    }

    d = init_genrand( gRandomSeed );
    for (i=0; i<(int)buffer_size; i++) {
        bufptr[i] = (cl_uchar)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    size_t origin[3]={0,0,0}, region[3]={(size_t)img_width,(size_t)img_height,1};

    // test_error is relied on to return from this function on failure (as the
    // rest of this file does), so host memory is released before invoking it.
    err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
    if (err != CL_SUCCESS) { free(bufptr); free(imgptr); }
    test_error(err, "clEnqueueWriteBuffer failed");

    err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
    if (err != CL_SUCCESS) { free(bufptr); free(imgptr); }
    test_error(err, "clEnqueueCopyBufferToImage failed");

    // The blocking read waits on the copy; the event is not needed afterwards.
    err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
    clReleaseEvent(copyevent);
    if (err != CL_SUCCESS) { free(bufptr); free(imgptr); }
    test_error(err, "clEnqueueReadImage failed");

    if (memcmp(bufptr, imgptr, buffer_size) != 0) {
        int failuresPrinted = 0;

        log_error( "ERROR: Results did not validate!\n" );

        // Walk the data one image element at a time and dump the first few
        // mismatching elements: "actual" is what was read back from the
        // image, "expected" is what was written to the buffer.
        for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
            if (memcmp(bufptr + i, imgptr + i, elem_size) != 0) {
                char values[4096];
                int len;
                int j;

                len = sprintf(values, "%d(0x%x) -> actual [", i, i);
                for (j=0; j<(int)elem_size; j++)
                    len += sprintf(values + len, "0x%02x ", imgptr[i+j]);
                len += sprintf(values + len, "] != expected [");
                for (j=0; j<(int)elem_size; j++)
                    len += sprintf(values + len, "0x%02x ", bufptr[i+j]);
                sprintf(values + len, "]");
                log_error("%s\n", values);
                failuresPrinted++;
            }
            if (failuresPrinted > 5) {
                log_error("Not printing further failures...\n");
                break;
            }
        }
        err = -1;
    }

    free(bufptr);
    free(imgptr);

    if (err)
        log_error("ARRAY to IMAGE copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
                  (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);

    return err;
}
|
||||
|
||||
/*
 * Runs test_arrayimagecopy_single_format for every 2D image format the
 * context reports for CL_MEM_READ_WRITE.
 *
 * num_elements is not used. Returns 0 when every format passes; otherwise a
 * non-zero value (the OR of the per-format results, the OpenCL error of a
 * failed query, or -1 on host allocation failure).
 */
int test_arrayimagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int err;
    cl_image_format *formats;
    cl_uint num_formats;
    cl_uint i;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // First call only asks how many formats there are.
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats);
    test_error(err, "clGetSupportedImageFormats failed");

    formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
    if (formats == NULL && num_formats != 0) {
        log_error("Unable to allocate the image format list\n");
        return -1;
    }

    // test_error is relied on to return from this function on failure (as the
    // rest of this file does), so the list is released before invoking it.
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL);
    if (err != CL_SUCCESS) {
        free(formats);
        formats = NULL;
    }
    test_error(err, "clGetSupportedImageFormats failed");

    for (i = 0; i < num_formats; i++) {
        err |= test_arrayimagecopy_single_format(device, context, queue, &formats[i]);
    }

    free(formats);
    if (err)
        log_error("ARRAY to IMAGE copy test failed\n");
    else
        log_info("ARRAY to IMAGE copy test passed\n");

    return err;
}
|
||||
144
test_conformance/basic/test_arrayimagecopy3d.c
Normal file
144
test_conformance/basic/test_arrayimagecopy3d.c
Normal file
@@ -0,0 +1,144 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * Round-trips random bytes buffer -> 3D image -> host for one image format.
 *
 * A 128x128x32 image of the given format is created, a buffer of the same
 * byte size is filled with random data, copied into the image with
 * clEnqueueCopyBufferToImage, and read back with clEnqueueReadImage. The
 * read-back must be byte-identical to the data written.
 *
 * Returns 0 on success, -1 on a data mismatch or host allocation failure,
 * or the OpenCL error code of the first failing call.
 */
int test_arrayimagecopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
{
    cl_uchar *bufptr, *imgptr;
    clMemWrapper buffer, image;
    int img_width = 128;
    int img_height = 128;
    int img_depth = 32;
    size_t elem_size;
    size_t buffer_size;
    int i;
    cl_int err;
    MTdata d;
    cl_event copyevent;

    log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));

    image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err);
    test_error(err, "create_image_3d failed");

    err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
    test_error(err, "clGetImageInfo failed");

    buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;

    buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err);
    test_error(err, "clCreateBuffer failed");

    bufptr = (cl_uchar*)malloc(buffer_size);
    imgptr = (cl_uchar*)malloc(buffer_size);
    if (bufptr == NULL || imgptr == NULL) {
        log_error("Unable to allocate host buffers\n");
        free(bufptr);
        free(imgptr);
        return -1;
    }

    d = init_genrand( gRandomSeed );
    for (i=0; i<(int)buffer_size; i++) {
        bufptr[i] = (cl_uchar)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    size_t origin[3]={0,0,0}, region[3]={(size_t)img_width,(size_t)img_height,(size_t)img_depth};

    // test_error is relied on to return from this function on failure (as the
    // rest of this file does), so host memory is released before invoking it.
    err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
    if (err != CL_SUCCESS) { free(bufptr); free(imgptr); }
    test_error(err, "clEnqueueWriteBuffer failed");

    err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
    if (err != CL_SUCCESS) { free(bufptr); free(imgptr); }
    test_error(err, "clEnqueueCopyBufferToImage failed");

    // The blocking read waits on the copy; the event is not needed afterwards.
    err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
    clReleaseEvent(copyevent);
    if (err != CL_SUCCESS) { free(bufptr); free(imgptr); }
    test_error(err, "clEnqueueReadImage failed");

    if (memcmp(bufptr, imgptr, buffer_size) != 0) {
        int failuresPrinted = 0;

        log_error( "ERROR: Results did not validate!\n" );

        // Walk the data one image element at a time and dump the first few
        // mismatching elements: "actual" is what was read back from the
        // image, "expected" is what was written to the buffer.
        for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
            if (memcmp(bufptr + i, imgptr + i, elem_size) != 0) {
                char values[4096];
                int len;
                int j;

                len = sprintf(values, "%d(0x%x) -> actual [", i, i);
                for (j=0; j<(int)elem_size; j++)
                    len += sprintf(values + len, "0x%02x ", imgptr[i+j]);
                len += sprintf(values + len, "] != expected [");
                for (j=0; j<(int)elem_size; j++)
                    len += sprintf(values + len, "0x%02x ", bufptr[i+j]);
                sprintf(values + len, "]");
                log_error("%s\n", values);
                failuresPrinted++;
            }
            if (failuresPrinted > 5) {
                log_error("Not printing further failures...\n");
                break;
            }
        }
        err = -1;
    }

    free(bufptr);
    free(imgptr);

    if (err)
        log_error("ARRAY to IMAGE3D copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
                  (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);

    return err;
}
|
||||
|
||||
/*
 * Runs test_arrayimagecopy3d_single_format for every 3D image format the
 * context reports for CL_MEM_READ_WRITE.
 *
 * num_elements is not used. Returns 0 when every format passes; otherwise a
 * non-zero value (the OR of the per-format results, the OpenCL error of a
 * failed query, or -1 on host allocation failure).
 */
int test_arrayimagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int err;
    cl_image_format *formats;
    cl_uint num_formats;
    cl_uint i;

    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )

    // First call only asks how many formats there are.
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
    test_error(err, "clGetSupportedImageFormats failed");

    formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
    if (formats == NULL && num_formats != 0) {
        log_error("Unable to allocate the image format list\n");
        return -1;
    }

    // test_error is relied on to return from this function on failure (as the
    // rest of this file does), so the list is released before invoking it.
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL);
    if (err != CL_SUCCESS) {
        free(formats);
        formats = NULL;
    }
    test_error(err, "clGetSupportedImageFormats failed");

    for (i = 0; i < num_formats; i++) {
        err |= test_arrayimagecopy3d_single_format(device, context, queue, &formats[i]);
    }

    free(formats);
    if (err)
        log_error("ARRAY to IMAGE3D copy test failed\n");
    else
        log_info("ARRAY to IMAGE3D copy test passed\n");

    return err;
}
|
||||
95
test_conformance/basic/test_arrayreadwrite.c
Normal file
95
test_conformance/basic/test_arrayreadwrite.c
Normal file
@@ -0,0 +1,95 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
|
||||
int
|
||||
test_arrayreadwrite(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_uint *inptr, *outptr;
|
||||
cl_mem streams[1];
|
||||
int num_tries = 400;
|
||||
num_elements = 1024 * 1024 * 4;
|
||||
int i, j, err;
|
||||
MTdata d;
|
||||
|
||||
inptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
|
||||
outptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
|
||||
|
||||
// randomize data
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<num_elements; i++)
|
||||
inptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
for (i=0; i<num_tries; i++)
|
||||
{
|
||||
int offset;
|
||||
int cb;
|
||||
|
||||
do {
|
||||
offset = (int)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
if (offset > 0 && offset < num_elements)
|
||||
break;
|
||||
} while (1);
|
||||
cb = (int)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
if (cb > (num_elements - offset))
|
||||
cb = num_elements - offset;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), sizeof(cl_uint)*cb,&inptr[offset], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
|
||||
err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), cb*sizeof(cl_uint), &outptr[offset], 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (j=offset; j<offset+cb; j++)
|
||||
{
|
||||
if (inptr[j] != outptr[j])
|
||||
{
|
||||
log_error("ARRAY read, write test failed\n");
|
||||
err = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
free_mtdata(d);
|
||||
clReleaseMemObject(streams[0]);
|
||||
free(inptr);
|
||||
free(outptr);
|
||||
|
||||
if (!err)
|
||||
log_info("ARRAY read, write test passed\n");
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
288
test_conformance/basic/test_astype.cpp
Normal file
288
test_conformance/basic/test_astype.cpp
Normal file
@@ -0,0 +1,288 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
|
||||
// printf-style templates for the as_<type><n>() test kernel. In every
// template the first %s receives an optional "#pragma OPENCL EXTENSION" line
// (or an empty string); the remaining %s slots receive type names and
// vector-size suffixes, in the order used by the sprintf calls in
// test_astype_set.

// General case: source and destination are both addressed as arrays of the
// full vector type.
// Arguments: pragma, in type, in size, out type, out size,
//            out type, out size, out type, out size.
static const char *astype_kernel_pattern =
"%s\n"
"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( src[ tid ] );\n"
" dst[ tid ] = tmp;\n"
"}\n";

// Both source and destination are 3-element vectors: the buffers are declared
// with the scalar type and accessed through vload3/vstore3.
// Compared with astype_kernel_pattern, the third and fifth arguments (each of
// which would be "3") are dropped from the printf argument list.
static const char *astype_kernel_pattern_V3srcV3dst =
"%s\n"
"__kernel void test_fn( __global %s *src, __global %s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
" vstore3(tmp,tid,dst);\n"
"}\n";

// Only the destination is a 3-element vector: it is declared with the scalar
// type and written through vstore3, and the "3" suffix is hard-coded in the
// template.
// Arguments: pragma, in type, in size, out type, out type, out type.
static const char *astype_kernel_pattern_V3dst =
"%s\n"
"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s3 tmp = as_%s3( src[ tid ] );\n"
" vstore3(tmp,tid,dst);\n"
"}\n";

// Only the source is a 3-element vector: it is declared with the scalar type
// and read through vload3.
// Compared with astype_kernel_pattern, the third argument (which would be
// "3") is dropped from the printf argument list.
static const char *astype_kernel_pattern_V3src =
"%s\n"
"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
" dst[ tid ] = tmp;\n"
"}\n";
|
||||
|
||||
|
||||
// Builds and runs one as_<outVecType><outVecSize>() kernel over numElements
// random input vectors of inVecType/vecSize and, where the result is
// well-defined, checks that the output bytes equal the input bytes.
//
// The kernel is always built and run; the byte comparison is skipped (and 0
// returned) for vec3 -> vec4 and for differing element counts that do not
// involve a 3-element vector.
//
// Returns 0 on success or skipped comparison, 1 on a data mismatch, -1 on
// host allocation failure, or the OpenCL error code of the first failing call.
int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType,
                     unsigned int vecSize, unsigned int outVecSize,
                     int numElements )
{
    int error;

    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 2 ];

    char programSrc[ 10240 ];
    size_t threads[ 1 ], localThreads[ 1 ];
    size_t typeSize = get_explicit_type_size( inVecType );
    size_t outTypeSize = get_explicit_type_size(outVecType);
    // Vector-size suffix indexed by element count; scalars get no suffix.
    char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
    MTdata d;

    // Create program
    if(outVecSize == 3 && vecSize == 3) {
        // astype_kernel_pattern_V3srcV3dst
        sprintf( programSrc, astype_kernel_pattern_V3srcV3dst,
                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
                get_explicit_type_name( inVecType ), // sizeNames[ vecSize ],
                get_explicit_type_name( outVecType ), // sizeNames[ outVecSize ],
                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] );
    } else if(outVecSize == 3) {
        // astype_kernel_pattern_V3dst
        sprintf( programSrc, astype_kernel_pattern_V3dst,
                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
                get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
                get_explicit_type_name( outVecType ),
                get_explicit_type_name( outVecType ),
                get_explicit_type_name( outVecType ));

    } else if(vecSize == 3) {
        // astype_kernel_pattern_V3src
        sprintf( programSrc, astype_kernel_pattern_V3src,
                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
                get_explicit_type_name( inVecType ),// sizeNames[ vecSize ],
                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
    } else {
        sprintf( programSrc, astype_kernel_pattern,
                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
                get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
    }

    const char *ptr = programSrc;
    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
    test_error( error, "Unable to create testing kernel" );


    // Create some input values
    size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize;
    char *inBuffer = (char*)malloc( inBufferSize );
    size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize;
    char *outBuffer = (char*)malloc( outBufferSize );
    if( inBuffer == NULL || outBuffer == NULL )
    {
        log_error( "ERROR: Unable to allocate host buffers\n" );
        free(inBuffer);
        free(outBuffer);
        return -1;
    }

    d = init_genrand( gRandomSeed );
    generate_random_data( inVecType, numElements * vecSize,
                         d, inBuffer );
    free_mtdata(d); d = NULL;

    // Create I/O streams and set arguments
    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error );
    test_error( error, "Unable to create I/O stream" );
    streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error );
    test_error( error, "Unable to create I/O stream" );

    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
    test_error( error, "Unable to set kernel argument" );


    // Run the kernel
    threads[ 0 ] = numElements;
    error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
    test_error( error, "Unable to get group size to run with" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Unable to run kernel" );


    // Get the results and compare
    // The beauty is that astype is supposed to return the bit pattern as a different type, which means
    // the output should have the exact same bit pattern as the input. No interpretation necessary!
    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    char *expected = inBuffer;
    char *actual = outBuffer;
    // Compare only as many bytes as both the input and output vector hold.
    size_t compSize = typeSize*vecSize;
    if(outTypeSize*outVecSize < compSize) {
        compSize = outTypeSize*outVecSize;
    }

    if(outVecSize == 4 && vecSize == 3)
    {
        // as_type4(vec3) should compile but produce undefined results??
        free(inBuffer);
        free(outBuffer);
        return 0;
    }

    if(outVecSize != 3 && vecSize != 3 && outVecSize != vecSize)
    {
        // as_typen(vecm) should compile and run but produce
        // implementation-defined results for m != n
        // and n*sizeof(type) = sizeof(vecm)
        free(inBuffer);
        free(outBuffer);
        return 0;
    }

    for( int i = 0; i < numElements; i++ )
    {
        if( memcmp( expected, actual, compSize ) != 0 )
        {
            char expectedString[ 1024 ], actualString[ 1024 ];
            // The output side is described and formatted with the output
            // type/size so the dump never reads past the output element.
            log_error( "ERROR: Data sample %d of %d for as_%s%d( %s%d ) did not validate (expected {%s}, got {%s})\n",
                      (int)i, (int)numElements, get_explicit_type_name( outVecType ), (int)outVecSize, get_explicit_type_name( inVecType ), (int)vecSize,
                      GetDataVectorString( expected, typeSize, vecSize, expectedString ),
                      GetDataVectorString( actual, outTypeSize, outVecSize, actualString ) );
            log_error("Src is :\n%s\n----\n%d threads %d localthreads\n",
                      programSrc, (int)threads[0],(int) localThreads[0]);
            free(inBuffer);
            free(outBuffer);
            return 1;
        }
        // Input and output elements may have different strides.
        expected += typeSize * vecSize;
        actual += outTypeSize * outVecSize;
    }

    free(inBuffer);
    free(outBuffer);
    return 0;
}
|
||||
|
||||
// Drives the as_type conformance test over every ordered pair of distinct
// element types and every pair of vector widths whose total byte size matches.
//
// device/context/queue are forwarded unchanged to test_astype_set, as is
// n_elems. Returns the sum of the values returned by all test_astype_set calls
// (0 when every call returned 0).
int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    // Reinterpreting between vectors whose element sizes differ (for example
    // short2 -> char4) is legal, but the resulting values depend on the device,
    // so there is no single "valid" answer to check for those combinations.
    // Both tables are sentinel terminated (kNumExplicitTypes / 0).
    ExplicitType typeList[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int widthList[] = { 1, 2, 3, 4, 8, 16, 0 };
    int failures = 0;

    for( unsigned int src = 0; typeList[ src ] != kNumExplicitTypes; ++src )
    {
        const ExplicitType srcType = typeList[ src ];
        const size_t srcElemBytes = get_explicit_type_size( srcType );

        // Skip source types the device cannot handle.
        if( srcType == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
            continue;
        if( ( srcType == kLong || srcType == kULong ) && !gHasLong )
            continue;

        for( unsigned int dst = 0; typeList[ dst ] != kNumExplicitTypes; ++dst )
        {
            const ExplicitType dstType = typeList[ dst ];
            const size_t dstElemBytes = get_explicit_type_size( dstType );

            // Skip destination types the device cannot handle.
            if( dstType == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
                continue;
            if( ( dstType == kLong || dstType == kULong ) && !gHasLong )
                continue;

            // A type is never reinterpreted as itself.
            if( src == dst )
                continue;

            log_info( " (%s->%s)\n", get_explicit_type_name( srcType ), get_explicit_type_name( dstType ) );
            fflush( stdout );

            // Try every (input width, output width) pair with equal byte size.
            for( unsigned int inW = 0; widthList[ inW ] != 0; ++inW )
            {
                for( unsigned int outW = 0; widthList[ outW ] != 0; ++outW )
                {
                    const bool sameTotalSize =
                        ( widthList[ inW ] * srcElemBytes == widthList[ outW ] * dstElemBytes );
                    if( sameTotalSize )
                    {
                        failures += test_astype_set( device, context, queue, srcType, dstType,
                                                     widthList[ inW ], widthList[ outW ], n_elems );
                    }
                }
            }

            // Equal element sizes additionally get the 3 <-> 4 component cases:
            // as_type3(vec4) is allowed, as_type4(vec3) is not.
            if( srcElemBytes == dstElemBytes )
            {
                failures += test_astype_set( device, context, queue, srcType, dstType, 3, 4, n_elems );
                failures += test_astype_set( device, context, queue, srcType, dstType, 4, 3, n_elems );
            }
        }
    }

    return failures;
}
|
||||
|
||||
|
||||
279
test_conformance/basic/test_async_copy.cpp
Normal file
279
test_conformance/basic/test_async_copy.cpp
Normal file
@@ -0,0 +1,279 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
|
||||
// printf-style template for the global -> local async copy kernel.
// It contains eight %s slots, filled in this order: optional pragma line,
// three pointer element types for the signature, vector type and scalar type
// for the zero cast, and the two pointer types used in the copy call.
static const char *async_global_to_local_kernel =
"%s\n" // optional pragma string
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n"
"{\n"
" int i;\n"
// Each work-item clears its own slice of the local buffer
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Make sure the whole work-group has finished clearing before the copy starts
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_copy( (__local %s*)localBuffer, (__global const %s*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 );\n"
// Once the copy has completed, write the local data out by hand so the host can check it
" wait_group_events( 1, &event );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];\n"
"}\n" ;
|
||||
|
||||
// printf-style template for the local -> global async copy kernel.
// It takes the same eight %s arguments, in the same order, as the
// global -> local template.
static const char *async_local_to_global_kernel =
"%s\n" // optional pragma string
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n"
"{\n"
" int i;\n"
// Each work-item clears its own slice of the local buffer
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Make sure the whole work-group has finished clearing before it is refilled
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem+i ];\n"
// Make sure the whole work-group has staged its data before the async copy reads it
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_copy((__global %s*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const %s*)localBuffer, (size_t)copiesPerWorkgroup, 0 );\n"
" wait_group_events( 1, &event );\n"
"}\n" ;
|
||||
|
||||
|
||||
// printf-style template for the prefetch kernel.
// It keeps the same eight %s arguments as the async copy templates; the three
// it does not need are absorbed by the "Ignore this" comment inside the
// generated kernel source.
static const char *prefetch_kernel =
"%s\n" // optional pragma string
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n"
"{\n"
" // Ignore this: %s%s%s\n"
" int i;\n"
" prefetch( (const __global %s*)(src+copiesPerWorkItem*get_global_id(0)), copiesPerWorkItem);\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem+i ];\n"
"}\n" ;
|
||||
|
||||
|
||||
|
||||
// Builds one kernel from the printf-style template kernelCode for the given
// element type and vector width, runs it over random input and checks that the
// output buffer matches the input.
//
// kernelCode must accept the eight %s arguments passed to sprintf below.
// vecType / vecSize select the element type and the number of components.
//
// Returns 0 when the data matches and -1 when a mismatch was found or the host
// buffers could not be allocated. Failing OpenCL calls leave through
// test_error.
int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode,
              ExplicitType vecType, int vecSize
              )
{
    // Owns one malloc'ed host buffer and frees it on every way out of the
    // function, including the early exits taken by test_error.
    struct HostBuffer
    {
        void *ptr;
        explicit HostBuffer( size_t bytes ) : ptr( malloc( bytes ) ) {}
        ~HostBuffer() { free( ptr ); }
    };

    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 2 ];
    size_t threads[ 1 ], localThreads[ 1 ];
    MTdata d;
    char vecNameString[64]; vecNameString[0] = 0;
    if (vecSize == 1)
        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
    else
        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType), vecSize);

    log_info("Testing %s\n", vecNameString);

    cl_long max_local_mem_size;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_local_mem_size), &max_local_mem_size, NULL);
    test_error( error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");

    // The value is not used below; the query is kept so that a failing query
    // is still reported as a test error.
    unsigned int num_of_compute_devices;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(num_of_compute_devices), &num_of_compute_devices, NULL);
    test_error( error, "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");

    char programSource[4096]; programSource[0]=0;
    char *programPtr;

    sprintf(programSource, kernelCode,
            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
            vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
    //log_info("program: %s\n", programSource);
    programPtr = programSource;

    error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test_fn" );
    test_error( error, "Unable to create testing kernel" );

    size_t max_workgroup_size;
    error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size), &max_workgroup_size, NULL);
    test_error (error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");

    size_t max_local_workgroup_size[3];
    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");

    // Pick the minimum of the device and the kernel
    if (max_workgroup_size > max_local_workgroup_size[0])
        max_workgroup_size = max_local_workgroup_size[0];

    size_t numberOfCopiesPerWorkitem = 13;
    // A 3-component vector occupies the storage of a 4-component one.
    size_t elementSize = get_explicit_type_size(vecType)* ((vecSize == 3) ? 4 : vecSize);
    size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize;
    // Budget at most half of the device's local memory for the staging buffer.
    size_t maxLocalWorkgroupSize = (((int)max_local_mem_size/2)/localStorageSpacePerWorkitem);

    // Calculation can return 0 on embedded devices due to 1KB local mem limit
    if(maxLocalWorkgroupSize == 0)
    {
        maxLocalWorkgroupSize = 1;
    }

    size_t localWorkgroupSize = maxLocalWorkgroupSize;
    if (maxLocalWorkgroupSize > max_workgroup_size)
        localWorkgroupSize = max_workgroup_size;

    size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem;
    size_t numberOfLocalWorkgroups = 1111;
    size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize;
    size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;

    HostBuffer inHost( globalBufferSize );
    HostBuffer outHost( globalBufferSize );
    if( inHost.ptr == NULL || outHost.ptr == NULL )
    {
        log_error( "ERROR: Unable to allocate host buffers of %d bytes\n", (int)globalBufferSize );
        return -1;
    }
    void *inBuffer = inHost.ptr;
    void *outBuffer = outHost.ptr;
    memset(outBuffer, 0, globalBufferSize);

    cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
    copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
    copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize);

    log_info("Global: %d, local %d, local buffer %db, global buffer %db, each work group will copy %d elements and each work item item will copy %d elements.\n",
             (int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, copiesPerWorkgroup, copiesPerWorkItemInt);

    threads[0] = globalWorkgroupSize;
    localThreads[0] = localWorkgroupSize;

    d = init_genrand( gRandomSeed );
    generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
    free_mtdata(d); d = NULL;

    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
    test_error( error, "Unable to create input buffer" );
    streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
    test_error( error, "Unable to set kernel argument" );
    // NULL value with a size: local-memory argument of localBufferSize bytes.
    error = clSetKernelArg( kernel, 2, localBufferSize, NULL );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt );
    test_error( error, "Unable to set kernel argument" );

    // Enqueue
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Unable to queue kernel" );

    // Read
    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    // Verify
    int failuresPrinted = 0;
    if( memcmp( inBuffer, outBuffer, globalBufferSize ) != 0 )
    {
        // Compare only the bytes the vector really uses (3 components for
        // vec3) while stepping by the padded element size.
        size_t typeSize = get_explicit_type_size(vecType)* vecSize;
        unsigned char * inchar = (unsigned char*)inBuffer;
        unsigned char * outchar = (unsigned char*)outBuffer;
        for (int i=0; i< (int)globalBufferSize; i+=(int)elementSize) {
            if (memcmp( ((char *)inchar)+i, ((char *)outchar)+i, typeSize) != 0 )
            {
                char values[4096];
                values[0] = 0;
                if ( failuresPrinted == 0 ) {
                    // Print first failure message
                    log_error( "ERROR: Results of copy did not validate!\n" );
                }
                sprintf(values + strlen( values), "%d -> [", i);
                for (int j=0; j<(int)elementSize; j++)
                    sprintf(values + strlen( values), "%2x ", inchar[i+j]);
                sprintf(values + strlen(values), "] != [");
                for (int j=0; j<(int)elementSize; j++)
                    sprintf(values + strlen( values), "%2x ", outchar[i+j]);
                sprintf(values + strlen(values), "]");
                log_error("%s\n", values);
                failuresPrinted++;
            }

            if (failuresPrinted > 5) {
                log_error("Not printing further failures...\n");
                break;
            }
        }
    }

    // inHost / outHost free the host buffers when they go out of scope.
    return failuresPrinted ? -1 : 0;
}
|
||||
|
||||
// Runs test_copy with the given kernel template for every supported element
// type and every vector width in the table below.
// Returns 0 when every run returned 0, otherwise -1.
int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) {
    // Both tables are sentinel terminated (kNumExplicitTypes / 0).
    ExplicitType typeList[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int widthList[] = { 1, 2, 3, 4, 8, 16, 0 };
    int failedRuns = 0;

    for( unsigned int t = 0; typeList[ t ] != kNumExplicitTypes; ++t )
    {
        const ExplicitType current = typeList[ t ];

        // double needs cl_khr_fp64; the 64-bit integer types need gHasLong.
        if( current == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
            continue;
        if( ( current == kLong || current == kULong ) && !gHasLong )
            continue;

        for( unsigned int w = 0; widthList[ w ] != 0; ++w )
        {
            if( test_copy( deviceID, context, queue, kernelCode, current, widthList[ w ] ) != 0 )
                ++failedRuns;
        }
    }

    return ( failedRuns != 0 ) ? -1 : 0;
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Test entry point: runs the global -> local async copy kernel for all types.
// num_elements is not used by this test.
int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int status = test_copy_all_types( deviceID, context, queue, async_global_to_local_kernel );
    return status;
}
|
||||
|
||||
// Test entry point: runs the local -> global async copy kernel for all types.
// num_elements is not used by this test.
int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int status = test_copy_all_types( deviceID, context, queue, async_local_to_global_kernel );
    return status;
}
|
||||
|
||||
// Test entry point: runs the prefetch kernel for all types.
// num_elements is not used by this test.
int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int status = test_copy_all_types( deviceID, context, queue, prefetch_kernel );
    return status;
}
|
||||
|
||||
274
test_conformance/basic/test_async_strided_copy.cpp
Normal file
274
test_conformance/basic/test_async_strided_copy.cpp
Normal file
@@ -0,0 +1,274 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
|
||||
// printf-style template for the strided global -> local async copy kernel.
// It contains nine %s slots: the optional pragma line, a prefix emitted
// directly in front of __kernel, three pointer element types for the
// signature, vector type and scalar type for the zero cast, and the two
// pointer types used in the copy call.
static const char *async_strided_global_to_local_kernel =
"%s\n" // optional pragma string
"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
"{\n"
" int i;\n"
// Each work-item clears its own slice of the local buffer
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Make sure the whole work-group has finished clearing before the copy starts
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_strided_copy( (__local %s*)localBuffer, (__global const %s*)(src+copiesPerWorkgroup*stride*get_group_id(0)), (size_t)copiesPerWorkgroup, (size_t)stride, 0 );\n"
// Once the copy has completed, write the local data back out with the same stride
" wait_group_events( 1, &event );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" dst[ get_global_id( 0 )*copiesPerWorkItem*stride+i*stride ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];\n"
"}\n" ;
|
||||
|
||||
// printf-style template for the strided local -> global async copy kernel.
// It takes the same nine %s arguments, in the same order, as the strided
// global -> local template.
static const char *async_strided_local_to_global_kernel =
"%s\n" // optional pragma string
"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
"{\n"
" int i;\n"
// Each work-item clears its own slice of the local buffer
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Make sure the whole work-group has finished clearing before it is refilled
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem*stride+i*stride ];\n"
// Make sure the whole work-group has staged its data before the async copy reads it
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_strided_copy((__global %s*)(dst+copiesPerWorkgroup*stride*get_group_id(0)), (__local const %s*)localBuffer, (size_t)copiesPerWorkgroup, (size_t)stride, 0 );\n"
" wait_group_events( 1, &event );\n"
"}\n" ;
|
||||
|
||||
|
||||
// Builds one kernel from the printf-style template kernelCode for the given
// element type and vector width, runs it with the given element stride over
// random input, and checks that every stride-th element of the output matches
// the input.
//
// kernelCode must accept the nine %s arguments passed to sprintf below.
// stride is forwarded to the kernel as its last argument.
//
// Returns 0 when the data matches and -1 on the first mismatch or when the
// host buffers could not be allocated. Failing OpenCL calls leave through
// test_error.
int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, ExplicitType vecType, int vecSize, int stride)
{
    // Owns one malloc'ed host buffer and frees it on every way out of the
    // function, including the early exits taken by test_error and the
    // mismatch return.
    struct HostBuffer
    {
        void *ptr;
        explicit HostBuffer( size_t bytes ) : ptr( malloc( bytes ) ) {}
        ~HostBuffer() { free( ptr ); }
    };

    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 2 ];
    size_t threads[ 1 ], localThreads[ 1 ];
    MTdata d;
    char vecNameString[64]; vecNameString[0] = 0;

    if (vecSize == 1)
        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
    else
        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType), vecSize);

    log_info("Testing %s\n", vecNameString);

    cl_long max_local_mem_size;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_local_mem_size), &max_local_mem_size, NULL);
    test_error( error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");

    // The value is not used below; the query is kept so that a failing query
    // is still reported as a test error.
    unsigned int num_of_compute_devices;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(num_of_compute_devices), &num_of_compute_devices, NULL);
    test_error( error, "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");

    char programSource[4096]; programSource[0]=0;
    char *programPtr;

    sprintf(programSource, kernelCode,
            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
            "",
            vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
    //log_info("program: %s\n", programSource);
    programPtr = programSource;

    error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test_fn" );
    test_error( error, "Unable to create testing kernel" );

    size_t max_workgroup_size;
    error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size), &max_workgroup_size, NULL);
    test_error (error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");

    size_t max_local_workgroup_size[3];
    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");

    // Pick the minimum of the device and the kernel
    if (max_workgroup_size > max_local_workgroup_size[0])
        max_workgroup_size = max_local_workgroup_size[0];

    // A 3-component vector occupies the storage of a 4-component one.
    size_t elementSize = get_explicit_type_size(vecType)* ((vecSize == 3) ? 4 : vecSize);

    cl_ulong max_global_mem_size;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(max_global_mem_size), &max_global_mem_size, NULL);
    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");

    if (max_global_mem_size > (cl_ulong)SIZE_MAX) {
        max_global_mem_size = (cl_ulong)SIZE_MAX;
    }

    cl_bool unified_mem;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(unified_mem), &unified_mem, NULL);
    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY");

    int number_of_global_mem_buffers = (unified_mem) ? 4 : 2;

    size_t numberOfCopiesPerWorkitem = 3;
    size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize;
    // Budget at most half of the device's local memory for the staging buffer.
    size_t maxLocalWorkgroupSize = (((int)max_local_mem_size/2)/localStorageSpacePerWorkitem);

    // A device with very little local memory would yield 0 here, which would
    // make localBufferSize 0 and divide by zero in the limit computed below.
    if (maxLocalWorkgroupSize == 0)
    {
        maxLocalWorkgroupSize = 1;
    }

    size_t localWorkgroupSize = maxLocalWorkgroupSize;
    if (maxLocalWorkgroupSize > max_workgroup_size)
        localWorkgroupSize = max_workgroup_size;

    size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem;
    size_t numberOfLocalWorkgroups = 579;//1111;

    // Reduce the numberOfLocalWorkgroups so that no more than 1/2 of CL_DEVICE_GLOBAL_MEM_SIZE is consumed
    // by the allocated buffer. This is done to avoid resource errors resulting from address space fragmentation.
    size_t numberOfLocalWorkgroupsLimit = max_global_mem_size / (2 * number_of_global_mem_buffers * localBufferSize * stride);
    if (numberOfLocalWorkgroups > numberOfLocalWorkgroupsLimit) numberOfLocalWorkgroups = numberOfLocalWorkgroupsLimit;

    size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize*stride;
    size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;

    HostBuffer inHost( globalBufferSize );
    HostBuffer outHost( globalBufferSize );
    if( inHost.ptr == NULL || outHost.ptr == NULL )
    {
        log_error( "ERROR: Unable to allocate host buffers of %d bytes\n", (int)globalBufferSize );
        return -1;
    }
    void *inBuffer = inHost.ptr;
    void *outBuffer = outHost.ptr;
    memset(outBuffer, 0, globalBufferSize);

    cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
    copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
    copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize);

    log_info("Global: %d, local %d, local buffer %db, global buffer %db, copy stride %d, each work group will copy %d elements and each work item item will copy %d elements.\n",
             (int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, (int)stride, copiesPerWorkgroup, copiesPerWorkItemInt);

    threads[0] = globalWorkgroupSize;
    localThreads[0] = localWorkgroupSize;

    d = init_genrand( gRandomSeed );
    generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
    free_mtdata(d); d = NULL;

    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
    test_error( error, "Unable to create input buffer" );
    streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
    test_error( error, "Unable to set kernel argument" );
    // NULL value with a size: local-memory argument of localBufferSize bytes.
    error = clSetKernelArg( kernel, 2, localBufferSize, NULL );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 5, sizeof(stride), &stride );
    test_error( error, "Unable to set kernel argument" );

    // Enqueue
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Unable to queue kernel" );

    // Read
    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    // Verify: only every stride-th element is written by the kernel, so only
    // those are compared. typeSize covers the bytes the vector really uses
    // (3 components for vec3); elementSize is the padded storage size.
    size_t typeSize = get_explicit_type_size(vecType)* vecSize;
    for (int i=0; i<(int)globalBufferSize; i+=(int)elementSize*(int)stride)
    {
        if (memcmp( ((char *)inBuffer)+i, ((char *)outBuffer)+i, typeSize) != 0 )
        {
            // Both pointers already address the failing element, so the dump
            // below indexes them with j only.
            unsigned char * inchar = (unsigned char*)inBuffer + i;
            unsigned char * outchar = (unsigned char*)outBuffer + i;
            char values[4096];
            values[0] = 0;

            log_error( "ERROR: Results of copy did not validate!\n" );
            sprintf(values + strlen( values), "%d -> [", i);
            for (int j=0; j<(int)elementSize; j++)
                sprintf(values + strlen( values), "%2x ", inchar[j]);
            sprintf(values + strlen(values), "] != [");
            for (int j=0; j<(int)elementSize; j++)
                sprintf(values + strlen( values), "%2x ", outchar[j]);
            sprintf(values + strlen(values), "]");
            log_error("%s\n", values);

            return -1;
        }
    }

    // inHost / outHost free the host buffers when they go out of scope.
    return 0;
}
|
||||
|
||||
// Runs test_strided_copy with the given kernel template for every supported
// element type, every vector width and every stride in the tables below.
// Returns 0 when every run returned 0, otherwise -1.
int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode)
{
    // All three tables are sentinel terminated (kNumExplicitTypes / 0).
    ExplicitType typeList[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int widthList[] = { 1, 2, 3, 4, 8, 16, 0 };
    unsigned int strideList[] = { 1, 3, 4, 5, 0 };
    int failedRuns = 0;

    for( unsigned int t = 0; typeList[ t ] != kNumExplicitTypes; ++t )
    {
        const ExplicitType current = typeList[ t ];

        // double needs cl_khr_fp64; the 64-bit integer types need gHasLong.
        if( current == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
            continue;
        if( ( current == kLong || current == kULong ) && !gHasLong )
            continue;

        for( unsigned int w = 0; widthList[ w ] != 0; ++w )
        {
            for( unsigned int s = 0; strideList[ s ] != 0; ++s )
            {
                if( test_strided_copy( deviceID, context, queue, kernelCode, current, widthList[ w ], strideList[ s ] ) != 0 )
                    ++failedRuns;
            }
        }
    }

    return ( failedRuns != 0 ) ? -1 : 0;
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Test entry point: runs the strided global -> local async copy kernel for
// all types, widths and strides. num_elements is not used by this test.
int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int status = test_strided_copy_all_types( deviceID, context, queue, async_strided_global_to_local_kernel );
    return status;
}
|
||||
|
||||
// Test entry point: runs the strided local -> global async copy kernel for
// all types, widths and strides. num_elements is not used by this test.
int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int status = test_strided_copy_all_types( deviceID, context, queue, async_strided_local_to_global_kernel );
    return status;
}
|
||||
|
||||
159
test_conformance/basic/test_barrier.c
Normal file
159
test_conformance/basic/test_barrier.c
Normal file
@@ -0,0 +1,159 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * OpenCL C source of the "compute_sum" kernel used by test_barrier.
 *
 * Each work-item first accumulates a strided partial sum of a[] into
 * tmp_sum[tid]. The partial sums are then folded together in a loop that
 * repeatedly halves (rounding up, via hadd) the number of live entries, with
 * a barrier(CLK_GLOBAL_MEM_FENCE) at the top of every folding step. Work-item
 * 0 finally stores tmp_sum[0] into *sum.
 */
const char *barrier_kernel_code =
    "__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n"
    "{\n"
    " int tid = get_local_id(0);\n"
    " int lsize = get_local_size(0);\n"
    " int i;\n"
    "\n"
    " tmp_sum[tid] = 0;\n"
    " for (i=tid; i<n; i+=lsize)\n"
    " tmp_sum[tid] += a[i];\n"
    " \n"
    " // updated to work for any workgroup size \n"
    " for (i=hadd(lsize,1); lsize>1; i = hadd(i,1))\n"
    " {\n"
    " barrier(CLK_GLOBAL_MEM_FENCE);\n"
    " if (tid + i < lsize)\n"
    " tmp_sum[tid] += tmp_sum[tid + i];\n"
    " lsize = i; \n"
    " }\n"
    "\n"
    " //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n"
    " if (tid == 0)\n"
    " *sum = tmp_sum[0];\n"
    "}\n";
|
||||
|
||||
|
||||
/*
 * Host-side reference check for the barrier test.
 *
 * Adds up the n values in inptr and compares the total with outptr[0], the
 * single value read back from the device.
 *
 * The total is accumulated in an unsigned integer: signed overflow is
 * undefined behaviour in C, whereas unsigned arithmetic wraps. Comparing the
 * unsigned bit patterns gives the same verdict as the signed comparison
 * whenever the signed sum does not overflow.
 *
 * Returns 0 and logs a pass message when the values match, otherwise logs a
 * failure message and returns -1.
 */
static int
verify_sum(int *inptr, int *outptr, int n)
{
    unsigned int total = 0;
    int i;

    for (i = 0; i < n; i++)
    {
        total += (unsigned int)inptr[i];
    }

    if (total != (unsigned int)outptr[0])
    {
        log_error("BARRIER test failed\n");
        return -1;
    }

    log_info("BARRIER test passed\n");
    return 0;
}
|
||||
|
||||
|
||||
int
|
||||
test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[3];
|
||||
cl_int *input_ptr = NULL, *output_ptr = NULL;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t global_threads[3];
|
||||
size_t local_threads[3];
|
||||
int err;
|
||||
int i;
|
||||
size_t max_local_workgroup_size[3];
|
||||
size_t max_threadgroup_size = 0;
|
||||
MTdata d;
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_kernel_code, "compute_sum" );
|
||||
test_error(err, "Failed to build kernel/program.");
|
||||
|
||||
err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
|
||||
sizeof(max_threadgroup_size), &max_threadgroup_size, NULL);
|
||||
test_error(err, "clGetKernelWorkgroupInfo failed.");
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
|
||||
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
|
||||
|
||||
// Pick the minimum of the device and the kernel
|
||||
if (max_threadgroup_size > max_local_workgroup_size[0])
|
||||
max_threadgroup_size = max_local_workgroup_size[0];
|
||||
|
||||
// work group size must divide evenly into the global size
|
||||
while( num_elements % max_threadgroup_size )
|
||||
max_threadgroup_size--;
|
||||
|
||||
input_ptr = (int*)malloc(sizeof(int) * num_elements);
|
||||
output_ptr = (int*)malloc(sizeof(int));
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed.");
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int), NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed.");
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * max_threadgroup_size, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed.");
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed.");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
|
||||
err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
|
||||
test_error(err, "clSetKernelArg failed.");
|
||||
|
||||
global_threads[0] = max_threadgroup_size;
|
||||
local_threads[0] = max_threadgroup_size;
|
||||
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueNDRangeKernel failed.");
|
||||
|
||||
err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int), (void *)output_ptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed.");
|
||||
|
||||
err = verify_sum(input_ptr, output_ptr, num_elements);
|
||||
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
303
test_conformance/basic/test_basic_parameter_types.c
Normal file
303
test_conformance/basic/test_basic_parameter_types.c
Normal file
@@ -0,0 +1,303 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * printf-style template for the "test_kernel" OpenCL kernel.
 *
 * It contains fourteen %s slots, in order: eight vector-width suffixes (one
 * for each of the seven by-value parameters plus the float result pointer),
 * then six conversion-function names applied to the integer parameters.
 * The float parameter is stored without a conversion call.
 */
const char *kernel_code =
    "__kernel void test_kernel(\n"
    "char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f,\n"
    "__global float%s *result)\n"
    "{\n"
    " result[0] = %s(c);\n"
    " result[1] = %s(uc);\n"
    " result[2] = %s(s);\n"
    " result[3] = %s(us);\n"
    " result[4] = %s(i);\n"
    " result[5] = %s(ui);\n"
    " result[6] = f;\n"
    "}\n";
|
||||
|
||||
/*
 * printf-style template for the "test_kernel_long" OpenCL kernel.
 *
 * It contains five %s slots, in order: three vector-width suffixes (long,
 * ulong and the float result pointer), then two conversion-function names
 * applied to the long and ulong parameters.
 */
const char *kernel_code_long =
    "__kernel void test_kernel_long(\n"
    "long%s l, ulong%s ul,\n"
    "__global float%s *result)\n"
    "{\n"
    " result[0] = %s(l);\n"
    " result[1] = %s(ul);\n"
    "}\n";
|
||||
|
||||
int
|
||||
test_basic_parameter_types_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clMemWrapper results;
|
||||
int error;
|
||||
size_t global[3] = {1, 1, 1};
|
||||
float results_back[2*16];
|
||||
int count, index;
|
||||
const char* types[] = { "long", "ulong" };
|
||||
char kernel_string[8192];
|
||||
int sizes[] = {1, 2, 4, 8, 16};
|
||||
const char* size_strings[] = {"", "2", "4", "8", "16"};
|
||||
float expected;
|
||||
int total_errors = 0;
|
||||
int size_to_test;
|
||||
char *ptr;
|
||||
char convert_string[1024];
|
||||
size_t max_parameter_size;
|
||||
|
||||
// We don't really care about the contents since we're just testing that the types work.
|
||||
cl_long l[16]={-21,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
|
||||
cl_ulong ul[16]={22,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
|
||||
// Calculate how large our paramter size is to the kernel
|
||||
size_t parameter_size = sizeof(cl_long) + sizeof(cl_ulong);
|
||||
|
||||
// Init our strings.
|
||||
kernel_string[0] = '\0';
|
||||
convert_string[0] = '\0';
|
||||
|
||||
// Get the maximum parameter size allowed
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
|
||||
test_error( error, "Unable to get max parameter size from device" );
|
||||
|
||||
// Create the results buffer
|
||||
results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*2*16, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
// Go over all the vector sizes
|
||||
for (size_to_test = 0; size_to_test < 5; size_to_test++) {
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
|
||||
size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem);
|
||||
if (total_parameter_size > max_parameter_size) {
|
||||
log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
|
||||
(int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size);
|
||||
continue;
|
||||
}
|
||||
|
||||
log_info("Testing vector size %d\n", sizes[size_to_test]);
|
||||
|
||||
// If size is > 1, then we need a explicit convert call.
|
||||
if (sizes[size_to_test] > 1) {
|
||||
sprintf(convert_string, "convert_float%s", size_strings[size_to_test]);
|
||||
} else {
|
||||
sprintf(convert_string, " ");
|
||||
}
|
||||
|
||||
// Build the kernel
|
||||
sprintf(kernel_string, kernel_code_long,
|
||||
size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
|
||||
convert_string, convert_string
|
||||
);
|
||||
|
||||
ptr = kernel_string;
|
||||
error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel_long");
|
||||
test_error(error, "create single kernel failed");
|
||||
|
||||
// Set the arguments
|
||||
for (count = 0; count < 2; count++) {
|
||||
switch (count) {
|
||||
case 0: error = clSetKernelArg(kernel, count, sizeof(cl_long)*sizes[size_to_test], &l); break;
|
||||
case 1: error = clSetKernelArg(kernel, count, sizeof(cl_ulong)*sizes[size_to_test], &ul); break;
|
||||
default: log_error("Test error"); break;
|
||||
}
|
||||
if (error)
|
||||
log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]);
|
||||
test_error(error, "clSetKernelArgs failed");
|
||||
}
|
||||
error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results);
|
||||
test_error(error, "clSetKernelArgs failed");
|
||||
|
||||
// Execute
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
// Read back the results
|
||||
error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*2*16, results_back, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
// Verify the results
|
||||
for (count = 0; count < 2; count++) {
|
||||
for (index=0; index < sizes[size_to_test]; index++) {
|
||||
switch (count) {
|
||||
case 0: expected = (float)l[index]; break;
|
||||
case 1: expected = (float)ul[index]; break;
|
||||
default: log_error("Test error"); break;
|
||||
}
|
||||
|
||||
if (results_back[count*sizes[size_to_test]+index] != expected) {
|
||||
total_errors++;
|
||||
log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test],
|
||||
index, results_back[count*sizes[size_to_test]+index], expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return total_errors;
|
||||
}
|
||||
|
||||
int
|
||||
test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clMemWrapper results;
|
||||
int error;
|
||||
size_t global[3] = {1, 1, 1};
|
||||
float results_back[7*16];
|
||||
int count, index;
|
||||
const char* types[] = {"char", "uchar", "short", "ushort", "int", "uint", "float"};
|
||||
char kernel_string[8192];
|
||||
int sizes[] = {1, 2, 4, 8, 16};
|
||||
const char* size_strings[] = {"", "2", "4", "8", "16"};
|
||||
float expected;
|
||||
int total_errors = 0;
|
||||
int size_to_test;
|
||||
char *ptr;
|
||||
char convert_string[1024];
|
||||
size_t max_parameter_size;
|
||||
|
||||
// We don't really care about the contents since we're just testing that the types work.
|
||||
cl_char c[16]={0,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
|
||||
cl_uchar uc[16]={16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
cl_short s[16]={-17,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
|
||||
cl_ushort us[16]={18,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
cl_int i[16]={-19,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
|
||||
cl_uint ui[16]={20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
cl_float f[16]={-23,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
|
||||
|
||||
// Calculate how large our paramter size is to the kernel
|
||||
size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar) +
|
||||
sizeof(cl_short) +sizeof(cl_ushort) +
|
||||
sizeof(cl_int) +sizeof(cl_uint) +
|
||||
sizeof(cl_float);
|
||||
|
||||
// Init our strings.
|
||||
kernel_string[0] = '\0';
|
||||
convert_string[0] = '\0';
|
||||
|
||||
// Get the maximum parameter size allowed
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
|
||||
test_error( error, "Unable to get max parameter size from device" );
|
||||
|
||||
// Create the results buffer
|
||||
results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*7*16, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
// Go over all the vector sizes
|
||||
for (size_to_test = 0; size_to_test < 5; size_to_test++) {
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
|
||||
size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem);
|
||||
if (total_parameter_size > max_parameter_size) {
|
||||
log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
|
||||
(int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size);
|
||||
continue;
|
||||
}
|
||||
|
||||
log_info("Testing vector size %d\n", sizes[size_to_test]);
|
||||
|
||||
// If size is > 1, then we need a explicit convert call.
|
||||
if (sizes[size_to_test] > 1) {
|
||||
sprintf(convert_string, "convert_float%s", size_strings[size_to_test]);
|
||||
} else {
|
||||
sprintf(convert_string, " ");
|
||||
}
|
||||
|
||||
// Build the kernel
|
||||
sprintf(kernel_string, kernel_code,
|
||||
size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
|
||||
size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
|
||||
size_strings[size_to_test], size_strings[size_to_test],
|
||||
convert_string, convert_string, convert_string,
|
||||
convert_string, convert_string, convert_string
|
||||
);
|
||||
|
||||
ptr = kernel_string;
|
||||
error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel");
|
||||
test_error(error, "create single kernel failed");
|
||||
|
||||
// Set the arguments
|
||||
for (count = 0; count < 7; count++) {
|
||||
switch (count) {
|
||||
case 0: error = clSetKernelArg(kernel, count, sizeof(cl_char)*sizes[size_to_test], &c); break;
|
||||
case 1: error = clSetKernelArg(kernel, count, sizeof(cl_uchar)*sizes[size_to_test], &uc); break;
|
||||
case 2: error = clSetKernelArg(kernel, count, sizeof(cl_short)*sizes[size_to_test], &s); break;
|
||||
case 3: error = clSetKernelArg(kernel, count, sizeof(cl_ushort)*sizes[size_to_test], &us); break;
|
||||
case 4: error = clSetKernelArg(kernel, count, sizeof(cl_int)*sizes[size_to_test], &i); break;
|
||||
case 5: error = clSetKernelArg(kernel, count, sizeof(cl_uint)*sizes[size_to_test], &ui); break;
|
||||
case 6: error = clSetKernelArg(kernel, count, sizeof(cl_float)*sizes[size_to_test], &f); break;
|
||||
default: log_error("Test error"); break;
|
||||
}
|
||||
if (error)
|
||||
log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]);
|
||||
test_error(error, "clSetKernelArgs failed");
|
||||
}
|
||||
error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results);
|
||||
test_error(error, "clSetKernelArgs failed");
|
||||
|
||||
// Execute
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
// Read back the results
|
||||
error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*7*16, results_back, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
// Verify the results
|
||||
for (count = 0; count < 7; count++) {
|
||||
for (index=0; index < sizes[size_to_test]; index++) {
|
||||
switch (count) {
|
||||
case 0: expected = (float)c[index]; break;
|
||||
case 1: expected = (float)uc[index]; break;
|
||||
case 2: expected = (float)s[index]; break;
|
||||
case 3: expected = (float)us[index]; break;
|
||||
case 4: expected = (float)i[index]; break;
|
||||
case 5: expected = (float)ui[index]; break;
|
||||
case 6: expected = (float)f[index]; break;
|
||||
default: log_error("Test error"); break;
|
||||
}
|
||||
|
||||
if (results_back[count*sizes[size_to_test]+index] != expected) {
|
||||
total_errors++;
|
||||
log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test],
|
||||
index, results_back[count*sizes[size_to_test]+index], expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (gHasLong) {
|
||||
log_info("Testing long types...\n");
|
||||
total_errors += test_basic_parameter_types_long( device, context, queue, num_elements );
|
||||
}
|
||||
else {
|
||||
log_info("Longs unsupported, skipping.");
|
||||
}
|
||||
|
||||
return total_errors;
|
||||
}
|
||||
|
||||
|
||||
|
||||
564
test_conformance/basic/test_bufferreadwriterect.c
Normal file
564
test_conformance/basic/test_bufferreadwriterect.c
Normal file
@@ -0,0 +1,564 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// Evaluates `cmd` once and, if the result is not CL_SUCCESS, logs the source
// file and line followed by the caller's printf-style message and a newline.
// Despite the name it does not exit: the abort() call is commented out, so
// execution continues after the log output.
#define CL_EXIT_ERROR(cmd,format,...)                           \
{                                                               \
    if (CL_SUCCESS != (cmd)) {                                  \
        log_error("CL ERROR: %s %u: ", __FILE__,__LINE__);      \
        log_error(format,## __VA_ARGS__ );                      \
        log_error("\n");                                        \
        /*abort();*/                                            \
    }                                                           \
}
|
||||
|
||||
typedef unsigned char BufferType;
|
||||
|
||||
// Globals for test
|
||||
cl_command_queue queue;
|
||||
|
||||
// Width and height of each pair of images.
|
||||
enum { TotalImages = 8 };
|
||||
size_t width [TotalImages];
|
||||
size_t height [TotalImages];
|
||||
size_t depth [TotalImages];
|
||||
|
||||
// cl buffer and host buffer.
|
||||
cl_mem buffer [TotalImages];
|
||||
BufferType* verify[TotalImages];
|
||||
BufferType* backing[TotalImages];
|
||||
|
||||
// Temporary buffer used for read and write operations.
|
||||
BufferType* tmp_buffer;
|
||||
size_t tmp_buffer_size;
|
||||
|
||||
size_t num_tries = 50; // Number of randomly selected operations to perform.
|
||||
size_t alloc_scale = 2; // Scale term applied buffer allocation size.
|
||||
MTdata mt;
|
||||
|
||||
// Initialize a buffer in host memory containing random values of the specified size.
|
||||
static void initialize_image(BufferType* ptr, size_t w, size_t h, size_t d, MTdata mt)
|
||||
{
|
||||
enum { ElementSize = sizeof(BufferType)/sizeof(unsigned char) };
|
||||
|
||||
unsigned char* buf = (unsigned char*)ptr;
|
||||
size_t size = w*h*d*ElementSize;
|
||||
|
||||
for (size_t i = 0; i != size; i++) {
|
||||
buf[i] = (unsigned char)(genrand_int32(mt) % 0xff);
|
||||
}
|
||||
}
|
||||
|
||||
// This function prints the contents of a buffer to standard error.
|
||||
void print_buffer(BufferType* buf, size_t w, size_t h, size_t d) {
|
||||
log_error("Size = %lux%lux%lu (%lu total)\n",w,h,d,w*h*d);
|
||||
for (unsigned k=0; k!=d;++k) {
|
||||
log_error("Slice: %u\n",k);
|
||||
for (unsigned j=0; j!=h;++j) {
|
||||
for (unsigned i=0;i!=w;++i) {
|
||||
log_error("%02x",buf[k*(w*h)+j*w+i]);
|
||||
}
|
||||
log_error("\n");
|
||||
}
|
||||
log_error("\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Reports whether a rectangular copy of `region` elements from `src_offset`
// to `dst_offset`, inside one buffer laid out with the given row and slice
// pitches, would have overlapping source and destination.
//
// Two checks are made:
//   1. The source and destination boxes intersect on all three axes.
//   2. Otherwise, a region that extends past the end of a row (or, for
//      multi-slice regions, past the end of a slice) by more than the other
//      region's offset on that axis is treated as overlapping when the linear
//      element ranges of the two regions also intersect.
bool check_overlap_rect(size_t src_offset[3],
                        size_t dst_offset[3],
                        size_t region[3],
                        size_t row_pitch,
                        size_t slice_pitch)
{
    // Check 1: axis-by-axis interval intersection of the two boxes.
    bool boxes_meet = true;
    for (unsigned axis = 0; axis < 3; ++axis)
    {
        const size_t src_lo = src_offset[axis];
        const size_t src_hi = src_offset[axis] + region[axis];
        const size_t dst_lo = dst_offset[axis];
        const size_t dst_hi = dst_offset[axis] + region[axis];

        if (!(src_lo < dst_hi) || !(src_hi > dst_lo))
            boxes_meet = false;
    }
    if (boxes_meet)
        return true;

    // Linear [first, limit) element ranges of both regions.
    const size_t span = region[2] * slice_pitch + region[1] * row_pitch + region[0];
    const size_t src_first = src_offset[2] * slice_pitch + src_offset[1] * row_pitch + src_offset[0];
    const size_t dst_first = dst_offset[2] * slice_pitch + dst_offset[1] * row_pitch + dst_offset[0];
    const size_t src_limit = src_first + span;
    const size_t dst_limit = dst_first + span;
    const bool spans_cross = (src_first <= dst_first && dst_first < src_limit)
                          || (dst_first <= src_first && src_first < dst_limit);

    bool wrapped_overlap = false;

    // Check 2a: amount by which each region runs past the end of a row.
    // The offsets are unsigned, so "spill > offset" already implies spill > 0.
    const size_t src_x_end = src_offset[0] + region[0];
    const size_t dst_x_end = dst_offset[0] + region[0];
    const size_t src_x_spill = (src_x_end > row_pitch) ? src_x_end - row_pitch : 0;
    const size_t dst_x_spill = (dst_x_end > row_pitch) ? dst_x_end - row_pitch : 0;
    if ((src_x_spill > dst_offset[0] || dst_x_spill > src_offset[0]) && spans_cross)
        wrapped_overlap = true;

    // Check 2b: the same idea for rows running past the end of a slice; only
    // evaluated for regions that are more than one slice deep.
    if (region[2] > 1)
    {
        const size_t rows_per_slice = slice_pitch / row_pitch;
        const size_t src_y_end = src_offset[1] + region[1];
        const size_t dst_y_end = dst_offset[1] + region[1];
        const size_t src_y_spill = (src_y_end > rows_per_slice) ? src_y_end - rows_per_slice : 0;
        const size_t dst_y_spill = (dst_y_end > rows_per_slice) ? dst_y_end - rows_per_slice : 0;
        if ((src_y_spill > dst_offset[1] || dst_y_spill > src_offset[1]) && spans_cross)
            wrapped_overlap = true;
    }

    return wrapped_overlap;
}
|
||||
|
||||
|
||||
|
||||
// This function invokes the CopyBufferRect CL command and then mirrors the operation on the host side verify buffers.
|
||||
int copy_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) {
|
||||
|
||||
// Copy between cl buffers.
|
||||
size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0;
|
||||
size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0;
|
||||
size_t src_row_pitch = width[src];
|
||||
|
||||
cl_int err;
|
||||
if (check_overlap_rect(soffset,doffset,sregion,src_row_pitch, src_slice_pitch)) {
|
||||
log_info( "Copy overlap reported, skipping copy buffer rect\n" );
|
||||
return CL_SUCCESS;
|
||||
} else {
|
||||
if ((err = clEnqueueCopyBufferRect(queue,
|
||||
buffer[src],buffer[dst],
|
||||
soffset, doffset,
|
||||
sregion,/*dregion,*/
|
||||
width[src], src_slice_pitch,
|
||||
width[dst], dst_slice_pitch,
|
||||
0, NULL, NULL)) != CL_SUCCESS)
|
||||
{
|
||||
CL_EXIT_ERROR(err, "clEnqueueCopyBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst);
|
||||
}
|
||||
}
|
||||
|
||||
// Copy between host buffers.
|
||||
size_t total = sregion[0] * sregion[1] * sregion[2];
|
||||
|
||||
size_t spitch = width[src];
|
||||
size_t sslice = width[src]*height[src];
|
||||
|
||||
size_t dpitch = width[dst];
|
||||
size_t dslice = width[dst]*height[dst];
|
||||
|
||||
for (size_t i = 0; i != total; ++i) {
|
||||
|
||||
// Compute the coordinates of the element within the source and destination regions.
|
||||
size_t rslice = sregion[0]*sregion[1];
|
||||
size_t sz = i / rslice;
|
||||
size_t sy = (i % rslice) / sregion[0];
|
||||
size_t sx = (i % rslice) % sregion[0];
|
||||
|
||||
size_t dz = sz;
|
||||
size_t dy = sy;
|
||||
size_t dx = sx;
|
||||
|
||||
// Compute the offset in bytes of the source and destination.
|
||||
size_t s_idx = (soffset[2]+sz)*sslice + (soffset[1]+sy)*spitch + soffset[0]+sx;
|
||||
size_t d_idx = (doffset[2]+dz)*dslice + (doffset[1]+dy)*dpitch + doffset[0]+dx;
|
||||
|
||||
verify[dst][d_idx] = verify[src][s_idx];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// This function compares the destination region in the buffer pointed
|
||||
// to by device, to the source region of the specified verify buffer.
|
||||
int verify_region(BufferType* device, size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3]) {
|
||||
|
||||
// Copy between host buffers.
|
||||
size_t spitch = width[src];
|
||||
size_t sslice = width[src]*height[src];
|
||||
|
||||
size_t dpitch = width[dst];
|
||||
size_t dslice = width[dst]*height[dst];
|
||||
|
||||
size_t total = sregion[0] * sregion[1] * sregion[2];
|
||||
for (size_t i = 0; i != total; ++i) {
|
||||
|
||||
// Compute the coordinates of the element within the source and destination regions.
|
||||
size_t rslice = sregion[0]*sregion[1];
|
||||
size_t sz = i / rslice;
|
||||
size_t sy = (i % rslice) / sregion[0];
|
||||
size_t sx = (i % rslice) % sregion[0];
|
||||
|
||||
// Compute the offset in bytes of the source and destination.
|
||||
size_t s_idx = (soffset[2]+sz)*sslice + (soffset[1]+sy)*spitch + soffset[0]+sx;
|
||||
size_t d_idx = (doffset[2]+sz)*dslice + (doffset[1]+sy)*dpitch + doffset[0]+sx;
|
||||
|
||||
if (device[d_idx] != verify[src][s_idx]) {
|
||||
log_error("Verify failed on comparsion %lu: coordinate (%lu, %lu, %lu) of region\n",i,sx,sy,sz);
|
||||
log_error("0x%02x != 0x%02x\n", device[d_idx], verify[src][s_idx]);
|
||||
#if 0
|
||||
// Uncomment this section to print buffers.
|
||||
log_error("Device (copy): [%lu]\n",dst);
|
||||
print_buffer(device,width[dst],height[dst],depth[dst]);
|
||||
log_error("\n");
|
||||
log_error("Verify: [%lu]\n",src);
|
||||
print_buffer(verify[src],width[src],height[src],depth[src]);
|
||||
log_error("\n");
|
||||
abort();
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// This function invokes ReadBufferRect to read a region from the
|
||||
// specified source buffer into a temporary destination buffer. The
|
||||
// contents of the temporary buffer are then compared to the source
|
||||
// region of the corresponding verify buffer.
|
||||
int read_verify_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) {
|
||||
|
||||
// Clear the temporary destination host buffer.
|
||||
memset(tmp_buffer, 0xff, tmp_buffer_size);
|
||||
|
||||
size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0;
|
||||
size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0;
|
||||
|
||||
// Copy the source region of the cl buffer, to the destination region of the temporary buffer.
|
||||
CL_EXIT_ERROR(clEnqueueReadBufferRect(queue,
|
||||
buffer[src],
|
||||
CL_TRUE,
|
||||
soffset,doffset,
|
||||
sregion,
|
||||
width[src], src_slice_pitch,
|
||||
width[dst], dst_slice_pitch,
|
||||
tmp_buffer,
|
||||
0, NULL, NULL), "clEnqueueCopyBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst);
|
||||
|
||||
return verify_region(tmp_buffer,src,soffset,sregion,dst,doffset);
|
||||
}
|
||||
|
||||
// This function performs the same verification check as
|
||||
// read_verify_region, except a MapBuffer command is used to access the
|
||||
// device buffer data instead of a ReadBufferRect, and the whole
|
||||
// buffer is checked.
|
||||
int map_verify_region(size_t src) {
|
||||
|
||||
size_t size_bytes = width[src]*height[src]*depth[src]*sizeof(BufferType);
|
||||
|
||||
// Copy the source region of the cl buffer, to the destination region of the temporary buffer.
|
||||
cl_int err;
|
||||
BufferType* mapped = (BufferType*)clEnqueueMapBuffer(queue,buffer[src],CL_TRUE,CL_MAP_READ,0,size_bytes,0,NULL,NULL,&err);
|
||||
CL_EXIT_ERROR(err, "clEnqueueMapBuffer failed for buffer %u",(unsigned)src);
|
||||
|
||||
size_t soffset[] = { 0, 0, 0 };
|
||||
size_t sregion[] = { width[src], height[src], depth[src] };
|
||||
|
||||
int ret = verify_region(mapped,src,soffset,sregion,src,soffset);
|
||||
|
||||
CL_EXIT_ERROR(clEnqueueUnmapMemObject(queue,buffer[src],mapped,0,NULL,NULL),
|
||||
"clEnqueueUnmapMemObject failed for buffer %u",(unsigned)src);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// This function generates a new temporary buffer and then writes a
|
||||
// region of it to a region in the specified destination buffer.
|
||||
int write_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) {
|
||||
|
||||
initialize_image(tmp_buffer, tmp_buffer_size, 1, 1, mt);
|
||||
// memset(tmp_buffer, 0xf0, tmp_buffer_size);
|
||||
|
||||
size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0;
|
||||
size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0;
|
||||
|
||||
// Copy the source region of the cl buffer, to the destination region of the temporary buffer.
|
||||
CL_EXIT_ERROR(clEnqueueWriteBufferRect(queue,
|
||||
buffer[dst],
|
||||
CL_TRUE,
|
||||
doffset,soffset,
|
||||
/*sregion,*/dregion,
|
||||
width[dst], dst_slice_pitch,
|
||||
width[src], src_slice_pitch,
|
||||
tmp_buffer,
|
||||
0, NULL, NULL), "clEnqueueWriteBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst);
|
||||
|
||||
// Copy from the temporary buffer to the host buffer.
|
||||
size_t spitch = width[src];
|
||||
size_t sslice = width[src]*height[src];
|
||||
size_t dpitch = width[dst];
|
||||
size_t dslice = width[dst]*height[dst];
|
||||
|
||||
size_t total = sregion[0] * sregion[1] * sregion[2];
|
||||
for (size_t i = 0; i != total; ++i) {
|
||||
|
||||
// Compute the coordinates of the element within the source and destination regions.
|
||||
size_t rslice = sregion[0]*sregion[1];
|
||||
size_t sz = i / rslice;
|
||||
size_t sy = (i % rslice) / sregion[0];
|
||||
size_t sx = (i % rslice) % sregion[0];
|
||||
|
||||
size_t dz = sz;
|
||||
size_t dy = sy;
|
||||
size_t dx = sx;
|
||||
|
||||
// Compute the offset in bytes of the source and destination.
|
||||
size_t s_idx = (soffset[2]+sz)*sslice + (soffset[1]+sy)*spitch + soffset[0]+sx;
|
||||
size_t d_idx = (doffset[2]+dz)*dslice + (doffset[1]+dy)*dpitch + doffset[0]+dx;
|
||||
|
||||
verify[dst][d_idx] = tmp_buffer[s_idx];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Callback taking a cl_mem and a user-data pointer: it ignores the cl_mem
// argument and releases the block that `data` points to with free().
void CL_CALLBACK mem_obj_destructor_callback( cl_mem /* unused */, void *data )
{
    free( data );
}
|
||||
|
||||
// This is the main test function for the conformance test.
|
||||
int
|
||||
test_bufferreadwriterect(cl_device_id device, cl_context context, cl_command_queue queue_, int num_elements)
|
||||
{
|
||||
queue = queue_;
|
||||
cl_int err;
|
||||
|
||||
// Initialize the random number generator.
|
||||
mt = init_genrand( gRandomSeed );
|
||||
|
||||
// Compute a maximum buffer size based on the number of test images and the device maximum.
|
||||
cl_ulong max_mem_alloc_size = 0;
|
||||
CL_EXIT_ERROR(clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_mem_alloc_size, NULL),"Could not get device info");
|
||||
log_info("CL_DEVICE_MAX_MEM_ALLOC_SIZE = %llu bytes.\n", max_mem_alloc_size);
|
||||
|
||||
// Confirm that the maximum allocation size is not zero.
|
||||
if (max_mem_alloc_size == 0) {
|
||||
log_error("Error: CL_DEVICE_MAX_MEM_ALLOC_SIZE is zero bytes\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Guess at a reasonable maximum dimension.
|
||||
size_t max_mem_alloc_dim = (size_t)cbrt((double)(max_mem_alloc_size/sizeof(BufferType)))/alloc_scale;
|
||||
if (max_mem_alloc_dim == 0) {
|
||||
max_mem_alloc_dim = max_mem_alloc_size;
|
||||
}
|
||||
|
||||
log_info("Using maximum dimension = %lu.\n", max_mem_alloc_dim);
|
||||
|
||||
// Create pairs of cl buffers and host buffers on which operations will be mirrored.
|
||||
log_info("Creating %u pairs of random sized host and cl buffers.\n", TotalImages);
|
||||
|
||||
size_t max_size = 0;
|
||||
size_t total_bytes = 0;
|
||||
|
||||
for (unsigned i=0; i != TotalImages; ++i) {
|
||||
|
||||
// Determine a width and height for this buffer.
|
||||
size_t size_bytes;
|
||||
size_t tries = 0;
|
||||
size_t max_tries = 1048576;
|
||||
do {
|
||||
width[i] = get_random_size_t(1, max_mem_alloc_dim, mt);
|
||||
height[i] = get_random_size_t(1, max_mem_alloc_dim, mt);
|
||||
depth[i] = get_random_size_t(1, max_mem_alloc_dim, mt);
|
||||
++tries;
|
||||
} while ((tries < max_tries) && (size_bytes = width[i]*height[i]*depth[i]*sizeof(BufferType)) > max_mem_alloc_size);
|
||||
|
||||
// Check to see if adequately sized buffers were found.
|
||||
if (tries >= max_tries) {
|
||||
log_error("Error: Could not find random buffer sized less than %llu bytes in %lu tries.\n",
|
||||
max_mem_alloc_size, max_tries);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Keep track of the dimensions of the largest buffer.
|
||||
max_size = (size_bytes > max_size) ? size_bytes : max_size;
|
||||
total_bytes += size_bytes;
|
||||
|
||||
log_info("Buffer[%u] is (%lu,%lu,%lu) = %lu MB (truncated)\n",i,width[i],height[i],depth[i],(size_bytes)/1048576);
|
||||
}
|
||||
|
||||
log_info( "Total size: %lu MB (truncated)\n", total_bytes/1048576 );
|
||||
|
||||
// Allocate a temporary buffer for read and write operations.
|
||||
tmp_buffer_size = max_size;
|
||||
tmp_buffer = (BufferType*)malloc(tmp_buffer_size);
|
||||
|
||||
// Initialize cl buffers
|
||||
log_info( "Initializing buffers\n" );
|
||||
for (unsigned i=0; i != TotalImages; ++i) {
|
||||
|
||||
size_t size_bytes = width[i]*height[i]*depth[i]*sizeof(BufferType);
|
||||
|
||||
// Allocate a host copy of the buffer for verification.
|
||||
verify[i] = (BufferType*)malloc(size_bytes);
|
||||
CL_EXIT_ERROR(verify[i] ? CL_SUCCESS : -1, "malloc of host buffer failed for buffer %u", i);
|
||||
|
||||
// Allocate the buffer in host memory.
|
||||
backing[i] = (BufferType*)malloc(size_bytes);
|
||||
CL_EXIT_ERROR(backing[i] ? CL_SUCCESS : -1, "malloc of backing buffer failed for buffer %u", i);
|
||||
|
||||
// Generate a random buffer.
|
||||
log_info( "Initializing buffer %u\n", i );
|
||||
initialize_image(verify[i], width[i], height[i], depth[i], mt);
|
||||
|
||||
// Copy the image into a buffer which will passed to CL.
|
||||
memcpy(backing[i], verify[i], size_bytes);
|
||||
|
||||
// Create the CL buffer.
|
||||
buffer[i] = clCreateBuffer (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, size_bytes, backing[i], &err);
|
||||
CL_EXIT_ERROR(err,"clCreateBuffer failed for buffer %u", i);
|
||||
|
||||
// Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
|
||||
err = clSetMemObjectDestructorCallback( buffer[i], mem_obj_destructor_callback, backing[i] );
|
||||
CL_EXIT_ERROR(err, "Unable to set mem object destructor callback" );
|
||||
}
|
||||
|
||||
// Main test loop, run num_tries times.
|
||||
log_info( "Executing %u test operations selected at random.\n", (unsigned)num_tries );
|
||||
for (size_t iter = 0; iter < num_tries; ++iter) {
|
||||
|
||||
// Determine a source and a destination.
|
||||
size_t src = get_random_size_t(0,TotalImages,mt);
|
||||
size_t dst = get_random_size_t(0,TotalImages,mt);
|
||||
|
||||
// Determine the minimum dimensions.
|
||||
size_t min_width = width[src] < width[dst] ? width[src] : width[dst];
|
||||
size_t min_height = height[src] < height[dst] ? height[src] : height[dst];
|
||||
size_t min_depth = depth[src] < depth[dst] ? depth[src] : depth[dst];
|
||||
|
||||
// Generate a random source rectangle within the minimum dimensions.
|
||||
size_t mx = get_random_size_t(0, min_width-1, mt);
|
||||
size_t my = get_random_size_t(0, min_height-1, mt);
|
||||
size_t mz = get_random_size_t(0, min_depth-1, mt);
|
||||
|
||||
size_t sw = get_random_size_t(1, (min_width - mx), mt);
|
||||
size_t sh = get_random_size_t(1, (min_height - my), mt);
|
||||
size_t sd = get_random_size_t(1, (min_depth - mz), mt);
|
||||
|
||||
size_t sx = get_random_size_t(0, width[src]-sw, mt);
|
||||
size_t sy = get_random_size_t(0, height[src]-sh, mt);
|
||||
size_t sz = get_random_size_t(0, depth[src]-sd, mt);
|
||||
|
||||
size_t soffset[] = { sx, sy, sz };
|
||||
size_t sregion[] = { sw, sh, sd };
|
||||
|
||||
// Generate a destination rectangle of the same size.
|
||||
size_t dw = sw;
|
||||
size_t dh = sh;
|
||||
size_t dd = sd;
|
||||
|
||||
// Generate a random destination offset within the buffer.
|
||||
size_t dx = get_random_size_t(0, (width[dst] - dw), mt);
|
||||
size_t dy = get_random_size_t(0, (height[dst] - dh), mt);
|
||||
size_t dz = get_random_size_t(0, (depth[dst] - dd), mt);
|
||||
size_t doffset[] = { dx, dy, dz };
|
||||
size_t dregion[] = { dw, dh, dd };
|
||||
|
||||
// Execute one of three operations:
|
||||
// - Copy: Copies between src and dst within each set of host, buffer, and images.
|
||||
// - Read & verify: Reads src region from buffer and image, and compares to host.
|
||||
// - Write: Generates new buffer with src dimensions, and writes to cl buffer and image.
|
||||
|
||||
enum { TotalOperations = 3 };
|
||||
size_t operation = get_random_size_t(0,TotalOperations,mt);
|
||||
|
||||
switch (operation) {
|
||||
case 0:
|
||||
log_info("%lu Copy %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n",
|
||||
iter,
|
||||
src, soffset[0], soffset[1], soffset[2],
|
||||
dst, doffset[0], doffset[1], doffset[2],
|
||||
sregion[0], sregion[1], sregion[2],
|
||||
sregion[0]*sregion[1]*sregion[2]);
|
||||
if ((err = copy_region(src, soffset, sregion, dst, doffset, dregion)))
|
||||
return err;
|
||||
break;
|
||||
case 1:
|
||||
log_info("%lu Read %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n",
|
||||
iter,
|
||||
src, soffset[0], soffset[1], soffset[2],
|
||||
dst, doffset[0], doffset[1], doffset[2],
|
||||
sregion[0], sregion[1], sregion[2],
|
||||
sregion[0]*sregion[1]*sregion[2]);
|
||||
if ((err = read_verify_region(src, soffset, sregion, dst, doffset, dregion)))
|
||||
return err;
|
||||
break;
|
||||
case 2:
|
||||
log_info("%lu Write %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n",
|
||||
iter,
|
||||
src, soffset[0], soffset[1], soffset[2],
|
||||
dst, doffset[0], doffset[1], doffset[2],
|
||||
sregion[0], sregion[1], sregion[2],
|
||||
sregion[0]*sregion[1]*sregion[2]);
|
||||
if ((err = write_region(src, soffset, sregion, dst, doffset, dregion)))
|
||||
return err;
|
||||
break;
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Uncomment this section to verify each operation.
|
||||
// If commented out, verification won't occur until the end of the
|
||||
// test, and it will not be possible to determine which operation failed.
|
||||
log_info("Verify src %lu offset (%u,%u,%u) region (%lux%lux%lu)\n", src, 0, 0, 0, width[src], height[src], depth[src]);
|
||||
if (err = map_verify_region(src))
|
||||
return err;
|
||||
|
||||
log_info("Verify dst %lu offset (%u,%u,%u) region (%lux%lux%lu)\n", dst, 0, 0, 0, width[dst], height[dst], depth[dst]);
|
||||
if (err = map_verify_region(dst))
|
||||
return err;
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
} // end main for loop.
|
||||
|
||||
for (unsigned i=0;i<TotalImages;++i) {
|
||||
log_info("Verify %u offset (%u,%u,%u) region (%lux%lux%lu)\n", i, 0, 0, 0, width[i], height[i], depth[i]);
|
||||
if ((err = map_verify_region(i)))
|
||||
return err;
|
||||
}
|
||||
|
||||
// Clean-up.
|
||||
free_mtdata(mt);
|
||||
for (unsigned i=0;i<TotalImages;++i) {
|
||||
free( verify[i] );
|
||||
clReleaseMemObject( buffer[i] );
|
||||
}
|
||||
free( tmp_buffer );
|
||||
|
||||
if (!err) {
|
||||
log_info("RECT read, write test passed\n");
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
276
test_conformance/basic/test_constant.c
Normal file
276
test_conformance/basic/test_constant.c
Normal file
@@ -0,0 +1,276 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// OpenCL C source of "constant_kernel": each work-item reads one float and one
// int from two __constant buffers and stores their product (computed in float)
// into the __global output buffer.
const char *constant_kernel_code =
    "__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " float ftmp = tmpF[tid]; \n"
    " float Itmp = tmpI[tid]; \n"
    " out[tid] = ftmp * Itmp; \n"
    "}\n";
|
||||
|
||||
// OpenCL C source of "loop_constant_kernel": every work-item sums the elements
// i_pos[0], i_pos[3], ..., i_pos[(num-1)*3] of a constant buffer and writes the
// sum to out[tid].
const char *loop_constant_kernel_code =
    "kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " float sum = 0;\n"
    " for (int i = 0; i < num; i++) {\n"
    " float pos = i_pos[i*3];\n"
    " sum += pos;\n"
    " }\n"
    " out[tid] = sum;\n"
    "}\n";
|
||||
|
||||
|
||||
static int
|
||||
verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i < n; i++)
|
||||
{
|
||||
float f = tmpF[i] * tmpI[i];
|
||||
if( out[i] != f )
|
||||
{
|
||||
log_error("CONSTANT test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("CONSTANT test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n)
|
||||
{
|
||||
int i;
|
||||
cl_int j;
|
||||
for (i=0; i < n; i++)
|
||||
{
|
||||
float sum = 0;
|
||||
for (j=0; j < l; ++j)
|
||||
sum += tmp[j*3];
|
||||
|
||||
if( out[i] != sum )
|
||||
{
|
||||
log_error("loop CONSTANT test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("loop CONSTANT test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[3];
|
||||
cl_int *tmpI;
|
||||
cl_float *tmpF, *out;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t global_threads[3];
|
||||
int err;
|
||||
unsigned int i;
|
||||
cl_ulong maxSize, maxGlobalSize, maxAllocSize;
|
||||
size_t num_floats, num_ints, constant_values;
|
||||
MTdata d;
|
||||
RoundingMode oldRoundMode;
|
||||
int isRTZ = 0;
|
||||
|
||||
/* Verify our test buffer won't be bigger than allowed */
|
||||
err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
|
||||
test_error( err, "Unable to get max constant buffer size" );
|
||||
|
||||
log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", maxSize);
|
||||
|
||||
// Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0);
|
||||
test_error(err, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");
|
||||
|
||||
if (maxSize > maxGlobalSize / 4)
|
||||
maxSize = maxGlobalSize / 4;
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0);
|
||||
test_error(err, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE ");
|
||||
|
||||
if (maxSize > maxAllocSize)
|
||||
maxSize = maxAllocSize;
|
||||
|
||||
maxSize/=4;
|
||||
num_ints = (size_t)maxSize/sizeof(cl_int);
|
||||
num_floats = (size_t)maxSize/sizeof(cl_float);
|
||||
if (num_ints >= num_floats) {
|
||||
constant_values = num_floats;
|
||||
} else {
|
||||
constant_values = num_ints;
|
||||
}
|
||||
|
||||
log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n",
|
||||
constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float), constant_values*sizeof(cl_int), constant_values*sizeof(cl_float));
|
||||
|
||||
tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values);
|
||||
tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values);
|
||||
out = (cl_float*)malloc(sizeof(cl_float) * constant_values);
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * constant_values, NULL, NULL);
|
||||
if (!streams[2])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<constant_values; i++) {
|
||||
tmpI[i] = (int)get_random_float(-0x02000000, 0x02000000, d);
|
||||
tmpF[i] = get_random_float(-0x02000000, 0x02000000, d);
|
||||
}
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)tmpF, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*constant_values, (void *)tmpI, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, &constant_kernel_code, "constant_kernel" );
|
||||
if (err) {
|
||||
log_error("Failed to create kernel and program: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
global_threads[0] = constant_values;
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
//If we only support rtz mode
|
||||
if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
|
||||
{
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
isRTZ = 1;
|
||||
}
|
||||
|
||||
err = verify(tmpF, tmpI, out, (int)constant_values);
|
||||
|
||||
if (isRTZ)
|
||||
(void)set_round(oldRoundMode, kfloat);
|
||||
|
||||
// Loop constant buffer test
|
||||
cl_program loop_program;
|
||||
cl_kernel loop_kernel;
|
||||
cl_int limit = 2;
|
||||
|
||||
memset(out, 0, sizeof(cl_float) * constant_values);
|
||||
err = create_single_kernel_helper(context, &loop_program, &loop_kernel, 1,
|
||||
&loop_constant_kernel_code, "loop_constant_kernel" );
|
||||
if (err) {
|
||||
log_error("Failed to create loop kernel and program: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(loop_kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(loop_kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(loop_kernel, 2, sizeof(limit), &limit);
|
||||
if (err != CL_SUCCESS) {
|
||||
log_error("clSetKernelArgs for loop kernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueNDRangeKernel( queue, loop_kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS) {
|
||||
log_error("clEnqueueNDRangeKernel failed: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS) {
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verify_loop_constant(tmpF, out, limit, (int)constant_values);
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
clReleaseKernel(loop_kernel);
|
||||
clReleaseProgram(loop_program);
|
||||
free(tmpI);
|
||||
free(tmpF);
|
||||
free(out);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
101
test_conformance/basic/test_constant_source.cpp
Normal file
101
test_conformance/basic/test_constant_source.cpp
Normal file
@@ -0,0 +1,101 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// OpenCL C source with program-scope __constant data.  Work-item 0 writes the
// scalar constant to out[0] and outValues[outIndex] to out[1]; every other
// work-item `tid` writes outValues[tid] to out[tid + 1].
const char *constant_source_kernel_code[] = {
    "__constant int outVal = 42;\n"
    "__constant int outIndex = 7;\n"
    "__constant int outValues[ 16 ] = { 17, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };\n"
    "\n"
    "__kernel void constant_kernel( __global int *out )\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " if( tid == 0 )\n"
    " {\n"
    " out[ 0 ] = outVal;\n"
    " out[ 1 ] = outValues[ outIndex ];\n"
    " }\n"
    " else\n"
    " {\n"
    " out[ tid + 1 ] = outValues[ tid ];\n"
    " }\n"
    "}\n"
};
|
||||
|
||||
// Builds the kernel from constant_source_kernel_code, runs it with 16
// work-items and compares the 17 values it writes against the expected table.
//
// device, num_elements - unused.
// context, queue       - used to build, launch and read back.
//
// Returns 0 when every value matches, -1 at the first mismatch (after logging
// which kind of constant access produced it).  OpenCL call failures are
// handled by test_error.
int test_constant_source(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper outStream;

    // expectedValues[0] is outVal, [1] is outValues[outIndex], and [2..16]
    // are outValues[1..15] as written by work-items 1..15.
    cl_int expectedValues[ 17 ] = { 42, 1985, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };
    cl_int outValues[ 17 ];
    const int valueCount = (int)( sizeof( expectedValues ) / sizeof( expectedValues[ 0 ] ) );
    size_t threads[ 1 ] = { 16 };
    cl_int error;

    // Build the program and fetch the kernel.
    error = create_single_kernel_helper( context, &program, &kernel, 1, constant_source_kernel_code, "constant_kernel" );
    test_error( error, "Unable to create testing kernel" );

    // Output buffer large enough for all 17 results.
    outStream = clCreateBuffer( context, CL_MEM_WRITE_ONLY, sizeof( outValues ), NULL, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( outStream ), &outStream );
    test_error( error, "Unable to set kernel argument" );

    // Launch, then read the results back with a blocking read.
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    test_error( error, "Unable to enqueue kernel" );

    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof( outValues ), outValues, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    // Compare against the reference table.
    for( int i = 0; i < valueCount; i++ )
    {
        if( expectedValues[ i ] == outValues[ i ] )
            continue;

        switch( i )
        {
            case 0:
                log_error( "ERROR: Output value %d from constant source global did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] );
                break;
            case 1:
                log_error( "ERROR: Output value %d from constant-indexed constant array did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] );
                break;
            default:
                log_error( "ERROR: Output value %d from variable-indexed constant array did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] );
                break;
        }
        return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
122
test_conformance/basic/test_createkernelsinprogram.c
Normal file
122
test_conformance/basic/test_createkernelsinprogram.c
Normal file
@@ -0,0 +1,122 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// OpenCL C program containing exactly one kernel, sample_test, which converts
// each float of src to int and stores it in dst.
const char *sample_single_kernel =
    "__kernel void sample_test(__global float *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = (int)src[tid];\n"
    "\n"
    "}\n";
|
||||
|
||||
// OpenCL C program containing two kernels, sample_test and sample_test2, with
// identical bodies (float-to-int conversion of src into dst).
const char *sample_double_kernel =
    "__kernel void sample_test(__global float *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = (int)src[tid];\n"
    "\n"
    "}\n"
    "__kernel void sample_test2(__global float *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = (int)src[tid];\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
int
|
||||
test_createkernelsinprogram(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_program program;
|
||||
cl_kernel kernel[2];
|
||||
unsigned int num_kernels;
|
||||
size_t lengths[2];
|
||||
int err;
|
||||
|
||||
lengths[0] = strlen(sample_single_kernel);
|
||||
program = clCreateProgramWithSource(context, 1, &sample_single_kernel, lengths, NULL);
|
||||
if (!program)
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgramExecutable failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clCreateKernelsInProgram(program, 1, kernel, &num_kernels);
|
||||
if ( (err != CL_SUCCESS) || (num_kernels != 1) )
|
||||
{
|
||||
log_error("clCreateKernelsInProgram test failed for a single kernel\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
clReleaseKernel(kernel[0]);
|
||||
clReleaseProgram(program);
|
||||
|
||||
lengths[0] = strlen(sample_double_kernel);
|
||||
program = clCreateProgramWithSource(context, 1, &sample_double_kernel, lengths, NULL);
|
||||
if (!program)
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgramExecutable failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clCreateKernelsInProgram(program, 2, kernel, &num_kernels);
|
||||
if ( (err != CL_SUCCESS) || (num_kernels != 2) )
|
||||
{
|
||||
log_error("clCreateKernelsInProgram test failed for two kernels\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
log_info("clCreateKernelsInProgram test passed\n");
|
||||
|
||||
clReleaseKernel(kernel[0]);
|
||||
clReleaseKernel(kernel[1]);
|
||||
clReleaseProgram(program);
|
||||
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
254
test_conformance/basic/test_enqueue_map.cpp
Normal file
254
test_conformance/basic/test_enqueue_map.cpp
Normal file
@@ -0,0 +1,254 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
// Host-pointer related cl_mem_flags combinations the map tests iterate over.
// Entries are index-aligned with flag_set_names.
const cl_mem_flags flag_set[] = {
    CL_MEM_ALLOC_HOST_PTR,
    CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
    CL_MEM_USE_HOST_PTR,
    CL_MEM_COPY_HOST_PTR,
    0    // no host-pointer flag at all
};
|
||||
// Printable names for the entries of flag_set, in the same order; used only
// for log output.
const char* flag_set_names[] = {
    "CL_MEM_ALLOC_HOST_PTR",
    "CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR",
    "CL_MEM_USE_HOST_PTR",
    "CL_MEM_COPY_HOST_PTR",
    "0"
};
|
||||
|
||||
int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
const size_t bufferSize = 256*256;
|
||||
int src_flag_id;
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
cl_char *initialData = (cl_char*)malloc(bufferSize);
|
||||
cl_char *finalData = (cl_char*)malloc(bufferSize);
|
||||
|
||||
for (src_flag_id=0; src_flag_id < sizeof(flag_set)/sizeof(flag_set[0]); src_flag_id++)
|
||||
{
|
||||
clMemWrapper memObject;
|
||||
log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
|
||||
|
||||
generate_random_data( kChar, (unsigned int)bufferSize, d, initialData );
|
||||
|
||||
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
|
||||
memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), initialData, &error);
|
||||
else
|
||||
memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), NULL, &error);
|
||||
test_error( error, "Unable to create testing buffer" );
|
||||
|
||||
if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
|
||||
{
|
||||
error = clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize * sizeof( cl_char ), initialData, 0, NULL, NULL);
|
||||
test_error( error, "clEnqueueWriteBuffer failed");
|
||||
}
|
||||
|
||||
for( int i = 0; i < 128; i++ )
|
||||
{
|
||||
|
||||
size_t offset = (size_t)random_in_range( 0, (int)bufferSize - 1, d );
|
||||
size_t length = (size_t)random_in_range( 1, (int)( bufferSize - offset ), d );
|
||||
|
||||
cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
|
||||
offset, length, 0, NULL, NULL, &error );
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
print_error( error, "clEnqueueMapBuffer call failed" );
|
||||
log_error( "\tOffset: %d Length: %d\n", (int)offset, (int)length );
|
||||
free( initialData );
|
||||
free( finalData );
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Write into the region
|
||||
for( size_t j = 0; j < length; j++ )
|
||||
{
|
||||
cl_char spin = (cl_char)genrand_int32( d );
|
||||
|
||||
// Test read AND write in one swipe
|
||||
cl_char value = mappedRegion[ j ];
|
||||
value = spin - value;
|
||||
mappedRegion[ j ] = value;
|
||||
|
||||
// Also update the initial data array
|
||||
value = initialData[ offset + j ];
|
||||
value = spin - value;
|
||||
initialData[ offset + j ] = value;
|
||||
}
|
||||
|
||||
// Unmap
|
||||
error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
|
||||
test_error( error, "Unable to unmap buffer" );
|
||||
}
|
||||
|
||||
// Final validation: read actual values of buffer and compare against our reference
|
||||
error = clEnqueueReadBuffer( queue, memObject, CL_TRUE, 0, sizeof( cl_char ) * bufferSize, finalData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read results" );
|
||||
|
||||
for( size_t q = 0; q < bufferSize; q++ )
|
||||
{
|
||||
if( initialData[ q ] != finalData[ q ] )
|
||||
{
|
||||
log_error( "ERROR: Sample %d did not validate! Got %d, expected %d\n", (int)q, (int)finalData[ q ], (int)initialData[ q ] );
|
||||
free( initialData );
|
||||
free( finalData );
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
} // cl_mem flags
|
||||
|
||||
free( initialData );
|
||||
free( finalData );
|
||||
free_mtdata(d);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Exercises clEnqueueMapImage on a 256x256 CL_RGBA / CL_UNSIGNED_INT32 2D image, once for
 * every entry of flag_set.
 *
 * For each flag combination the image is filled with random data: through the host pointer
 * when the flags contain CL_MEM_USE_HOST_PTR or CL_MEM_COPY_HOST_PTR, otherwise through a
 * blocking clEnqueueWriteImage. Then 128 random sub-rectangles are mapped for read/write;
 * every channel value inside the rectangle is replaced by (spin - value) and the same update
 * is applied to the host-side reference copy. Finally the whole image is read back and
 * compared sample by sample against the reference.
 *
 * Returns 0 when every flag combination validates and -1 for the failures reported directly
 * by this function.
 *
 * NOTE(review): the test_error() calls below appear to return from the function on failure;
 * if so, initialData, finalData and d are not released on those paths - confirm against the
 * harness macro before relying on this function being leak-free.
 */
int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 };
    const size_t imageSize = 256;
    const size_t channelCount = 4; // one cl_uint per CL_RGBA channel
    const size_t sampleCount = imageSize * imageSize * channelCount;
    cl_uint *initialData;
    cl_uint *finalData;
    MTdata d;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )

    initialData = (cl_uint*)malloc(sampleCount * sizeof(cl_uint));
    finalData = (cl_uint*)malloc(sampleCount * sizeof(cl_uint));
    if( initialData == NULL || finalData == NULL )
    {
        log_error( "ERROR: Unable to allocate host memory for the image data!\n" );
        free(initialData);
        free(finalData);
        return -1;
    }

    if( !is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &format ) )
    {
        log_error( "ERROR: Test requires basic OpenCL 1.0 format CL_RGBA:CL_UNSIGNED_INT32, which is unsupported by this device!\n" );
        free(initialData);
        free(finalData);
        return -1;
    }

    d = init_genrand( gRandomSeed );
    for( size_t src_flag_id = 0; src_flag_id < sizeof(flag_set)/sizeof(flag_set[0]); src_flag_id++ )
    {
        clMemWrapper memObject;
        const bool hasHostData =
            ( flag_set[src_flag_id] & ( CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR ) ) != 0;

        log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);

        // Fill every channel of every pixel; the reference buffer holds
        // imageSize * imageSize * channelCount values, not just one per pixel.
        generate_random_data( kUInt, (unsigned int)sampleCount, d, initialData );

        memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format,
                                     imageSize, imageSize, 0, hasHostData ? initialData : NULL, &error );
        test_error( error, "Unable to create testing buffer" );

        if( !hasHostData )
        {
            // No host pointer was handed to the image, so upload the reference data explicitly.
            size_t write_origin[3] = { 0, 0, 0 };
            size_t write_region[3] = { imageSize, imageSize, 1 };
            error = clEnqueueWriteImage( queue, memObject, CL_TRUE, write_origin, write_region,
                                         0, 0, initialData, 0, NULL, NULL );
            test_error( error, "Unable to write to testing buffer" );
        }

        for( int i = 0; i < 128; i++ )
        {
            size_t offset[3], region[3];
            size_t rowPitch;

            offset[ 0 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
            region[ 0 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 0 ] - 1), d );
            offset[ 1 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
            region[ 1 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 1 ] - 1), d );
            offset[ 2 ] = 0;
            region[ 2 ] = 1;

            cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
                                                                  offset, region, &rowPitch, NULL, 0, NULL, NULL, &error );
            if( error != CL_SUCCESS )
            {
                print_error( error, "clEnqueueMapImage call failed" );
                log_error( "\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], (int)offset[1], (int)region[0], (int)region[1] );
                free(initialData);
                free(finalData);
                free_mtdata(d);
                return -1;
            }

            // Read AND write every sample of the mapped rectangle, mirroring each update in
            // the reference copy. The arithmetic is unsigned so that arbitrary 32-bit sample
            // values wrap instead of overflowing a signed integer.
            for( size_t y = 0; y < region[ 1 ]; y++ )
            {
                for( size_t x = 0; x < region[ 0 ] * channelCount; x++ )
                {
                    const cl_uint spin = (cl_uint)random_in_range( 16, 1024, d );
                    const size_t mappedIdx = ( y * rowPitch / sizeof(cl_uint) ) + x;
                    const size_t hostIdx = ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * channelCount + x;

                    mappedRegion[ mappedIdx ] = spin - mappedRegion[ mappedIdx ];
                    initialData[ hostIdx ] = spin - initialData[ hostIdx ];
                }
            }

            // Unmap
            error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
            test_error( error, "Unable to unmap buffer" );
        }

        // Final validation: read actual values of the image and compare against our reference
        size_t finalOrigin[3] = { 0, 0, 0 }, finalRegion[3] = { imageSize, imageSize, 1 };
        error = clEnqueueReadImage( queue, memObject, CL_TRUE, finalOrigin, finalRegion, 0, 0, finalData, 0, NULL, NULL );
        test_error( error, "Unable to read results" );

        for( size_t q = 0; q < sampleCount; q++ )
        {
            if( initialData[ q ] != finalData[ q ] )
            {
                log_error( "ERROR: Sample %d (coord %d,%d) did not validate! Got %d, expected %d\n", (int)q, (int)( ( q / 4 ) % imageSize ), (int)( ( q / 4 ) / imageSize ),
                           (int)finalData[ q ], (int)initialData[ q ] );
                free(initialData);
                free(finalData);
                free_mtdata(d);
                return -1;
            }
        }
    } // cl_mem_flags

    free(initialData);
    free(finalData);
    free_mtdata(d);
    return 0;
}
|
||||
|
||||
|
||||
123
test_conformance/basic/test_enqueued_local_size.c
Normal file
123
test_conformance/basic/test_enqueued_local_size.c
Normal file
@@ -0,0 +1,123 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include "../../test_common/harness/rounding_mode.h"
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/* OpenCL C source: the work-item with global id (0,0) stores the enqueued local size of
   dimensions 0 and 1 into dst[0] and dst[1]. */
static const char *enqueued_local_size_2d_code =
    "__kernel void test_enqueued_local_size_2d(global int *dst)\n"
    "{\n"
    " if ((get_global_id(0) == 0) && (get_global_id(1) == 0))\n"
    " {\n"
    " dst[0] = (int)get_enqueued_local_size(0)\n;"
    " dst[1] = (int)get_enqueued_local_size(1)\n;"
    " }\n"
    "}\n";

/* OpenCL C source: the work-item with global id 0 stores the enqueued local size of
   dimension 0 into dst[0]. */
static const char *enqueued_local_size_1d_code =
    "__kernel void test_enqueued_local_size_1d(global int *dst)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " if (get_global_id(0) == 0)\n"
    " {\n"
    " dst[tid_x] = (int)get_enqueued_local_size(0)\n;"
    " }\n"
    "}\n";
|
||||
|
||||
|
||||
/* Compares the first n values in result against expected (each narrowed to int).
   Logs the outcome and returns 0 when all n entries match, -1 at the first mismatch. */
static int
verify_enqueued_local_size(int *result, size_t *expected, int n)
{
    int idx = 0;

    while (idx < n)
    {
        if ((int)expected[idx] != result[idx])
        {
            log_error("get_enqueued_local_size failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("get_enqueued_local_size passed\n");
    return 0;
}
|
||||
|
||||
|
||||
int
|
||||
test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams;
|
||||
cl_program program[2];
|
||||
cl_kernel kernel[2];
|
||||
|
||||
int *output_ptr;
|
||||
size_t globalsize[2];
|
||||
size_t localsize[2];
|
||||
int err;
|
||||
|
||||
output_ptr = (int*)malloc(2 * sizeof(int));
|
||||
|
||||
streams = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), 2*sizeof(int), NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
|
||||
err = create_single_kernel_helper_with_build_options(context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code, "test_enqueued_local_size_1d", "-cl-std=CL2.0");
|
||||
test_error( err, "create_single_kernel_helper failed");
|
||||
err = create_single_kernel_helper_with_build_options(context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code, "test_enqueued_local_size_2d", "-cl-std=CL2.0");
|
||||
test_error( err, "create_single_kernel_helper failed");
|
||||
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof streams, &streams);
|
||||
test_error( err, "clSetKernelArgs failed.");
|
||||
err = clSetKernelArg(kernel[1], 0, sizeof streams, &streams);
|
||||
test_error( err, "clSetKernelArgs failed.");
|
||||
|
||||
globalsize[0] = (size_t)num_elements;
|
||||
globalsize[1] = (size_t)num_elements;
|
||||
localsize[0] = 16;
|
||||
localsize[1] = 11;
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[1], 2, NULL, globalsize, localsize, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueNDRangeKernel failed.");
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueReadBuffer failed.");
|
||||
|
||||
err = verify_enqueued_local_size(output_ptr, localsize, 2);
|
||||
|
||||
globalsize[0] = (size_t)num_elements;
|
||||
localsize[0] = 9;
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[1], 1, NULL, globalsize, localsize, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueNDRangeKernel failed.");
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueReadBuffer failed.");
|
||||
|
||||
err = verify_enqueued_local_size(output_ptr, localsize, 1);
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams);
|
||||
clReleaseKernel(kernel[0]);
|
||||
clReleaseKernel(kernel[1]);
|
||||
clReleaseProgram(program[0]);
|
||||
clReleaseProgram(program[1]);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
385
test_conformance/basic/test_explicit_s2v.cpp
Normal file
385
test_conformance/basic/test_explicit_s2v.cpp
Normal file
@@ -0,0 +1,385 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
/* Source text of one conversion kernel: reads a scalar of type srctype and stores it,
   explicitly cast, into a vector of type dsttype<size>. srctype and dsttype are string
   literals; size is stringified here. */
#define DECLARE_S2V_IDENT_KERNEL(srctype,dsttype,size)                                                                       \
    "__kernel void test_conversion(__global " srctype " *sourceValues, __global " dsttype #size " *destValues )\n"            \
    "{\n"                                                                                                                     \
    " int tid = get_global_id(0);\n"                                                                                          \
    " " srctype " src = sourceValues[tid];\n"                                                                                 \
    "\n"                                                                                                                      \
    " destValues[tid] = (" dsttype #size ")src;\n"                                                                            \
    "\n"                                                                                                                      \
    "}\n"

/* Initializer holding the kernels for vector sizes 2, 4, 8 and 16 of one source/destination
   pair. srctype is a string literal; dsttype is a bare token that is stringified here. */
#define DECLARE_S2V_IDENT_KERNELS(srctype,dsttype)       \
    {                                                    \
        DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,2),    \
        DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,4),    \
        DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,8),    \
        DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,16)    \
    }

/* Initializer for a source/destination pair that has no kernels. */
#define DECLARE_EMPTY { NULL, NULL, NULL, NULL, NULL }

/* Note: the next four arrays all must match in order and size to the ExplicitTypes enum in conversions.h!!! */

/* Initializer holding one DECLARE_S2V_IDENT_KERNELS entry per destination type for the given
   source type (a bare token, stringified here), followed by one empty entry. */
#define DECLARE_S2V_IDENT_KERNELS_SET(srctype)                  \
    {                                                           \
        DECLARE_S2V_IDENT_KERNELS(#srctype,bool),               \
        DECLARE_S2V_IDENT_KERNELS(#srctype,char),               \
        DECLARE_S2V_IDENT_KERNELS(#srctype,uchar),              \
        DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned char),      \
        DECLARE_S2V_IDENT_KERNELS(#srctype,short),              \
        DECLARE_S2V_IDENT_KERNELS(#srctype,ushort),             \
        DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned short),     \
        DECLARE_S2V_IDENT_KERNELS(#srctype,int),                \
        DECLARE_S2V_IDENT_KERNELS(#srctype,uint),               \
        DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned int),       \
        DECLARE_S2V_IDENT_KERNELS(#srctype,long),               \
        DECLARE_S2V_IDENT_KERNELS(#srctype,ulong),              \
        DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned long),      \
        DECLARE_S2V_IDENT_KERNELS(#srctype,float),              \
        DECLARE_EMPTY                                           \
    }

/* Initializer for a source type that has no kernels at all: fifteen empty entries. */
#define DECLARE_EMPTY_SET    \
    {                        \
        DECLARE_EMPTY,       \
        DECLARE_EMPTY,       \
        DECLARE_EMPTY,       \
        DECLARE_EMPTY,       \
        DECLARE_EMPTY,       \
        DECLARE_EMPTY,       \
        DECLARE_EMPTY,       \
        DECLARE_EMPTY,       \
        DECLARE_EMPTY,       \
        DECLARE_EMPTY,       \
        DECLARE_EMPTY,       \
        DECLARE_EMPTY,       \
        DECLARE_EMPTY,       \
        DECLARE_EMPTY,       \
        DECLARE_EMPTY        \
    }
|
||||
|
||||
|
||||
/* The overall array */
|
||||
const char * kernel_explicit_s2v_set[kNumExplicitTypes][kNumExplicitTypes][5] = {
|
||||
DECLARE_S2V_IDENT_KERNELS_SET(bool),
|
||||
DECLARE_S2V_IDENT_KERNELS_SET(char),
|
||||
DECLARE_S2V_IDENT_KERNELS_SET(uchar),
|
||||
DECLARE_S2V_IDENT_KERNELS_SET(unsigned char),
|
||||
DECLARE_S2V_IDENT_KERNELS_SET(short),
|
||||
DECLARE_S2V_IDENT_KERNELS_SET(ushort),
|
||||
DECLARE_S2V_IDENT_KERNELS_SET(unsigned short),
|
||||
DECLARE_S2V_IDENT_KERNELS_SET(int),
|
||||
DECLARE_S2V_IDENT_KERNELS_SET(uint),
|
||||
DECLARE_S2V_IDENT_KERNELS_SET(unsigned int),
|
||||
DECLARE_S2V_IDENT_KERNELS_SET(long),
|
||||
DECLARE_S2V_IDENT_KERNELS_SET(ulong),
|
||||
DECLARE_S2V_IDENT_KERNELS_SET(unsigned long),
|
||||
DECLARE_S2V_IDENT_KERNELS_SET(float),
|
||||
DECLARE_EMPTY_SET
|
||||
};
|
||||
|
||||
int test_explicit_s2v_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *programSrc,
|
||||
ExplicitType srcType, unsigned int count, ExplicitType destType, unsigned int vecSize, void *inputData )
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
int error;
|
||||
clMemWrapper streams[2];
|
||||
void *outData;
|
||||
unsigned char convertedData[ 8 ]; /* Max type size is 8 bytes */
|
||||
size_t threadSize[3], groupSize[3];
|
||||
unsigned int i, s;
|
||||
unsigned char *inPtr, *outPtr;
|
||||
size_t paramSize, destTypeSize;
|
||||
|
||||
const char* finalProgramSrc[2] = {
|
||||
"", // optional pragma
|
||||
programSrc
|
||||
};
|
||||
|
||||
if (srcType == kDouble || destType == kDouble) {
|
||||
finalProgramSrc[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
|
||||
}
|
||||
|
||||
|
||||
if( programSrc == NULL )
|
||||
return 0;
|
||||
|
||||
paramSize = get_explicit_type_size( srcType );
|
||||
destTypeSize = get_explicit_type_size( destType );
|
||||
|
||||
size_t destStride = destTypeSize * vecSize;
|
||||
|
||||
outData = malloc( destStride * count );
|
||||
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 2, finalProgramSrc, "test_conversion" ) )
|
||||
{
|
||||
log_info( "****** %s%s *******\n", finalProgramSrc[0], finalProgramSrc[1] );
|
||||
return -1;
|
||||
}
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), paramSize * count, inputData, &error);
|
||||
test_error( error, "clCreateBuffer failed");
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), destStride * count, NULL, &error);
|
||||
test_error( error, "clCreateBuffer failed");
|
||||
|
||||
/* Set the arguments */
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
/* Run the kernel */
|
||||
threadSize[0] = count;
|
||||
|
||||
error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL );
|
||||
test_error( error, "Unable to execute test kernel" );
|
||||
|
||||
/* Now verify the results. Each value should have been duplicated four times, and we should be able to just
|
||||
do a memcpy instead of relying on the actual type of data */
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, destStride * count, outData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read output values!" );
|
||||
|
||||
inPtr = (unsigned char *)inputData;
|
||||
outPtr = (unsigned char *)outData;
|
||||
|
||||
for( i = 0; i < count; i++ )
|
||||
{
|
||||
/* Convert the input data element to our output data type to compare against */
|
||||
convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType );
|
||||
|
||||
/* Now compare every element of the vector */
|
||||
for( s = 0; s < vecSize; s++ )
|
||||
{
|
||||
if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 )
|
||||
{
|
||||
unsigned int *p = (unsigned int *)outPtr;
|
||||
log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize );
|
||||
log_error( " Input: 0x%0*x\n", (int)( paramSize * 2 ), *(unsigned int *)inPtr & ( 0xffffffff >> ( 32 - paramSize * 8 ) ) );
|
||||
log_error( " Actual: 0x%08x 0x%08x 0x%08x 0x%08x\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
inPtr += paramSize;
|
||||
outPtr += destStride;
|
||||
}
|
||||
|
||||
free( outData );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Runs the scalar-to-vector cast test for srcType with vector sizes 2, 4, 8 and 16.
 *
 * Only the destination type equal to srcType is exercised. The type is skipped when it is
 * kDouble without cl_khr_fp64, when it is kLong/kULong and gHasLong is false, or when its
 * type name contains a space. After the first failing vector size the remaining sizes are
 * skipped.
 *
 * Returns 0 when nothing failed and -1 otherwise.
 */
int test_explicit_s2v_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, ExplicitType srcType,
                                   unsigned int count, void *inputData )
{
    unsigned int sizes[] = { 2, 4, 8, 16, 0 };
    int i, dstType, failed = 0;

    for( dstType = kBool; dstType < kNumExplicitTypes; dstType++ )
    {
        if( dstType == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
            continue;

        if (( dstType == kLong || dstType == kULong ) && !gHasLong )
            continue;

        /* The remaining filters do not depend on the vector size, so they are evaluated
           once per destination type rather than once per size. */
        if( dstType != srcType )
            continue;
        if( strchr( get_explicit_type_name( (ExplicitType)srcType ), ' ' ) != NULL ||
            strchr( get_explicit_type_name( (ExplicitType)dstType ), ' ' ) != NULL )
            continue;

        for( i = 0; sizes[i] != 0; i++ )
        {
            if( test_explicit_s2v_function( deviceID, context, queue, kernel_explicit_s2v_set[ srcType ][ dstType ][ i ],
                                            srcType, count, (ExplicitType)dstType, sizes[ i ], inputData ) != 0 )
            {
                log_error( "ERROR: Explicit cast of scalar %s to vector %s%d FAILED; skipping other %s vector tests\n",
                           get_explicit_type_name(srcType), get_explicit_type_name((ExplicitType)dstType), sizes[i], get_explicit_type_name((ExplicitType)dstType) );
                failed = -1;
                break;
            }
        }
    }

    return failed;
}
|
||||
|
||||
/* Placeholder for the bool scalar-to-vector test: it only logs that the test is skipped and
   reports success. */
int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    log_info( "NOTE: Boolean vectors not defined in OpenCL 1.0. Skipping test.\n" );
    return 0;
}
|
||||
|
||||
/* Fills 128 random char values and runs the scalar-to-vector test set for kChar.
   Returns the result of test_explicit_s2v_function_set unchanged. */
int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kElementCount = 128;
    char values[128];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kChar, kElementCount, seed, values );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kChar, kElementCount, values );
    return status;
}
|
||||
|
||||
/* Fills 128 random unsigned char values and runs the scalar-to-vector test set for kUChar and
   then, if that passed, for kUnsignedChar. Returns 0 on success, -1 on the first failure. */
int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kElementCount = 128;
    unsigned char values[128];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kUChar, kElementCount, seed, values );

    int status = test_explicit_s2v_function_set( deviceID, context, queue, kUChar, kElementCount, values );
    if( status == 0 )
        status = test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedChar, kElementCount, values );

    return ( status != 0 ) ? -1 : 0;
}
|
||||
|
||||
/* Fills 128 random short values and runs the scalar-to-vector test set for kShort.
   Returns 0 on success, -1 on failure. */
int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kElementCount = 128;
    short values[128];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kShort, kElementCount, seed, values );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kShort, kElementCount, values );
    return ( status != 0 ) ? -1 : 0;
}
|
||||
|
||||
/* Fills 128 random unsigned short values and runs the scalar-to-vector test set for kUShort
   and then, if that passed, for kUnsignedShort. Returns 0 on success, -1 on the first failure. */
int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kElementCount = 128;
    unsigned short values[128];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kUShort, kElementCount, seed, values );

    int status = test_explicit_s2v_function_set( deviceID, context, queue, kUShort, kElementCount, values );
    if( status == 0 )
        status = test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedShort, kElementCount, values );

    return ( status != 0 ) ? -1 : 0;
}
|
||||
|
||||
/* Fills 128 random int values and runs the scalar-to-vector test set for kInt.
   Returns 0 on success, -1 on failure. */
int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kElementCount = 128;
    int values[128];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kInt, kElementCount, seed, values );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kInt, kElementCount, values );
    return ( status != 0 ) ? -1 : 0;
}
|
||||
|
||||
/* Fills 128 random unsigned int values and runs the scalar-to-vector test set for kUInt and
   then, if that passed, for kUnsignedInt. Returns 0 on success, -1 on the first failure. */
int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kElementCount = 128;
    unsigned int values[128];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kUInt, kElementCount, seed, values );

    int status = test_explicit_s2v_function_set( deviceID, context, queue, kUInt, kElementCount, values );
    if( status == 0 )
        status = test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedInt, kElementCount, values );

    return ( status != 0 ) ? -1 : 0;
}
|
||||
|
||||
/* Fills 128 random cl_long values and runs the scalar-to-vector test set for kLong.
   Returns 0 on success, -1 on failure. */
int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kElementCount = 128;
    cl_long values[128];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kLong, kElementCount, seed, values );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kLong, kElementCount, values );
    return ( status != 0 ) ? -1 : 0;
}
|
||||
|
||||
/* Fills 128 random cl_ulong values and runs the scalar-to-vector test set for kULong and
   then, if that passed, for kUnsignedLong. Returns 0 on success, -1 on the first failure. */
int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kElementCount = 128;
    cl_ulong values[128];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kULong, kElementCount, seed, values );

    int status = test_explicit_s2v_function_set( deviceID, context, queue, kULong, kElementCount, values );
    if( status == 0 )
        status = test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedLong, kElementCount, values );

    return ( status != 0 ) ? -1 : 0;
}
|
||||
|
||||
/* Fills 128 random float values and runs the scalar-to-vector test set for kFloat.
   Returns 0 on success, -1 on failure. */
int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kElementCount = 128;
    float values[128];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kFloat, kElementCount, seed, values );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kFloat, kElementCount, values );
    return ( status != 0 ) ? -1 : 0;
}
|
||||
|
||||
|
||||
/* Runs the scalar-to-vector test set for kDouble on 128 random double values when the device
   reports cl_khr_fp64; otherwise logs that the test is skipped and returns 0.
   Returns 0 on success or skip, -1 on failure. */
int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int kElementCount = 128;
    double values[128];
    RandomSeed seed(gRandomSeed);

    if( is_extension_available( deviceID, "cl_khr_fp64" ) )
    {
        generate_random_data( kDouble, kElementCount, seed, values );

        const int status = test_explicit_s2v_function_set( deviceID, context, queue, kDouble, kElementCount, values );
        return ( status != 0 ) ? -1 : 0;
    }

    log_info("Extension cl_khr_fp64 not supported. Skipping test.\n");
    return 0;
}
|
||||
|
||||
|
||||
161
test_conformance/basic/test_float2int.c
Normal file
161
test_conformance/basic/test_float2int.c
Normal file
@@ -0,0 +1,161 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/* OpenCL C source: each work-item casts src[tid] from float to int and stores it in dst[tid]. */
const char *float2int_kernel_code =
    "__kernel void test_float2int(__global float *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = (int)src[tid];\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
int
|
||||
verify_float2int(cl_float *inptr, cl_int *outptr, int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
if (outptr[i] != (int)inptr[i])
|
||||
{
|
||||
log_error("FLOAT2INT test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("FLOAT2INT test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
test_float2int(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_float *input_ptr;
|
||||
cl_int *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
void *values[2];
|
||||
size_t lengths[1];
|
||||
size_t threads[1];
|
||||
int err;
|
||||
int i;
|
||||
MTdata d;
|
||||
|
||||
input_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
|
||||
output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*num_elements, (void *)input_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
lengths[0] = strlen(float2int_kernel_code);
|
||||
program = clCreateProgramWithSource(context, 1, &float2int_kernel_code, lengths, NULL);
|
||||
if (!program)
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgramExecutable failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel = clCreateKernel(program, "test_float2int", NULL);
|
||||
if (!kernel)
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
values[0] = streams[0];
|
||||
values[1] = streams[1];
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err = clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (size_t)num_elements;
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verify_float2int(input_ptr, output_ptr, num_elements);
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
271
test_conformance/basic/test_fpmath_float.c
Normal file
271
test_conformance/basic/test_fpmath_float.c
Normal file
@@ -0,0 +1,271 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include "../../test_common/harness/rounding_mode.h"
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/* OpenCL C source: dst[tid] = srcA[tid] + srcB[tid] for every work-item. */
static const char *fpadd_kernel_code =
    "__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] + srcB[tid];\n"
    "}\n";

/* OpenCL C source: dst[tid] = srcA[tid] - srcB[tid] for every work-item. */
static const char *fpsub_kernel_code =
    "__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] - srcB[tid];\n"
    "}\n";

/* OpenCL C source: dst[tid] = srcA[tid] * srcB[tid] for every work-item. */
static const char *fpmul_kernel_code =
    "__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] * srcB[tid];\n"
    "}\n";

/* Error tolerance constant for this file. */
static const float MAX_ERR = 1e-5f;
|
||||
|
||||
/* Checks that outptr[k] equals the host float sum inptrA[k] + inptrB[k] for all n elements
   (exact comparison). Logs the outcome; returns 0 when all match, -1 at the first mismatch. */
static int
verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        float expected = inptrA[idx] + inptrB[idx];
        if (expected != outptr[idx])
        {
            log_error("FP_ADD float test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FP_ADD float test passed\n");
    return 0;
}
|
||||
|
||||
// Compares the device output of the subtract kernel with a host-computed
// difference (inptrA[i] - inptrB[i]) for each of the n elements.
//
// Returns 0 when every element matches exactly, -1 at the first mismatch.
static int
verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n)
{
    int pos;

    for (pos = 0; pos < n; ++pos)
    {
        const float expected = inptrA[pos] - inptrB[pos];

        if (expected != outptr[pos])
        {
            log_error("FP_SUB float test failed\n");
            return -1;
        }
    }

    log_info("FP_SUB float test passed\n");
    return 0;
}
|
||||
|
||||
// Compares the device output of the multiply kernel with a host-computed
// product (inptrA[i] * inptrB[i]) for each of the n elements.
//
// Returns 0 when every element matches exactly, -1 at the first mismatch.
static int
verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n)
{
    int remaining;
    int at = 0;

    for (remaining = n; remaining > 0; --remaining, ++at)
    {
        const float expected = inptrA[at] * inptrB[at];

        if (expected != outptr[at])
        {
            log_error("FP_MUL float test failed\n");
            return -1;
        }
    }

    log_info("FP_MUL float test passed\n");
    return 0;
}
|
||||
|
||||
|
||||
int
|
||||
test_fpmath_float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[4];
|
||||
cl_program program[3];
|
||||
cl_kernel kernel[3];
|
||||
|
||||
float *input_ptr[3], *output_ptr, *p;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
size_t length = sizeof(cl_float) * num_elements;
|
||||
int isRTZ = 0;
|
||||
RoundingMode oldMode = kDefaultRoundingMode;
|
||||
|
||||
// check for floating point capabilities
|
||||
cl_device_fp_config single_config = 0;
|
||||
err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
|
||||
if (err) {
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
//If we only support rtz mode
|
||||
if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
|
||||
{
|
||||
//Check to make sure we are an embedded device
|
||||
char profile[32];
|
||||
err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
|
||||
if( err )
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
|
||||
{
|
||||
log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
isRTZ = 1;
|
||||
oldMode = get_round();
|
||||
}
|
||||
|
||||
|
||||
input_ptr[0] = (cl_float*)malloc(length);
|
||||
input_ptr[1] = (cl_float*)malloc(length);
|
||||
input_ptr[2] = (cl_float*)malloc(length);
|
||||
output_ptr = (cl_float*)malloc(length);
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
|
||||
p = input_ptr[2];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueWriteBuffer failed.");
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueWriteBuffer failed.");
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueWriteBuffer failed.");
|
||||
|
||||
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd_kernel_code, "test_fpadd");
|
||||
test_error( err, "create_single_kernel_helper failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub_kernel_code, "test_fpsub");
|
||||
test_error( err, "create_single_kernel_helper failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul_kernel_code, "test_fpmul");
|
||||
test_error( err, "create_single_kernel_helper failed");
|
||||
|
||||
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
|
||||
test_error( err, "clSetKernelArgs failed.");
|
||||
|
||||
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
|
||||
test_error( err, "clSetKernelArgs failed.");
|
||||
|
||||
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
|
||||
test_error( err, "clSetKernelArgs failed.");
|
||||
|
||||
threads[0] = (unsigned int)num_elements;
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueNDRangeKernel failed.");
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueReadBuffer failed.");
|
||||
|
||||
if( isRTZ )
|
||||
set_round( kRoundTowardZero, kfloat );
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_fpadd(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_fpsub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_fpmul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
|
||||
if( isRTZ )
|
||||
set_round( oldMode, kfloat );
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
clReleaseKernel(kernel[i]);
|
||||
clReleaseProgram(program[i]);
|
||||
}
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(input_ptr[2]);
|
||||
free(output_ptr);
|
||||
free_mtdata( d );
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
269
test_conformance/basic/test_fpmath_float2.c
Normal file
269
test_conformance/basic/test_fpmath_float2.c
Normal file
@@ -0,0 +1,269 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include "../../test_common/harness/rounding_mode.h"
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// OpenCL C sources for the float2 kernels launched by test_fpmath_float2.
// Every kernel reads one float2 from srcA and srcB per work-item (indexed by
// get_global_id(0)) and stores the result of a single operator into dst.

// dst[tid] = srcA[tid] + srcB[tid]
const char *fpadd2_kernel_code =
    "__kernel void test_fpadd2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] + srcB[tid];\n"
    "}\n";

// dst[tid] = srcA[tid] - srcB[tid]
const char *fpsub2_kernel_code =
    "__kernel void test_fpsub2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] - srcB[tid];\n"
    "}\n";

// dst[tid] = srcA[tid] * srcB[tid]
const char *fpmul2_kernel_code =
    "__kernel void test_fpmul2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] * srcB[tid];\n"
    "}\n";
|
||||
|
||||
|
||||
// Compares the device output of the float2 add kernel with host-computed sums.
// The arrays are treated as flat float arrays of n scalars.
//
// Returns 0 when every element matches exactly, -1 at the first mismatch.
int
verify_fpadd2(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        const float expected = inptrA[idx] + inptrB[idx];

        if (expected != outptr[idx])
        {
            log_error("FP_ADD float2 test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FP_ADD float2 test passed\n");
    return 0;
}
|
||||
|
||||
// Compares the device output of the float2 subtract kernel with host-computed
// differences. The arrays are treated as flat float arrays of n scalars.
//
// Returns 0 when every element matches exactly, -1 at the first mismatch.
int
verify_fpsub2(float *inptrA, float *inptrB, float *outptr, int n)
{
    int pos;

    for (pos = 0; pos < n; ++pos)
    {
        const float expected = inptrA[pos] - inptrB[pos];

        if (expected != outptr[pos])
        {
            log_error("FP_SUB float2 test failed\n");
            return -1;
        }
    }

    log_info("FP_SUB float2 test passed\n");
    return 0;
}
|
||||
|
||||
// Compares the device output of the float2 multiply kernel with host-computed
// products. The arrays are treated as flat float arrays of n scalars.
//
// Returns 0 when every element matches exactly, -1 at the first mismatch.
int
verify_fpmul2(float *inptrA, float *inptrB, float *outptr, int n)
{
    int remaining;
    int at = 0;

    for (remaining = n; remaining > 0; --remaining, ++at)
    {
        const float expected = inptrA[at] * inptrB[at];

        if (expected != outptr[at])
        {
            log_error("FP_MUL float2 test failed\n");
            return -1;
        }
    }

    log_info("FP_MUL float2 test passed\n");
    return 0;
}
|
||||
|
||||
|
||||
int
|
||||
test_fpmath_float2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[4];
|
||||
cl_program program[3];
|
||||
cl_kernel kernel[3];
|
||||
|
||||
cl_float *input_ptr[3], *output_ptr, *p;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
|
||||
size_t length = sizeof(cl_float) * 2 * num_elements;
|
||||
int isRTZ = 0;
|
||||
RoundingMode oldMode = kDefaultRoundingMode;
|
||||
|
||||
// check for floating point capabilities
|
||||
cl_device_fp_config single_config = 0;
|
||||
err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
|
||||
if (err) {
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
//If we only support rtz mode
|
||||
if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
|
||||
{
|
||||
//Check to make sure we are an embedded device
|
||||
char profile[32];
|
||||
err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
|
||||
if( err )
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
|
||||
{
|
||||
log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
isRTZ = 1;
|
||||
oldMode = get_round();
|
||||
}
|
||||
|
||||
input_ptr[0] = (cl_float*)malloc(length);
|
||||
input_ptr[1] = (cl_float*)malloc(length);
|
||||
input_ptr[2] = (cl_float*)malloc(length);
|
||||
output_ptr = (cl_float*)malloc(length);
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<num_elements*2; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<num_elements*2; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
|
||||
p = input_ptr[2];
|
||||
for (i=0; i<num_elements*2; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd2_kernel_code, "test_fpadd2");
|
||||
test_error( err, "create_single_kernel_helper failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub2_kernel_code, "test_fpsub2");
|
||||
test_error( err, "create_single_kernel_helper failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul2_kernel_code, "test_fpmul2");
|
||||
test_error( err, "create_single_kernel_helper failed");
|
||||
|
||||
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
|
||||
test_error( err, "clSetKernelArgs failed.");
|
||||
|
||||
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
|
||||
test_error( err, "clSetKernelArgs failed.");
|
||||
|
||||
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
|
||||
test_error( err, "clSetKernelArgs failed.");
|
||||
free_mtdata(d);
|
||||
d = NULL;
|
||||
|
||||
threads[0] = (unsigned int)num_elements;
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueNDRangeKernel failed.");
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueReadBuffer failed.");
|
||||
|
||||
if( isRTZ )
|
||||
set_round( kRoundTowardZero, kfloat );
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_fpadd2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_fpsub2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_fpmul2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
|
||||
break;
|
||||
}
|
||||
|
||||
if( isRTZ )
|
||||
set_round( oldMode, kfloat );
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
clReleaseKernel(kernel[i]);
|
||||
clReleaseProgram(program[i]);
|
||||
}
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(input_ptr[2]);
|
||||
free(output_ptr);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
270
test_conformance/basic/test_fpmath_float4.c
Normal file
270
test_conformance/basic/test_fpmath_float4.c
Normal file
@@ -0,0 +1,270 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/rounding_mode.h"
|
||||
|
||||
// OpenCL C sources for the float4 kernels launched by test_fpmath_float4.
// Every kernel reads one float4 from srcA and srcB per work-item (indexed by
// get_global_id(0)) and stores the result of a single operator into dst.

// dst[tid] = srcA[tid] + srcB[tid]
const char *fpadd4_kernel_code =
    "__kernel void test_fpadd4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] + srcB[tid];\n"
    "}\n";

// dst[tid] = srcA[tid] - srcB[tid]
const char *fpsub4_kernel_code =
    "__kernel void test_fpsub4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] - srcB[tid];\n"
    "}\n";

// dst[tid] = srcA[tid] * srcB[tid]
const char *fpmul4_kernel_code =
    "__kernel void test_fpmul4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] * srcB[tid];\n"
    "}\n";
|
||||
|
||||
|
||||
// Compares the device output of the float4 add kernel with host-computed sums.
// The arrays are treated as flat float arrays of n scalars.
//
// Returns 0 when every element matches exactly, -1 at the first mismatch.
int
verify_fpadd4(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        const float expected = inptrA[idx] + inptrB[idx];

        if (expected != outptr[idx])
        {
            log_error("FP_ADD float4 test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FP_ADD float4 test passed\n");
    return 0;
}
|
||||
|
||||
// Compares the device output of the float4 subtract kernel with host-computed
// differences. The arrays are treated as flat float arrays of n scalars.
//
// Returns 0 when every element matches exactly, -1 at the first mismatch.
int
verify_fpsub4(float *inptrA, float *inptrB, float *outptr, int n)
{
    int pos;

    for (pos = 0; pos < n; ++pos)
    {
        const float expected = inptrA[pos] - inptrB[pos];

        if (expected != outptr[pos])
        {
            log_error("FP_SUB float4 test failed\n");
            return -1;
        }
    }

    log_info("FP_SUB float4 test passed\n");
    return 0;
}
|
||||
|
||||
// Compares the device output of the float4 multiply kernel with host-computed
// products. The arrays are treated as flat float arrays of n scalars.
//
// Returns 0 when every element matches exactly, -1 at the first mismatch.
int
verify_fpmul4(float *inptrA, float *inptrB, float *outptr, int n)
{
    int remaining;
    int at = 0;

    for (remaining = n; remaining > 0; --remaining, ++at)
    {
        const float expected = inptrA[at] * inptrB[at];

        if (expected != outptr[at])
        {
            log_error("FP_MUL float4 test failed\n");
            return -1;
        }
    }

    log_info("FP_MUL float4 test passed\n");
    return 0;
}
|
||||
|
||||
|
||||
int
|
||||
test_fpmath_float4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[4];
|
||||
cl_program program[3];
|
||||
cl_kernel kernel[3];
|
||||
|
||||
cl_float *input_ptr[3], *output_ptr, *p;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
|
||||
size_t length = sizeof(cl_float) * 4 * num_elements;
|
||||
int isRTZ = 0;
|
||||
RoundingMode oldMode = kDefaultRoundingMode;
|
||||
|
||||
// check for floating point capabilities
|
||||
cl_device_fp_config single_config = 0;
|
||||
err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
|
||||
if (err) {
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
//If we only support rtz mode
|
||||
if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
|
||||
{
|
||||
//Check to make sure we are an embedded device
|
||||
char profile[32];
|
||||
err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
|
||||
if( err )
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
|
||||
{
|
||||
log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
isRTZ = 1;
|
||||
oldMode = get_round();
|
||||
}
|
||||
|
||||
input_ptr[0] = (cl_float*)malloc(length);
|
||||
input_ptr[1] = (cl_float*)malloc(length);
|
||||
input_ptr[2] = (cl_float*)malloc(length);
|
||||
output_ptr = (cl_float*)malloc(length);
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<num_elements*4; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<num_elements*4; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
|
||||
p = input_ptr[2];
|
||||
for (i=0; i<num_elements*4; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
|
||||
|
||||
free_mtdata(d);
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd4_kernel_code, "test_fpadd4");
|
||||
test_error( err, "create_single_kernel_helper failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub4_kernel_code, "test_fpsub4");
|
||||
test_error( err, "create_single_kernel_helper failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul4_kernel_code, "test_fpmul4");
|
||||
test_error( err, "create_single_kernel_helper failed");
|
||||
|
||||
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
|
||||
test_error( err, "clSetKernelArgs failed.");
|
||||
|
||||
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
|
||||
test_error( err, "clSetKernelArgs failed.");
|
||||
|
||||
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
|
||||
test_error( err, "clSetKernelArgs failed.");
|
||||
|
||||
|
||||
threads[0] = (unsigned int)num_elements;
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueNDRangeKernel failed.");
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueReadBuffer failed.");
|
||||
|
||||
if( isRTZ )
|
||||
set_round( kRoundTowardZero, kfloat );
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_fpadd4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_fpsub4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_fpmul4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
|
||||
break;
|
||||
}
|
||||
|
||||
if( isRTZ )
|
||||
set_round( oldMode, kfloat );
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
clReleaseKernel(kernel[i]);
|
||||
clReleaseProgram(program[i]);
|
||||
}
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(input_ptr[2]);
|
||||
free(output_ptr);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
191
test_conformance/basic/test_get_linear_ids.cpp
Normal file
191
test_conformance/basic/test_get_linear_ids.cpp
Normal file
@@ -0,0 +1,191 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include <ctype.h>
|
||||
|
||||
// OpenCL C source for test_get_linear_ids. Each work-item derives its own
// zero-based linear global and local index from the per-dimension ID queries
// and records, in out[gid], whether they equal get_global_linear_id() (.x)
// and get_local_linear_id() (.y). A stored 1 means the values agreed.
static const char *linear_ids_source[1] = {
    "__kernel void test_linear_ids(__global int2 *out)\n"
    "{\n"
    " size_t lid, gid;\n"
    " uint d = get_work_dim();\n"
    " if (d == 1U) {\n"
    " gid = get_global_id(0) - get_global_offset(0);\n"
    " lid = get_local_id(0);\n"
    " } else if (d == 2U) {\n"
    " gid = (get_global_id(1) - get_global_offset(1)) * get_global_size(0) +\n"
    " (get_global_id(0) - get_global_offset(0));\n"
    " lid = get_local_id(1) * get_local_size(0) + get_local_id(0);\n"
    " } else {\n"
    " gid = ((get_global_id(2) - get_global_offset(2)) * get_global_size(1) +\n"
    " (get_global_id(1) - get_global_offset(1))) * get_global_size(0) +\n"
    " (get_global_id(0) - get_global_offset(0));\n"
    " lid = (get_local_id(2) * get_local_size(1) +\n"
    " get_local_id(1)) * get_local_size(0) + get_local_id(0);\n"
    " }\n"
    " out[gid].x = gid == get_global_linear_id();\n"
    " out[gid].y = lid == get_local_linear_id();\n"
    "}\n"
};

// Number of randomized launches; the dimensionality cycles 1, 2, 3.
#define NUM_ITER 12
// Upper bound on the 1D global size; the host result array holds 2*MAX_1D ints.
#define MAX_1D 4096
// Upper bound on each global size component of a 2D launch.
#define MAX_2D 64
// Upper bound on each global size component of a 3D launch.
#define MAX_3D 16
// Upper bound on each randomly chosen global offset component.
#define MAX_OFFSET 100000
|
||||
|
||||
int
|
||||
test_get_linear_ids(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper outbuf;
|
||||
int error, iter, i, j, k;
|
||||
size_t lws[3], gws[3], gwo[3];
|
||||
cl_uint dims;
|
||||
cl_int outmem[2*MAX_1D], *om;
|
||||
|
||||
|
||||
// Create the kernel
|
||||
error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, linear_ids_source, "test_linear_ids", "-cl-std=CL2.0");
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
// Create the out buffer
|
||||
outbuf = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(outmem), NULL, &error);
|
||||
test_error(error, "failed to create result buffer\n");
|
||||
|
||||
// This will leak if there is an error, but this is what is done everywhere else
|
||||
MTdata seed = init_genrand(gRandomSeed);
|
||||
|
||||
// Run some tests
|
||||
for (iter=0; iter<NUM_ITER; ++iter) {
|
||||
dims = iter % 3 + 1;
|
||||
|
||||
switch (dims) {
|
||||
case 1:
|
||||
gwo[0] = random_in_range(0, MAX_OFFSET, seed);
|
||||
gws[0] = random_in_range(MAX_1D/8, MAX_1D/4, seed)*4;
|
||||
error = get_max_common_work_group_size(context, kernel, gws[0], lws);
|
||||
break;
|
||||
case 2:
|
||||
gwo[0] = random_in_range(0, MAX_OFFSET, seed);
|
||||
gwo[1] = random_in_range(0, MAX_OFFSET, seed);
|
||||
gws[0] = random_in_range(MAX_2D/8, MAX_2D/4, seed)*4;
|
||||
gws[1] = random_in_range(MAX_2D/8, MAX_2D/4, seed)*4;
|
||||
error = get_max_common_2D_work_group_size(context, kernel, gws, lws);
|
||||
break;
|
||||
case 3:
|
||||
gwo[0] = random_in_range(0, MAX_OFFSET, seed);
|
||||
gwo[1] = random_in_range(0, MAX_OFFSET, seed);
|
||||
gwo[2] = random_in_range(0, MAX_OFFSET, seed);
|
||||
gws[0] = random_in_range(MAX_3D/4, MAX_3D/2, seed)*2;
|
||||
gws[1] = random_in_range(MAX_3D/4, MAX_3D/2, seed)*2;
|
||||
gws[2] = random_in_range(MAX_3D/4, MAX_3D/2, seed)*2;
|
||||
error = get_max_common_3D_work_group_size(context, kernel, gws, lws);
|
||||
break;
|
||||
}
|
||||
|
||||
test_error(error, "Failed to determine local work size\n");
|
||||
|
||||
|
||||
switch (dims) {
|
||||
case 1:
|
||||
log_info(" testing offset=%u global=%u local=%u...\n", gwo[0], gws[0], lws[0]);
|
||||
break;
|
||||
case 2:
|
||||
log_info(" testing offset=(%u,%u) global=(%u,%u) local=(%u,%u)...\n",
|
||||
gwo[0], gwo[1], gws[0], gws[1], lws[0], lws[1]);
|
||||
break;
|
||||
case 3:
|
||||
log_info(" testing offset=(%u,%u,%u) global=(%u,%u,%u) local=(%u,%u,%u)...\n",
|
||||
gwo[0], gwo[1], gwo[2], gws[0], gws[1], gws[2], lws[0], lws[1], lws[2]);
|
||||
break;
|
||||
}
|
||||
|
||||
// Set up and run
|
||||
memset(outmem, 0, sizeof(outmem));
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof(outbuf), (void *)&outbuf);
|
||||
test_error(error, "clSetKernelArg failed\n");
|
||||
|
||||
error = clEnqueueWriteBuffer(queue, outbuf, CL_FALSE, 0, sizeof(outmem), (void *)outmem, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueWriteBuffer failed\n");
|
||||
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, dims, gwo, gws, lws, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed\n");
|
||||
|
||||
error = clEnqueueReadBuffer(queue, outbuf, CL_FALSE, 0, sizeof(outmem), (void *)outmem, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueReadBuffer failed\n");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed\n");
|
||||
|
||||
// Check the return
|
||||
switch (dims) {
|
||||
case 1:
|
||||
for (i=0, om=outmem; i<(int)gws[0]; ++i, om+=2) {
|
||||
if (om[0] != 1) {
|
||||
log_error("get_global_linear_id() failed at %d\n", i);
|
||||
return -1;
|
||||
}
|
||||
if (om[1] != 1) {
|
||||
log_error("get_local_linear_id() failed at (%d, %d)\n", i % (int)lws[0], i / (int)lws[0]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (j=0, om=outmem; j<gws[1]; ++j) {
|
||||
for (i=0; i<gws[0]; ++i, om+=2) {
|
||||
if (om[0] != 1) {
|
||||
log_error("get_global_linear_id() failed at (%d,%d)\n", i, j);
|
||||
return -1;
|
||||
}
|
||||
if (om[1] != 1) {
|
||||
log_error("get_local_linear_id() failed at (%d, %d), (%d, %d)\n",
|
||||
i % (int)lws[0], j % (int)lws[1],
|
||||
i / (int)lws[0], j / (int)lws[1]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
for (k=0, om=outmem; k<gws[2]; ++k) {
|
||||
for (j=0; j<gws[1]; ++j) {
|
||||
for (i=0; i<gws[0]; ++i, om+=2) {
|
||||
if (om[0] != 1) {
|
||||
log_error("get_global_linear_id() failed at (%d,%d, %d)\n", i, j, k);
|
||||
return -1;
|
||||
}
|
||||
if (om[1] != 1) {
|
||||
log_error("get_local_linear_id() failed at (%d, %d), (%d, %d), (%d, %d)\n",
|
||||
i % (int)lws[0], j % (int)lws[1], k % (int)lws[2],
|
||||
i / (int)lws[0], j / (int)lws[1], k / (int)lws[2]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
free_mtdata(seed);
|
||||
return 0;
|
||||
}
|
||||
|
||||
121
test_conformance/basic/test_global_linear_id.c
Normal file
121
test_conformance/basic/test_global_linear_id.c
Normal file
@@ -0,0 +1,121 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include "../../test_common/harness/rounding_mode.h"
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// OpenCL C source for the 2D check. Each work-item builds a row-major linear
// index from get_global_id(0), get_global_id(1) and get_global_size(0),
// compares it with get_global_linear_id(), and stores 1 (match) or 0
// (mismatch) in dst at that index.
static const char *global_linear_id_2d_code =
|
||||
"__kernel void test_global_linear_id_2d(global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
"\n"
|
||||
" int linear_id = tid_y * get_global_size(0) + tid_x;\n"
|
||||
" int result = (linear_id == (int)get_global_linear_id()) ? 0x1 : 0x0;\n"
|
||||
" dst[linear_id] = result;\n"
|
||||
"}\n";
|
||||
|
||||
// OpenCL C source for the 1D check. Each work-item compares get_global_id(0)
// with get_global_linear_id() and stores 1 (match) or 0 (mismatch) in
// dst[get_global_id(0)].
static const char *global_linear_id_1d_code =
|
||||
"__kernel void test_global_linear_id_1d(global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
"\n"
|
||||
" int result = (tid_x == (int)get_global_linear_id()) ? 0x1 : 0x0;\n"
|
||||
" dst[tid_x] = result;\n"
|
||||
"}\n";
|
||||
|
||||
|
||||
/*
 * Scans the first n entries of result and fails on the first entry equal
 * to 0 (the kernels store 0 when get_global_linear_id() did not match).
 * The outcome is logged either way.
 *
 * Returns 0 when every inspected entry is non-zero, -1 otherwise.
 */
static int
verify_global_linear_id(int *result, int n)
{
    const int *cursor = result;
    int remaining;

    for (remaining = n; remaining > 0; --remaining, ++cursor)
    {
        if (*cursor != 0)
            continue;

        log_error("get_global_linear_id failed\n");
        return -1;
    }

    log_info("get_global_linear_id passed\n");
    return 0;
}
|
||||
|
||||
|
||||
int
|
||||
test_global_linear_id(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams;
|
||||
cl_program program[2];
|
||||
cl_kernel kernel[2];
|
||||
|
||||
int *output_ptr;
|
||||
size_t threads[2];
|
||||
int err;
|
||||
num_elements = (int)sqrt((float)num_elements);
|
||||
int length = num_elements * num_elements;
|
||||
|
||||
output_ptr = (int*)malloc(sizeof(int) * length);
|
||||
|
||||
streams = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length*sizeof(int), NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
|
||||
err = create_single_kernel_helper_with_build_options(context, &program[0], &kernel[0], 1, &global_linear_id_1d_code, "test_global_linear_id_1d", "-cl-std=CL2.0");
|
||||
test_error( err, "create_single_kernel_helper failed");
|
||||
err = create_single_kernel_helper_with_build_options(context, &program[1], &kernel[1], 1, &global_linear_id_2d_code, "test_global_linear_id_2d", "-cl-std=CL2.0");
|
||||
test_error( err, "create_single_kernel_helper failed");
|
||||
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof streams, &streams);
|
||||
test_error( err, "clSetKernelArgs failed.");
|
||||
err = clSetKernelArg(kernel[1], 0, sizeof streams, &streams);
|
||||
test_error( err, "clSetKernelArgs failed.");
|
||||
|
||||
threads[0] = (size_t)num_elements;
|
||||
threads[1] = (size_t)num_elements;
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[1], 2, NULL, threads, NULL, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueNDRangeKernel failed.");
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, length*sizeof(int), output_ptr, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueReadBuffer failed.");
|
||||
|
||||
err = verify_global_linear_id(output_ptr, length);
|
||||
|
||||
threads[0] = (size_t)num_elements;
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[1], 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueNDRangeKernel failed.");
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, num_elements*sizeof(int), output_ptr, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueReadBuffer failed.");
|
||||
|
||||
err = verify_global_linear_id(output_ptr, num_elements);
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams);
|
||||
clReleaseKernel(kernel[0]);
|
||||
clReleaseKernel(kernel[1]);
|
||||
clReleaseProgram(program[0]);
|
||||
clReleaseProgram(program[1]);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
284
test_conformance/basic/test_global_work_offsets.cpp
Normal file
284
test_conformance/basic/test_global_work_offsets.cpp
Normal file
@@ -0,0 +1,284 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include <ctype.h>
|
||||
|
||||
|
||||
// OpenCL C source of the kernel used by test_global_work_offsets.
// Each work-item computes a per-dimension index from its local ID, group ID
// and local size (idN), linearizes it with the global sizes into `id`, and
// stores get_global_id(0), get_global_id(1) and get_global_id(2) at that
// index in the three output buffers.
const char *work_offset_test[] = {
|
||||
"__kernel void test( __global int * outputID_A, \n"
|
||||
" __global int * outputID_B, __global int * outputID_C )\n"
|
||||
"{\n"
|
||||
" size_t id0 = get_local_id( 0 ) + get_group_id( 0 ) * get_local_size( 0 );\n"
|
||||
" size_t id1 = get_local_id( 1 ) + get_group_id( 1 ) * get_local_size( 1 );\n"
|
||||
" size_t id2 = get_local_id( 2 ) + get_group_id( 2 ) * get_local_size( 2 );\n"
|
||||
" size_t id = ( id2 * get_global_size( 0 ) * get_global_size( 1 ) ) + ( id1 * get_global_size( 0 ) ) + id0;\n"
|
||||
"\n"
|
||||
" outputID_A[ id ] = get_global_id( 0 );\n"
|
||||
" outputID_B[ id ] = get_global_id( 1 );\n"
|
||||
" outputID_C[ id ] = get_global_id( 2 );\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
// Upper bound on the number of work-items in one launch; also the element
// count of the host result arrays. Parenthesized so the macro expands as a
// single value in any expression (e.g. as the right-hand side of a division).
#define MAX_TEST_ITEMS ( 16 * 16 * 16 )
// Number of randomized launches performed by each test.
#define NUM_TESTS 16
// Largest global work offset (per dimension) that the tests generate.
#define MAX_OFFSET 256
|
||||
|
||||
// Fails the enclosing function (logs and returns -1) when v is negative or
// not below m. c is the letter of the output array, used in the message.
// Relies on a loop index named `i` being in scope at the point of use, and is
// written as a bare if-block because existing uses have no trailing semicolon.
// Arguments are parenthesized, and the size_t values are cast to match %lu.
#define CHECK_RANGE( v, m, c ) \
    if( ( (v) >= (cl_int)(m) ) || ( (v) < 0 ) ) \
    { \
        log_error( "ERROR: ouputID_%c[%lu]: %d is < 0 or >= %lu\n", (c), (unsigned long)i, (v), (unsigned long)(m) ); \
        return -1; \
    }
|
||||
|
||||
int check_results( size_t threads[], size_t offsets[], cl_int outputA[], cl_int outputB[], cl_int outputC[] )
|
||||
{
|
||||
size_t offsettedSizes[ 3 ] = { threads[ 0 ] + offsets[ 0 ], threads[ 1 ] + offsets[ 1 ], threads[ 2 ] + offsets[ 2 ] };
|
||||
size_t limit = threads[ 0 ] * threads[ 1 ] * threads[ 2 ];
|
||||
|
||||
static char counts[ MAX_OFFSET + 32 ][ MAX_OFFSET + 16 ][ MAX_OFFSET + 16 ];
|
||||
memset( counts, 0, sizeof( counts ) );
|
||||
|
||||
for( size_t i = 0; i < limit; i++ )
|
||||
{
|
||||
// Check ranges first
|
||||
CHECK_RANGE( outputA[ i ], offsettedSizes[ 0 ], 'A' )
|
||||
CHECK_RANGE( outputB[ i ], offsettedSizes[ 1 ], 'B' )
|
||||
CHECK_RANGE( outputC[ i ], offsettedSizes[ 2 ], 'C' )
|
||||
|
||||
// Now set the value in the map
|
||||
counts[ outputA[ i ] ][ outputB[ i ] ][ outputC[ i ] ]++;
|
||||
}
|
||||
|
||||
// Now check the map
|
||||
int missed = 0, multiple = 0, errored = 0, corrected = 0;
|
||||
for( size_t x = 0; x < offsettedSizes[ 0 ]; x++ )
|
||||
{
|
||||
for( size_t y = 0; y < offsettedSizes[ 1 ]; y++ )
|
||||
{
|
||||
for( size_t z = 0; z < offsettedSizes[ 2 ]; z++ )
|
||||
{
|
||||
const char * limitMsg = " (further errors of this type suppressed)";
|
||||
if( ( x >= offsets[ 0 ] ) && ( y >= offsets[ 1 ] ) && ( z >= offsets[ 2 ] ) )
|
||||
{
|
||||
if( counts[ x ][ y ][ z ] < 1 )
|
||||
{
|
||||
if( missed < 3 )
|
||||
log_error( "ERROR: Map value (%ld,%ld,%ld) was missed%s\n", x, y, z, ( missed == 2 ) ? limitMsg : "" );
|
||||
missed++;
|
||||
}
|
||||
else if( counts[ x ][ y ][ z ] > 1 )
|
||||
{
|
||||
if( multiple < 3 )
|
||||
log_error( "ERROR: Map value (%ld,%ld,%ld) was returned multiple times%s\n", x, y, z, ( multiple == 2 ) ? limitMsg : "" );
|
||||
multiple++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( counts[ x ][ y ][ z ] > 0 )
|
||||
{
|
||||
if( errored < 3 )
|
||||
log_error( "ERROR: Map value (%ld,%ld,%ld) was erroneously returned%s\n", x, y, z, ( errored == 2 ) ? limitMsg : "" );
|
||||
errored++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( missed || multiple || errored )
|
||||
{
|
||||
size_t diffs[3] = { ( offsets[ 0 ] > threads[ 0 ] ? 0 : threads[ 0 ] - offsets[ 0 ] ),
|
||||
( offsets[ 1 ] > threads[ 1 ] ? 0 : threads[ 1 ] - offsets[ 1 ] ),
|
||||
( offsets[ 2 ] > threads[ 2 ] ? 0 : threads[ 2 ] - offsets[ 2 ] ) };
|
||||
int diff = (int)( ( threads[ 0 ] - diffs[ 0 ] ) * ( threads[ 1 ] - diffs[ 1 ] ) * ( threads[ 2 ] - diffs[ 2 ] ) );
|
||||
|
||||
if( ( multiple == 0 ) && ( missed == diff ) && ( errored == diff ) )
|
||||
log_error( "ERROR: Global work offset values are not being respected by get_global_id()\n" );
|
||||
else
|
||||
log_error( "ERROR: Global work offset values did not function as expected (%d missed, %d reported multiple times, %d erroneously hit)\n",
|
||||
missed, multiple, errored );
|
||||
}
|
||||
return ( missed | multiple | errored | corrected );
|
||||
}
|
||||
|
||||
int test_global_work_offsets(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[ 7 ];
|
||||
|
||||
int error;
|
||||
size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
|
||||
cl_int outputA[ MAX_TEST_ITEMS ], outputB[ MAX_TEST_ITEMS ], outputC[ MAX_TEST_ITEMS ];
|
||||
|
||||
|
||||
// Create the kernel
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, work_offset_test, "test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
//// Create some output streams
|
||||
|
||||
// Use just one output array to init them all (no need to init every single stack storage here)
|
||||
memset( outputA, 0xff, sizeof( outputA ) );
|
||||
for( int i = 0; i < 3; i++ )
|
||||
{
|
||||
streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof(outputA), outputA, &error );
|
||||
test_error( error, "Unable to create output array" );
|
||||
}
|
||||
|
||||
// Run a few different times
|
||||
MTdata seed = init_genrand( gRandomSeed );
|
||||
for( int test = 0; test < NUM_TESTS; test++ )
|
||||
{
|
||||
// Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS
|
||||
threads[ 0 ] = random_in_range( 1, 32, seed );
|
||||
threads[ 1 ] = random_in_range( 1, 16, seed );
|
||||
threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );
|
||||
|
||||
// Make sure we get the local thread count right
|
||||
error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
|
||||
test_error( error, "Unable to determine local work group sizes" );
|
||||
|
||||
// Randomize some offsets
|
||||
for( int j = 0; j < 3; j++ )
|
||||
offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );
|
||||
|
||||
log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
|
||||
threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
|
||||
offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
|
||||
|
||||
// Now set up and run
|
||||
for( int i = 0; i < 3; i++ )
|
||||
{
|
||||
error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
}
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
// Read our results back now
|
||||
cl_int * resultBuffers[] = { outputA, outputB, outputC };
|
||||
for( int i = 0; i < 3; i++ )
|
||||
{
|
||||
error = clEnqueueReadBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( outputA ), resultBuffers[ i ], 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
}
|
||||
|
||||
// Now we need to check the results. The outputs should have one entry for each possible ID,
|
||||
// but they won't be in order, so we need to construct a count map to determine what we got
|
||||
if( check_results( threads, offsets, outputA, outputB, outputC ) )
|
||||
{
|
||||
log_error( "\t(Test failed for global dim %ld,%ld,%ld, local dim %ld,%ld,%ld, offsets %ld,%ld,%ld)\n",
|
||||
threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
|
||||
offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
free_mtdata(seed);
|
||||
|
||||
// All done!
|
||||
return 0;
|
||||
}
|
||||
|
||||
// OpenCL C source of the kernel used by test_get_global_offset.
// Every work-item stores get_global_offset(0), (1) and (2) into
// outOffsets[0..2]; all work-items write the same three slots.
// NOTE(review): the comment embedded in the kernel source mentions using the
// local ID, but the kernel body shown here does not read it.
const char *get_offset_test[] = {
|
||||
"__kernel void test( __global int * outOffsets )\n"
|
||||
"{\n"
|
||||
" // We use local ID here so we don't have to worry about offsets\n"
|
||||
" // Also note that these should be the same for ALL threads, so we won't worry about contention\n"
|
||||
" outOffsets[ 0 ] = (int)get_global_offset( 0 );\n"
|
||||
" outOffsets[ 1 ] = (int)get_global_offset( 1 );\n"
|
||||
" outOffsets[ 2 ] = (int)get_global_offset( 2 );\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
int test_get_global_offset(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[ 1 ];
|
||||
|
||||
int error;
|
||||
size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
|
||||
cl_int outOffsets[ 3 ];
|
||||
|
||||
|
||||
// Create the kernel
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, get_offset_test, "test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Create some output streams, and storage for a single control ID
|
||||
memset( outOffsets, 0xff, sizeof( outOffsets ) );
|
||||
streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof( outOffsets ), outOffsets, &error );
|
||||
test_error( error, "Unable to create control ID buffer" );
|
||||
|
||||
// Run a few different times
|
||||
MTdata seed = init_genrand( gRandomSeed );
|
||||
for( int test = 0; test < NUM_TESTS; test++ )
|
||||
{
|
||||
// Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS
|
||||
threads[ 0 ] = random_in_range( 1, 32, seed );
|
||||
threads[ 1 ] = random_in_range( 1, 16, seed );
|
||||
threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );
|
||||
|
||||
// Make sure we get the local thread count right
|
||||
error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
|
||||
test_error( error, "Unable to determine local work group sizes" );
|
||||
|
||||
// Randomize some offsets
|
||||
for( int j = 0; j < 3; j++ )
|
||||
offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );
|
||||
|
||||
log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
|
||||
threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
|
||||
offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
|
||||
|
||||
// Now set up and run
|
||||
error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
// Read our results back now
|
||||
error = clEnqueueReadBuffer( queue, streams[ 0 ], CL_TRUE, 0, sizeof( outOffsets ), outOffsets, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
// And check!
|
||||
int errors = 0;
|
||||
for( int j = 0; j < 3; j++ )
|
||||
{
|
||||
if( outOffsets[ j ] != (cl_int)offsets[ j ] )
|
||||
{
|
||||
log_error( "ERROR: get_global_offset( %d ) did not return expected value (expected %ld, got %d)\n", j, offsets[ j ], outOffsets[ j ] );
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if( errors > 0 )
|
||||
return errors;
|
||||
}
|
||||
free_mtdata(seed);
|
||||
|
||||
// All done!
|
||||
return 0;
|
||||
}
|
||||
|
||||
421
test_conformance/basic/test_hiloeo.c
Normal file
421
test_conformance/basic/test_hiloeo.c
Normal file
@@ -0,0 +1,421 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * Source-element index helpers, one per vector operator. Given the index of
 * an element in the result vector and the size of the source vector, each
 * returns the index of the source element that the operator selects.
 */

/* .hi : the upper half starts vectorSize / 2 elements in. */
int hi_offset( int index, int vectorSize)
{
    const int half = vectorSize / 2;
    return index + half;
}

/* .lo : the lower half maps one-to-one. */
int lo_offset( int index, int vectorSize)
{
    (void)vectorSize;
    return index;
}

/* .even : elements 0, 2, 4, ... */
int even_offset( int index, int vectorSize )
{
    (void)vectorSize;
    return index * 2;
}

/* .odd : elements 1, 3, 5, ... */
int odd_offset( int index, int vectorSize )
{
    (void)vectorSize;
    return index * 2 + 1;
}

typedef int (*OffsetFunc)( int index, int vectorSize );

/* Dispatch table; entry order is hi, lo, even, odd. */
static const OffsetFunc offsetFuncs[4] = {
    hi_offset,
    lo_offset,
    even_offset,
    odd_offset
};
|
||||
/* Signature of a verification callback. */
typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName );

/* Vector operators under test. */
static const char *operatorToUse_names[] = {
    "hi", "lo", "even", "odd"
};

/* Element types under test; kSizes holds the matching element sizes. */
static const char *test_str_names[] = {
    "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "float", "double"
};

/* Element counts of the vector types under test. */
static const unsigned int vector_sizes[] = {
    1, 2, 3, 4, 8, 16
};

/* Number of element slots each vector type occupies (a 3-vector occupies 4). */
static const unsigned int vector_aligns[] = {
    1, 2, 4, 4, 8, 16
};

/*
 * Maps an input vector index to the index of its result vector:
 * if the input has vector_sizes[i] elements, the output has
 * vector_sizes[out_vector_idx[i]] elements. The input type name is the
 * element type name followed by vector_size_names[i], and the output type
 * name is the element type name followed by
 * vector_size_names[out_vector_idx[i]].
 */
static const unsigned int out_vector_idx[] = {
    0, 0, 1, 1, 3, 4
};

/* Maps an element count to its index in vector_sizes; -1 where no such vector type exists. */
static const int size_to_idx[] = {
    -1,  0,  1,  2,  3, -1, -1, -1,  4,
    -1, -1, -1, -1, -1, -1, -1,  5
};

/* Type-name suffix for each entry of vector_sizes. */
static const char *vector_size_names[] = {
    "", "2", "3", "4", "8", "16"
};

/* Size in bytes of each element type, in test_str_names order. */
static const size_t kSizes[] = {
    1, 1, 2, 2, 4, 4, 8, 8, 4, 8
};

static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse );
|
||||
|
||||
int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
cl_int *input_ptr, *output_ptr, *p;
|
||||
int err;
|
||||
cl_uint i;
|
||||
int hasDouble = is_extension_available( device, "cl_khr_fp64" );
|
||||
cl_uint vectorSize, operatorToUse;
|
||||
cl_uint type;
|
||||
MTdata d;
|
||||
|
||||
int expressionMode;
|
||||
int numExpressionModes = 2;
|
||||
|
||||
size_t length = sizeof(cl_int) * 4 * n_elems;
|
||||
|
||||
input_ptr = (cl_int*)malloc(length);
|
||||
output_ptr = (cl_int*)malloc(length);
|
||||
|
||||
p = input_ptr;
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<4 * (cl_uint) n_elems; i++)
|
||||
p[i] = genrand_int32(d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
|
||||
{
|
||||
// Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
|
||||
size_t elementCount = length / kSizes[type];
|
||||
cl_mem streams[2];
|
||||
|
||||
// skip double if unavailable
|
||||
if( !hasDouble && ( 0 == strcmp( test_str_names[type], "double" )))
|
||||
continue;
|
||||
|
||||
if( !gHasLong &&
|
||||
(( 0 == strcmp( test_str_names[type], "long" )) ||
|
||||
( 0 == strcmp( test_str_names[type], "ulong" ))))
|
||||
continue;
|
||||
|
||||
log_info( "%s", test_str_names[type] );
|
||||
fflush( stdout );
|
||||
|
||||
// Set up data streams for the type
|
||||
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for( operatorToUse = 0; operatorToUse < sizeof( operatorToUse_names ) / sizeof( operatorToUse_names[0] ); operatorToUse++ )
|
||||
{
|
||||
log_info( " %s", operatorToUse_names[ operatorToUse ] );
|
||||
fflush( stdout );
|
||||
for( vectorSize = 1; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) {
|
||||
for(expressionMode = 0; expressionMode < numExpressionModes; ++expressionMode) {
|
||||
|
||||
cl_program program = NULL;
|
||||
cl_kernel kernel = NULL;
|
||||
cl_uint outVectorSize = out_vector_idx[vectorSize];
|
||||
char expression[1024];
|
||||
|
||||
const char *source[] = {
|
||||
"", // optional pragma string
|
||||
"__kernel void test_", operatorToUse_names[ operatorToUse ], "_", test_str_names[type], vector_size_names[vectorSize],
|
||||
"(__global ", test_str_names[type], vector_size_names[vectorSize],
|
||||
" *srcA, __global ", test_str_names[type], vector_size_names[outVectorSize],
|
||||
" *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" ", test_str_names[type],
|
||||
vector_size_names[out_vector_idx[vectorSize]],
|
||||
" tmp = ", expression, ".", operatorToUse_names[ operatorToUse ], ";\n"
|
||||
" dst[tid] = tmp;\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
if(expressionMode == 0) {
|
||||
sprintf(expression, "srcA[tid]");
|
||||
} else if(expressionMode == 1) {
|
||||
switch(vector_sizes[vectorSize]) {
|
||||
case 16:
|
||||
sprintf(expression,
|
||||
"((%s16)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7, srcA[tid].s8, srcA[tid].s9, srcA[tid].sA, srcA[tid].sB, srcA[tid].sC, srcA[tid].sD, srcA[tid].sE, srcA[tid].sf))",
|
||||
test_str_names[type]
|
||||
);
|
||||
break;
|
||||
case 8:
|
||||
sprintf(expression,
|
||||
"((%s8)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7))",
|
||||
test_str_names[type]
|
||||
);
|
||||
break;
|
||||
case 4:
|
||||
sprintf(expression,
|
||||
"((%s4)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3))",
|
||||
test_str_names[type]
|
||||
);
|
||||
break;
|
||||
case 3:
|
||||
sprintf(expression,
|
||||
"((%s3)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2))",
|
||||
test_str_names[type]
|
||||
);
|
||||
break;
|
||||
case 2:
|
||||
sprintf(expression,
|
||||
"((%s2)(srcA[tid].s0, srcA[tid].s1))",
|
||||
test_str_names[type]
|
||||
);
|
||||
break;
|
||||
default :
|
||||
sprintf(expression, "srcA[tid]");
|
||||
log_info("Default\n");
|
||||
}
|
||||
} else {
|
||||
sprintf(expression, "srcA[tid]");
|
||||
}
|
||||
|
||||
if (0 == strcmp( test_str_names[type], "double" ))
|
||||
source[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
|
||||
|
||||
char kernelName[128];
|
||||
snprintf( kernelName, sizeof( kernelName ), "test_%s_%s%s", operatorToUse_names[ operatorToUse ], test_str_names[type], vector_size_names[vectorSize] );
|
||||
err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
//Wipe the output buffer clean
|
||||
uint32_t pattern = 0xdeadbeef;
|
||||
memset_pattern4( output_ptr, &pattern, length );
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t size = elementCount / (vector_aligns[vectorSize]);
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
char *inP = (char *)input_ptr;
|
||||
char *outP = (char *)output_ptr;
|
||||
outP += kSizes[type] * ( ( vector_sizes[outVectorSize] ) -
|
||||
( vector_sizes[ out_vector_idx[vectorSize] ] ) );
|
||||
// was outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) );
|
||||
for( size_t e = 0; e < size; e++ )
|
||||
{
|
||||
if( CheckResults( inP, outP, 1, type, vectorSize, operatorToUse ) ) {
|
||||
|
||||
log_info("e is %d\n", (int)e);
|
||||
fflush(stdout);
|
||||
// break;
|
||||
return -1;
|
||||
}
|
||||
inP += kSizes[type] * ( vector_aligns[vectorSize] );
|
||||
outP += kSizes[type] * ( vector_aligns[outVectorSize] );
|
||||
}
|
||||
|
||||
clReleaseKernel( kernel );
|
||||
clReleaseProgram( program );
|
||||
log_info( "." );
|
||||
fflush( stdout );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clReleaseMemObject( streams[0] );
|
||||
clReleaseMemObject( streams[1] );
|
||||
log_info( "done\n" );
|
||||
}
|
||||
|
||||
log_info("HiLoEO test passed\n");
|
||||
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse )
|
||||
{
|
||||
cl_ulong array[8];
|
||||
void *p = array;
|
||||
size_t halfVectorSize = vector_sizes[out_vector_idx[vectorSize]];
|
||||
size_t cmpVectorSize = vector_sizes[out_vector_idx[vectorSize]];
|
||||
// was 1 << (vectorSize-1);
|
||||
OffsetFunc f = offsetFuncs[ operatorToUse ];
|
||||
size_t elementSize = kSizes[type];
|
||||
|
||||
if(vector_size_names[vectorSize][0] == '3') {
|
||||
if(operatorToUse_names[operatorToUse][0] == 'h' ||
|
||||
operatorToUse_names[operatorToUse][0] == 'o') // hi or odd
|
||||
{
|
||||
cmpVectorSize = 1; // special case for vec3 ignored values
|
||||
}
|
||||
}
|
||||
|
||||
switch( elementSize )
|
||||
{
|
||||
case 1:
|
||||
{
|
||||
char *i = (char*)in;
|
||||
char *o = (char*)out;
|
||||
size_t j;
|
||||
cl_uint k;
|
||||
OffsetFunc f = offsetFuncs[ operatorToUse ];
|
||||
|
||||
for( k = 0; k < elementCount; k++ )
|
||||
{
|
||||
char *o2 = (char*)p;
|
||||
for( j = 0; j < halfVectorSize; j++ )
|
||||
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
|
||||
|
||||
if( memcmp( o, o2, elementSize * cmpVectorSize ) )
|
||||
{
|
||||
log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
|
||||
for( j = 1; j < halfVectorSize * 2; j++ )
|
||||
log_info( ", %d", i[j] );
|
||||
log_info( " } --> { %d", o[0] );
|
||||
for( j = 1; j < halfVectorSize; j++ )
|
||||
log_info( ", %d", o[j] );
|
||||
log_info( " }\n" );
|
||||
return -1;
|
||||
}
|
||||
i += 2 * halfVectorSize;
|
||||
o += halfVectorSize;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 2:
|
||||
{
|
||||
short *i = (short*)in;
|
||||
short *o = (short*)out;
|
||||
size_t j;
|
||||
cl_uint k;
|
||||
|
||||
for( k = 0; k < elementCount; k++ )
|
||||
{
|
||||
short *o2 = (short*)p;
|
||||
for( j = 0; j < halfVectorSize; j++ )
|
||||
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
|
||||
|
||||
if( memcmp( o, o2, elementSize * cmpVectorSize ) )
|
||||
{
|
||||
log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
|
||||
for( j = 1; j < halfVectorSize * 2; j++ )
|
||||
log_info( ", %d", i[j] );
|
||||
log_info( " } --> { %d", o[0] );
|
||||
for( j = 1; j < halfVectorSize; j++ )
|
||||
log_info( ", %d", o[j] );
|
||||
log_info( " }\n" );
|
||||
return -1;
|
||||
}
|
||||
i += 2 * halfVectorSize;
|
||||
o += halfVectorSize;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 4:
|
||||
{
|
||||
int *i = (int*)in;
|
||||
int *o = (int*)out;
|
||||
size_t j;
|
||||
cl_uint k;
|
||||
|
||||
for( k = 0; k < elementCount; k++ )
|
||||
{
|
||||
int *o2 = (int *)p;
|
||||
for( j = 0; j < halfVectorSize; j++ )
|
||||
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
|
||||
|
||||
for( j = 0; j < cmpVectorSize; j++ )
|
||||
{
|
||||
/* Allow float nans to be binary different */
|
||||
if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j])))
|
||||
{
|
||||
log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
|
||||
for( j = 1; j < halfVectorSize * 2; j++ )
|
||||
log_info( ", 0x%8.8x", i[j] );
|
||||
log_info( " } --> { 0x%8.8x", o[0] );
|
||||
for( j = 1; j < halfVectorSize; j++ )
|
||||
log_info( ", 0x%8.8x", o[j] );
|
||||
log_info( " }\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
i += 2 * halfVectorSize;
|
||||
o += halfVectorSize;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 8:
|
||||
{
|
||||
cl_ulong *i = (cl_ulong*)in;
|
||||
cl_ulong *o = (cl_ulong*)out;
|
||||
size_t j;
|
||||
cl_uint k;
|
||||
|
||||
for( k = 0; k < elementCount; k++ )
|
||||
{
|
||||
cl_ulong *o2 = (cl_ulong*)p;
|
||||
for( j = 0; j < halfVectorSize; j++ )
|
||||
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
|
||||
|
||||
if( memcmp( o, o2, elementSize * cmpVectorSize ) )
|
||||
{
|
||||
log_info( "\n%d) Failure for %s%s.%s { 0x%16.16llx", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
|
||||
for( j = 1; j < halfVectorSize * 2; j++ )
|
||||
log_info( ", 0x%16.16llx", i[j] );
|
||||
log_info( " } --> { 0x%16.16llx", o[0] );
|
||||
for( j = 1; j < halfVectorSize; j++ )
|
||||
log_info( ", 0x%16.16llx", o[j] );
|
||||
log_info( " }\n" );
|
||||
return -1;
|
||||
}
|
||||
i += 2 * halfVectorSize;
|
||||
o += halfVectorSize;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
log_info( "Internal error. Unknown data type\n" );
|
||||
return -2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
277
test_conformance/basic/test_hostptr.c
Normal file
277
test_conformance/basic/test_hostptr.c
Normal file
@@ -0,0 +1,277 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * OpenCL C source of the "test_hostptr" kernel.
 * Each work-item reads one float from srcA and one from srcB at its global id
 * and stores their sum at the same index of dst.
 */
const char *hostptr_kernel_code =
    "__kernel void test_hostptr(__global float *srcA, __global float *srcB, __global float *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] + srcB[tid];\n"
    "}\n";
|
||||
|
||||
/* Error tolerance constant. NOTE(review): nothing in the visible part of this
 * file references it; verify_hostptr compares results for exact equality. */
static const float MAX_ERR = 1e-5f;
|
||||
|
||||
/*
 * Recomputes inptrA[k] + inptrB[k] on the host for the first n elements and
 * compares each sum with outptr[k] for exact equality.
 *
 * Returns 0 when every element matches, -1 at the first mismatch.
 */
static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        /* Stored in a cl_float so the sum is compared at float precision. */
        const cl_float expected = inptrA[idx] + inptrB[idx];

        if (expected != outptr[idx])
            return -1;

        ++idx;
    }

    return 0;
}
|
||||
|
||||
/*
 * Fills ptr[0 .. count-1] with floats drawn from get_random_float() using the
 * generator state d, with bounds -2^32 and +2^32.
 */
static void make_random_data(unsigned count, float *ptr, MTdata d)
{
    const float limit = MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32);
    cl_uint     pos = 0;

    while (pos < count)
    {
        ptr[pos] = get_random_float(-limit, limit, d);
        ++pos;
    }
}
|
||||
|
||||
static unsigned char *
|
||||
generate_rgba8_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static unsigned char *
|
||||
randomize_rgba8_image(unsigned char *ptr, int w, int h, MTdata d)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Byte-wise comparison of two w x h images with 4 bytes per pixel.
 *
 * Returns 0 when the first w*h*4 bytes of image and outptr are identical,
 * -1 otherwise. A non-positive byte count compares nothing and yields 0.
 */
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    const int num_bytes = w * h * 4;

    if (num_bytes <= 0)
        return 0;

    return (memcmp(outptr, image, (size_t)num_bytes) == 0) ? 0 : -1;
}
|
||||
|
||||
int
|
||||
test_hostptr(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_float *input_ptr[2], *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[3]={0,0,0};
|
||||
cl_image_format img_format;
|
||||
cl_uchar *rgba8_inptr, *rgba8_outptr;
|
||||
void *lock_buffer;
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
cl_int err;
|
||||
MTdata d;
|
||||
RoundingMode oldRoundMode;
|
||||
int isRTZ = 0;
|
||||
|
||||
// Block to mark deletion of streams before deletion of host_ptr
|
||||
{
|
||||
clMemWrapper streams[7];
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
// Alloc buffers
|
||||
input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
|
||||
input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
|
||||
output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
rgba8_inptr = (cl_uchar *)generate_rgba8_image(img_width, img_height, d);
|
||||
rgba8_outptr = (cl_uchar *)malloc(sizeof(cl_uchar) * 4 * img_width * img_height);
|
||||
|
||||
// Random data
|
||||
make_random_data(num_elements, input_ptr[0], d);
|
||||
make_random_data(num_elements, input_ptr[1], d);
|
||||
|
||||
// Create host-side input
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[0], &err);
|
||||
test_error(err, "clCreateBuffer 0 failed");
|
||||
|
||||
// Create a copied input
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[1], &err);
|
||||
test_error(err, "clCreateBuffer 1 failed");
|
||||
|
||||
// Create a host-side output
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, output_ptr, &err);
|
||||
test_error(err, "clCreateBuffer 2 failed");
|
||||
|
||||
// Create a host-side input
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
|
||||
test_error(err, "create_image_2d 3 failed");
|
||||
|
||||
// Create a copied input
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
|
||||
test_error(err, "create_image_2d 4 failed");
|
||||
|
||||
// Create a host-side output
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
|
||||
test_error(err, "create_image_2d 5 failed");
|
||||
|
||||
// Create a copied output
|
||||
img_format.image_channel_data_type = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[6] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
|
||||
test_error(err, "create_image_2d 6 failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel,1, &hostptr_kernel_code, "test_hostptr" );
|
||||
test_error(err, "create_single_kernel_helper failed");
|
||||
|
||||
// Execute kernel
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
threads[0] = (size_t)num_elements;
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
cl_float *data = (cl_float*) clEnqueueMapBuffer( queue, streams[2], CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float) * num_elements, 0, NULL, NULL, &err );
|
||||
test_error( err, "clEnqueueMapBuffer failed" );
|
||||
|
||||
//If we only support rtz mode
|
||||
if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
|
||||
{
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
isRTZ = 1;
|
||||
}
|
||||
|
||||
if (isRTZ)
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
|
||||
// Verify that we got the expected results back on the host side
|
||||
err = verify_hostptr(input_ptr[0], input_ptr[1], data, num_elements);
|
||||
if (err)
|
||||
{
|
||||
log_error("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
|
||||
"and a CL_MEM_USE_HOST_PTR output did not return the expected results.\n");
|
||||
} else {
|
||||
log_info("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
|
||||
"and a CL_MEM_USE_HOST_PTR output returned the expected results.\n");
|
||||
}
|
||||
|
||||
if (isRTZ)
|
||||
set_round(oldRoundMode, kfloat);
|
||||
|
||||
err = clEnqueueUnmapMemObject( queue, streams[2], data, 0, NULL, NULL );
|
||||
test_error( err, "clEnqueueUnmapMemObject failed" );
|
||||
|
||||
size_t origin[3]={0,0,0}, region[3]={img_width, img_height, 1};
|
||||
randomize_rgba8_image(rgba8_outptr, img_width, img_height, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
// Copy from host-side to host-side
|
||||
log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR...\n");
|
||||
err = clEnqueueCopyImage(queue, streams[3], streams[5],
|
||||
origin, origin, region, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueCopyImage failed");
|
||||
log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR image passed.\n");
|
||||
|
||||
// test the lock buffer interface
|
||||
log_info("Mapping the CL_MEM_USE_HOST_PTR image with clEnqueueMapImage...\n");
|
||||
size_t row_pitch;
|
||||
lock_buffer = clEnqueueMapImage(queue, streams[5], CL_TRUE,
|
||||
CL_MAP_READ, origin, region,
|
||||
&row_pitch, NULL,
|
||||
0, NULL, NULL, &err);
|
||||
test_error(err, "clEnqueueMapImage failed");
|
||||
|
||||
err = verify_rgba8_image(rgba8_inptr, (unsigned char*)lock_buffer, img_width, img_height);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("verify_rgba8_image FAILED after clEnqueueMapImage\n");
|
||||
return -1;
|
||||
}
|
||||
log_info("verify_rgba8_image passed after clEnqueueMapImage\n");
|
||||
|
||||
err = clEnqueueUnmapMemObject(queue, streams[5], lock_buffer, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueUnmapMemObject failed");
|
||||
|
||||
// Copy host-side to device-side and read back
|
||||
log_info("clEnqueueCopyImage CL_MEM_USE_HOST_PTR to CL_MEM_COPY_HOST_PTR...\n");
|
||||
err = clEnqueueCopyImage(queue, streams[3], streams[5],
|
||||
origin, origin, region,
|
||||
0, NULL, NULL);
|
||||
test_error(err, "clEnqueueCopyImage failed");
|
||||
|
||||
err = clEnqueueReadImage(queue, streams[5], CL_TRUE, origin, region, 4*img_width, 0, rgba8_outptr, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueReadImage failed");
|
||||
|
||||
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("verify_rgba8_image FAILED after clEnqueueCopyImage, clEnqueueReadImage\n");
|
||||
return -1;
|
||||
}
|
||||
log_info("verify_rgba8_image passed after clEnqueueCopyImage, clEnqueueReadImage\n");
|
||||
}
|
||||
// cleanup
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(output_ptr);
|
||||
|
||||
free(rgba8_inptr);
|
||||
free(rgba8_outptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
166
test_conformance/basic/test_if.c
Normal file
166
test_conformance/basic/test_if.c
Normal file
@@ -0,0 +1,166 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * OpenCL C source of the "test_if" kernel.
 * Each work-item inspects src[tid] through an if / else-if chain: the values
 * 0..7 each select a distinct 32-bit constant, every other value selects
 * 0x7FFFFFFF. The selected constant is written to dst[tid].
 */
const char *conditional_kernel_code =
    "__kernel void test_if(__global int *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " if (src[tid] == 0)\n"
    " dst[tid] = 0x12345678;\n"
    " else if (src[tid] == 1)\n"
    " dst[tid] = 0x23456781;\n"
    " else if (src[tid] == 2)\n"
    " dst[tid] = 0x34567812;\n"
    " else if (src[tid] == 3)\n"
    " dst[tid] = 0x45678123;\n"
    " else if (src[tid] == 4)\n"
    " dst[tid] = 0x56781234;\n"
    " else if (src[tid] == 5)\n"
    " dst[tid] = 0x67812345;\n"
    " else if (src[tid] == 6)\n"
    " dst[tid] = 0x78123456;\n"
    " else if (src[tid] == 7)\n"
    " dst[tid] = 0x81234567;\n"
    " else\n"
    " dst[tid] = 0x7FFFFFFF;\n"
    "\n"
    "}\n";
|
||||
|
||||
/*
 * Host-side copy of the constants the test_if kernel stores for the input
 * values 0..7; results[v] is the expected output for input v.
 *
 * The last constant has bit 31 set and therefore does not fit in a positive
 * int. The conversion is spelled out with a cast so the initializer is not an
 * implicit narrowing conversion; the stored bit pattern is 0x81234567 on
 * two's-complement targets.
 */
const int results[] = {
    0x12345678,
    0x23456781,
    0x34567812,
    0x45678123,
    0x56781234,
    0x67812345,
    0x78123456,
    (int)0x81234567,
};
|
||||
|
||||
int
|
||||
verify_if(int *inptr, int *outptr, int n)
|
||||
{
|
||||
int r, i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
if (inptr[i] <= 7)
|
||||
r = results[inptr[i]];
|
||||
else
|
||||
r = 0x7FFFFFFF;
|
||||
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error("IF test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("IF test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Runs the test_if kernel over num_elements random integers produced by
 * (int)get_random_float(0, 32, d) and checks the device output with
 * verify_if().
 *
 * Returns the result of verify_if() (0 on success), or -1 when an allocation
 * or OpenCL call fails. Every exit goes through one cleanup path so that
 * buffers, the kernel, the program and host memory are released on errors too.
 */
int test_if(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem      streams[2] = { NULL, NULL };
    cl_int      *input_ptr = NULL;
    cl_int      *output_ptr = NULL;
    cl_program  program = NULL;
    cl_kernel   kernel = NULL;
    size_t      threads[1];
    size_t      length = sizeof(cl_int) * num_elements;
    MTdata      d;
    int         result = -1;    /* stays -1 unless verification is reached */
    int         err, i;

    input_ptr = (cl_int*)malloc(length);
    output_ptr = (cl_int*)malloc(length);
    if (!input_ptr || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    /* The generator is created and destroyed around its only use, so no
       error path has to release it. */
    d = init_genrand( gRandomSeed );
    for (i=0; i<num_elements; i++)
        input_ptr[i] = (int)get_random_float(0, 32, d);
    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &conditional_kernel_code, "test_if" );
    if (err)
        goto cleanup;

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (unsigned int)num_elements;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_if(input_ptr, output_ptr, num_elements);

cleanup:
    /* Only release objects that were actually created. */
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return result;
}
|
||||
|
||||
|
||||
654
test_conformance/basic/test_image_multipass.c
Normal file
654
test_conformance/basic/test_image_multipass.c
Normal file
@@ -0,0 +1,654 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * OpenCL C source of "image_to_image_copy", integer-coordinate variant.
 * Each work-item reads the texel at its 2D global id from srcimg through the
 * supplied sampler using (int2) coordinates and writes it to the same
 * position in dstimg.
 */
static const char *image_to_image_kernel_integer_coord_code =
    "\n"
    "__kernel void image_to_image_copy(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " float4 color;\n"
    "\n"
    " color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
    " write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
    "\n"
    "}\n";
|
||||
|
||||
/*
 * OpenCL C source of "image_to_image_copy", float-coordinate variant.
 * Same copy as the integer variant, except that the read passes the global
 * id converted to (float2) coordinates; the write still uses (int2).
 */
static const char *image_to_image_kernel_float_coord_code =
    "\n"
    "__kernel void image_to_image_copy(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " float4 color;\n"
    "\n"
    " color = read_imagef(srcimg, sampler, (float2)((float)tid_x, (float)tid_y));\n"
    " write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
/*
 * OpenCL C source of "image_sum", integer-coordinate variant.
 * Each work-item reads the texel at its 2D global id from srcimg0 and from
 * srcimg1 using (int2) coordinates and writes the sum of the two colours to
 * the same position in dstimg.
 */
static const char *image_sum_kernel_integer_coord_code =
    "\n"
    "__kernel void image_sum(read_only image2d_t srcimg0, read_only image2d_t srcimg1, write_only image2d_t dstimg, sampler_t sampler)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " float4 color0;\n"
    " float4 color1;\n"
    "\n"
    " color0 = read_imagef(srcimg0, sampler, (int2)(tid_x, tid_y));\n"
    " color1 = read_imagef(srcimg1, sampler, (int2)(tid_x, tid_y));\n"
    " write_imagef(dstimg, (int2)(tid_x, tid_y), color0 + color1);\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
/*
 * OpenCL C source of "image_sum", float-coordinate variant.
 * Same per-texel sum as the integer variant, except that both reads pass the
 * global id converted to (float2) coordinates; the write still uses (int2).
 */
static const char *image_sum_kernel_float_coord_code =
    "\n"
    "__kernel void image_sum(read_only image2d_t srcimg0, read_only image2d_t srcimg1, write_only image2d_t dstimg, sampler_t sampler)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " float4 color0;\n"
    " float4 color1;\n"
    "\n"
    " color0 = read_imagef(srcimg0, sampler, (float2)((float)tid_x, (float)tid_y));\n"
    " color1 = read_imagef(srcimg1, sampler, (float2)((float)tid_x, (float)tid_y));\n"
    " write_imagef(dstimg,(int2)(tid_x, tid_y), color0 + color1);\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
/*
 * Allocates a buffer of w * h * num_elements bytes and sets every byte to
 * value.
 *
 * Returns the malloc'd buffer (the caller frees it), or NULL when the
 * allocation fails; previously a failed malloc was written through.
 */
static unsigned char *
generate_initial_byte_image(int w, int h, int num_elements, unsigned char value)
{
    const int      num_bytes = w * h * num_elements;
    unsigned char *ptr = (unsigned char*)malloc(num_bytes);

    if (ptr == NULL)
        return NULL;

    /* A non-positive count fills nothing, like the loop it replaces. */
    if (num_bytes > 0)
        memset(ptr, value, (size_t)num_bytes);

    return ptr;
}
|
||||
|
||||
/*
 * Builds the reference image for the multipass test: byte i of the result is
 * the sum of byte i of each of the num_inputs buffers in input_data, kept in
 * an unsigned char (so it wraps modulo 256).
 *
 * Each input buffer must hold at least w * h * num_elements bytes.
 *
 * Returns the malloc'd buffer (the caller frees it), or NULL when the
 * allocation fails; previously a failed malloc was written through.
 */
static unsigned char *
generate_expected_byte_image(unsigned char **input_data, int num_inputs, int w, int h, int num_elements)
{
    const int      num_bytes = w * h * num_elements;
    unsigned char *ptr = (unsigned char*)malloc(num_bytes);
    int            i;

    if (ptr == NULL)
        return NULL;

    for (i = 0; i < num_bytes; i++)
    {
        unsigned char sum = 0;
        int           j;

        for (j = 0; j < num_inputs; j++)
            sum = (unsigned char)(sum + input_data[j][i]);

        ptr[i] = sum;
    }

    return ptr;
}
|
||||
|
||||
|
||||
static unsigned char *
|
||||
generate_byte_image(int w, int h, int num_elements, MTdata d)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(w * h * num_elements);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < w*h*num_elements; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(d) & 31;
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Byte-wise comparison of two images of w * h * num_elements bytes.
 *
 * Returns 0 when image and outptr are identical over that length, -1
 * otherwise. A non-positive byte count compares nothing and yields 0.
 */
static int
verify_byte_image(unsigned char *image, unsigned char *outptr, int w, int h, int num_elements)
{
    const int num_bytes = w * h * num_elements;

    if (num_bytes <= 0)
        return 0;

    return (memcmp(outptr, image, (size_t)num_bytes) == 0) ? 0 : -1;
}
|
||||
|
||||
int
|
||||
test_image_multipass_integer_coord(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
cl_image_format img_format;
|
||||
|
||||
int num_input_streams = 8;
|
||||
cl_mem *input_streams;
|
||||
cl_mem accum_streams[2];
|
||||
unsigned char *expected_output;
|
||||
unsigned char *output_ptr;
|
||||
cl_kernel kernel[2];
|
||||
int err;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
|
||||
expected_output = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
|
||||
output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
|
||||
|
||||
// Create the accum images with initial data.
|
||||
{
|
||||
unsigned char *initial_data;
|
||||
cl_mem_flags flags;
|
||||
|
||||
initial_data = generate_initial_byte_image(img_width, img_height, 4, 0xF0);
|
||||
flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
|
||||
|
||||
accum_streams[0] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!accum_streams[0])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
free(expected_output);
|
||||
free(output_ptr);
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1};
|
||||
err = clEnqueueWriteImage(queue, accum_streams[0], CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
initial_data, 0, NULL, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clWriteImage failed: %d\n", err);
|
||||
free(expected_output);
|
||||
free(output_ptr);
|
||||
return -1;
|
||||
}
|
||||
|
||||
accum_streams[1] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!accum_streams[1])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
free(expected_output);
|
||||
free(output_ptr);
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteImage(queue, accum_streams[1], CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
initial_data, 0, NULL, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clWriteImage failed: %d\n", err);
|
||||
free(expected_output);
|
||||
free(output_ptr);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(initial_data);
|
||||
}
|
||||
|
||||
// Set up the input data.
|
||||
{
|
||||
cl_mem_flags flags;
|
||||
unsigned char **input_data = (unsigned char **)malloc(sizeof(unsigned char*) * num_input_streams);
|
||||
MTdata d;
|
||||
|
||||
input_streams = (cl_mem*)malloc(sizeof(cl_mem) * num_input_streams);
|
||||
flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
|
||||
|
||||
int i;
|
||||
d = init_genrand( gRandomSeed );
|
||||
for ( i = 0; i < num_input_streams; i++)
|
||||
{
|
||||
input_data[i] = generate_byte_image(img_width, img_height, 4, d);
|
||||
input_streams[i] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!input_streams[i])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
free_mtdata(d);
|
||||
free(expected_output);
|
||||
free(output_ptr);
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1};
|
||||
err = clEnqueueWriteImage(queue, input_streams[i], CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
input_data[i], 0, NULL, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clWriteImage failed: %d\n", err);
|
||||
free_mtdata(d);
|
||||
free(expected_output);
|
||||
free(output_ptr);
|
||||
free(input_streams);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
free_mtdata(d); d = NULL;
|
||||
expected_output = generate_expected_byte_image(input_data, num_input_streams, img_width, img_height, 4);
|
||||
for ( i = 0; i < num_input_streams; i++)
|
||||
{
|
||||
free(input_data[i]);
|
||||
}
|
||||
free( input_data );
|
||||
}
|
||||
|
||||
// Set up the kernels.
|
||||
{
|
||||
cl_program program[4];
|
||||
|
||||
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &image_to_image_kernel_integer_coord_code, "image_to_image_copy");
|
||||
if (err)
|
||||
{
|
||||
log_error("Failed to create kernel 0: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &image_sum_kernel_integer_coord_code, "image_sum");
|
||||
if (err)
|
||||
{
|
||||
log_error("Failed to create kernel 1: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
clReleaseProgram(program[0]);
|
||||
clReleaseProgram(program[1]);
|
||||
}
|
||||
|
||||
cl_sampler_properties properties[] = {
|
||||
CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
|
||||
CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
|
||||
CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
|
||||
0 };
|
||||
cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err);
|
||||
test_error(err, "clCreateSamplerWithProperties failed");
|
||||
|
||||
{
|
||||
size_t threads[3] = {0, 0, 0};
|
||||
threads[0] = (size_t)img_width;
|
||||
threads[1] = (size_t)img_height;
|
||||
int i;
|
||||
|
||||
{
|
||||
cl_mem accum_input;
|
||||
cl_mem accum_output;
|
||||
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof input_streams[0], &input_streams[0]);
|
||||
err |= clSetKernelArg(kernel[0], 1, sizeof accum_streams[0], &accum_streams[0]);
|
||||
err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 1; i < num_input_streams; i++)
|
||||
{
|
||||
accum_input = accum_streams[(i-1)%2];
|
||||
accum_output = accum_streams[i%2];
|
||||
|
||||
err = clSetKernelArg(kernel[1], 0, sizeof accum_input, &accum_input);
|
||||
err |= clSetKernelArg(kernel[1], 1, sizeof input_streams[i], &input_streams[i]);
|
||||
err |= clSetKernelArg(kernel[1], 2, sizeof accum_output, &accum_output);
|
||||
err |= clSetKernelArg(kernel[1], 3, sizeof sampler, &sampler);
|
||||
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueNDRangeKernel( queue, kernel[1], 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the last accum into the other one.
|
||||
accum_input = accum_streams[(i-1)%2];
|
||||
accum_output = accum_streams[i%2];
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof accum_input, &accum_input);
|
||||
err |= clSetKernelArg(kernel[0], 1, sizeof accum_output, &accum_output);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1};
|
||||
err = clEnqueueReadImage(queue, accum_output, CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
(void *)output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clReadImage failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = verify_byte_image(expected_output, output_ptr, img_width, img_height, 4);
|
||||
if (err)
|
||||
{
|
||||
log_error("IMAGE_MULTIPASS test failed.\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info("IMAGE_MULTIPASS test passed\n");
|
||||
}
|
||||
}
|
||||
|
||||
clReleaseSampler(sampler);
|
||||
}
|
||||
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(accum_streams[0]);
|
||||
clReleaseMemObject(accum_streams[1]);
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < num_input_streams; i++)
|
||||
{
|
||||
clReleaseMemObject(input_streams[i]);
|
||||
}
|
||||
}
|
||||
free(input_streams);
|
||||
clReleaseKernel(kernel[0]);
|
||||
clReleaseKernel(kernel[1]);
|
||||
free(expected_output);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int
|
||||
test_image_multipass_float_coord(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
cl_image_format img_format;
|
||||
|
||||
int num_input_streams = 8;
|
||||
cl_mem *input_streams;
|
||||
cl_mem accum_streams[2];
|
||||
unsigned char *expected_output;
|
||||
unsigned char *output_ptr;
|
||||
cl_kernel kernel[2];
|
||||
int err;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
|
||||
output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
|
||||
|
||||
// Create the accum images with initial data.
|
||||
{
|
||||
unsigned char *initial_data;
|
||||
cl_mem_flags flags;
|
||||
|
||||
initial_data = generate_initial_byte_image(img_width, img_height, 4, 0xF0);
|
||||
flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
|
||||
|
||||
accum_streams[0] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!accum_streams[0])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1};
|
||||
err = clEnqueueWriteImage(queue, accum_streams[0], CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
initial_data, 0, NULL, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clWriteImage failed: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
accum_streams[1] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!accum_streams[1])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteImage(queue, accum_streams[1], CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
initial_data, 0, NULL, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clWriteImage failed: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(initial_data);
|
||||
}
|
||||
|
||||
// Set up the input data.
|
||||
{
|
||||
cl_mem_flags flags;
|
||||
unsigned char **input_data = (unsigned char **)malloc(sizeof(unsigned char*) * num_input_streams);
|
||||
MTdata d;
|
||||
|
||||
input_streams = (cl_mem*)malloc(sizeof(cl_mem) * num_input_streams);
|
||||
flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
|
||||
|
||||
int i;
|
||||
d = init_genrand( gRandomSeed );
|
||||
for ( i = 0; i < num_input_streams; i++)
|
||||
{
|
||||
input_data[i] = generate_byte_image(img_width, img_height, 4, d);
|
||||
input_streams[i] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!input_streams[i])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
free(input_data);
|
||||
free(input_streams);
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1};
|
||||
err = clEnqueueWriteImage(queue, input_streams[i], CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
input_data[i], 0, NULL, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clWriteImage failed: %d\n", err);
|
||||
free(input_data);
|
||||
free(input_streams);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
free_mtdata(d); d = NULL;
|
||||
expected_output = generate_expected_byte_image(input_data, num_input_streams, img_width, img_height, 4);
|
||||
for ( i = 0; i < num_input_streams; i++)
|
||||
{
|
||||
free(input_data[i]);
|
||||
}
|
||||
free(input_data);
|
||||
}
|
||||
|
||||
// Set up the kernels.
|
||||
{
|
||||
cl_program program[2];
|
||||
|
||||
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &image_to_image_kernel_float_coord_code, "image_to_image_copy");
|
||||
if (err)
|
||||
{
|
||||
log_error("Failed to create kernel 2: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &image_sum_kernel_float_coord_code, "image_sum");
|
||||
if (err)
|
||||
{
|
||||
log_error("Failed to create kernel 3: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
clReleaseProgram(program[0]);
|
||||
clReleaseProgram(program[1]);
|
||||
}
|
||||
|
||||
cl_sampler_properties properties[] = {
|
||||
CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
|
||||
CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
|
||||
CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
|
||||
0 };
|
||||
cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err);
|
||||
test_error(err, "clCreateSamplerWithProperties failed");
|
||||
|
||||
{
|
||||
size_t threads[3] = {0, 0, 0};
|
||||
threads[0] = (size_t)img_width;
|
||||
threads[1] = (size_t)img_height;
|
||||
int i;
|
||||
|
||||
{
|
||||
cl_mem accum_input;
|
||||
cl_mem accum_output;
|
||||
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof input_streams[0], &input_streams[0]);
|
||||
err |= clSetKernelArg(kernel[0], 1, sizeof accum_streams[0], &accum_streams[0]);
|
||||
err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 1; i < num_input_streams; i++)
|
||||
{
|
||||
accum_input = accum_streams[(i-1)%2];
|
||||
accum_output = accum_streams[i%2];
|
||||
|
||||
err = clSetKernelArg(kernel[1], 0, sizeof accum_input, &accum_input);
|
||||
err |= clSetKernelArg(kernel[1], 1, sizeof input_streams[i], &input_streams[i]);
|
||||
err |= clSetKernelArg(kernel[1], 2, sizeof accum_output, &accum_output);
|
||||
err |= clSetKernelArg(kernel[1], 3, sizeof sampler, &sampler);
|
||||
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueNDRangeKernel( queue, kernel[1], 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the last accum into the other one.
|
||||
accum_input = accum_streams[(i-1)%2];
|
||||
accum_output = accum_streams[i%2];
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof accum_input, &accum_input);
|
||||
err |= clSetKernelArg(kernel[0], 1, sizeof accum_output, &accum_output);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1};
|
||||
err = clEnqueueReadImage(queue, accum_output, CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
(void *)output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clReadImage failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = verify_byte_image(expected_output, output_ptr, img_width, img_height, 4);
|
||||
if (err)
|
||||
{
|
||||
log_error("IMAGE_MULTIPASS test failed.\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info("IMAGE_MULTIPASS test passed\n");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// cleanup
|
||||
clReleaseSampler(sampler);
|
||||
clReleaseMemObject(accum_streams[0]);
|
||||
clReleaseMemObject(accum_streams[1]);
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < num_input_streams; i++)
|
||||
{
|
||||
clReleaseMemObject(input_streams[i]);
|
||||
}
|
||||
}
|
||||
clReleaseKernel(kernel[0]);
|
||||
clReleaseKernel(kernel[1]);
|
||||
free(expected_output);
|
||||
free(output_ptr);
|
||||
free(input_streams);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
290
test_conformance/basic/test_image_param.c
Normal file
290
test_conformance/basic/test_image_param.c
Normal file
@@ -0,0 +1,290 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/imageHelpers.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
|
||||
|
||||
// OpenCL C source for the "test_fn" kernel: each work-item reads one texel of
// srcimg with read_imagef at its (x, y) global id and stores the float4 into
// results at index y * image_width + x.
static const char *param_kernel[] = {
    "__kernel void test_fn(read_only image2d_t srcimg, sampler_t sampler, __global float4 *results )\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " results[ tid_y * get_image_width( srcimg ) + tid_x ] = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
    "\n"
    "}\n"
};
|
||||
|
||||
int validate_results( size_t width, size_t height, cl_image_format &format, char *inputData, cl_float *actualResults )
|
||||
{
|
||||
for( size_t i = 0; i < width * height; i++ )
|
||||
{
|
||||
cl_float expected[ 4 ], tolerance;
|
||||
|
||||
switch( format.image_channel_data_type )
|
||||
{
|
||||
case CL_UNORM_INT8:
|
||||
{
|
||||
cl_uchar *p = (cl_uchar *)inputData;
|
||||
expected[ 0 ] = p[ 0 ] / 255.f;
|
||||
expected[ 1 ] = p[ 1 ] / 255.f;
|
||||
expected[ 2 ] = p[ 2 ] / 255.f;
|
||||
expected[ 3 ] = p[ 3 ] / 255.f;
|
||||
tolerance = 1.f / 255.f;
|
||||
break;
|
||||
}
|
||||
case CL_SNORM_INT8:
|
||||
{
|
||||
cl_char *p = (cl_char *)inputData;
|
||||
expected[ 0 ] = fmaxf( p[ 0 ] / 127.f, -1.f );
|
||||
expected[ 1 ] = fmaxf( p[ 1 ] / 127.f, -1.f );
|
||||
expected[ 2 ] = fmaxf( p[ 2 ] / 127.f, -1.f );
|
||||
expected[ 3 ] = fmaxf( p[ 3 ] / 127.f, -1.f );
|
||||
tolerance = 1.f / 127.f;
|
||||
break;
|
||||
}
|
||||
case CL_UNSIGNED_INT8:
|
||||
{
|
||||
cl_uchar *p = (cl_uchar *)inputData;
|
||||
expected[ 0 ] = p[ 0 ];
|
||||
expected[ 1 ] = p[ 1 ];
|
||||
expected[ 2 ] = p[ 2 ];
|
||||
expected[ 3 ] = p[ 3 ];
|
||||
tolerance = 1.f / 127.f;
|
||||
break;
|
||||
}
|
||||
case CL_SIGNED_INT8:
|
||||
{
|
||||
cl_short *p = (cl_short *)inputData;
|
||||
expected[ 0 ] = p[ 0 ];
|
||||
expected[ 1 ] = p[ 1 ];
|
||||
expected[ 2 ] = p[ 2 ];
|
||||
expected[ 3 ] = p[ 3 ];
|
||||
tolerance = 1.f / 127.f;
|
||||
break;
|
||||
}
|
||||
case CL_UNORM_INT16:
|
||||
{
|
||||
cl_ushort *p = (cl_ushort *)inputData;
|
||||
expected[ 0 ] = p[ 0 ] / 65535.f;
|
||||
expected[ 1 ] = p[ 1 ] / 65535.f;
|
||||
expected[ 2 ] = p[ 2 ] / 65535.f;
|
||||
expected[ 3 ] = p[ 3 ] / 65535.f;
|
||||
tolerance = 1.f / 65535.f;
|
||||
break;
|
||||
}
|
||||
case CL_UNSIGNED_INT32:
|
||||
{
|
||||
cl_uint *p = (cl_uint *)inputData;
|
||||
expected[ 0 ] = p[ 0 ];
|
||||
expected[ 1 ] = p[ 1 ];
|
||||
expected[ 2 ] = p[ 2 ];
|
||||
expected[ 3 ] = p[ 3 ];
|
||||
tolerance = 0.0001f;
|
||||
break;
|
||||
}
|
||||
case CL_FLOAT:
|
||||
{
|
||||
cl_float *p = (cl_float *)inputData;
|
||||
expected[ 0 ] = p[ 0 ];
|
||||
expected[ 1 ] = p[ 1 ];
|
||||
expected[ 2 ] = p[ 2 ];
|
||||
expected[ 3 ] = p[ 3 ];
|
||||
tolerance = 0.0001f;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// Should never get here
|
||||
break;
|
||||
}
|
||||
|
||||
if( format.image_channel_order == CL_BGRA )
|
||||
{
|
||||
cl_float tmp = expected[ 0 ];
|
||||
expected[ 0 ] = expected[ 2 ];
|
||||
expected[ 2 ] = tmp;
|
||||
}
|
||||
|
||||
// Within an error tolerance, make sure the results match
|
||||
cl_float error1 = fabsf( expected[ 0 ] - actualResults[ 0 ] );
|
||||
cl_float error2 = fabsf( expected[ 1 ] - actualResults[ 1 ] );
|
||||
cl_float error3 = fabsf( expected[ 2 ] - actualResults[ 2 ] );
|
||||
cl_float error4 = fabsf( expected[ 3 ] - actualResults[ 3 ] );
|
||||
|
||||
if( error1 > tolerance || error2 > tolerance || error3 > tolerance || error4 > tolerance )
|
||||
{
|
||||
log_error( "ERROR: Sample %d did not validate against expected results for %d x %d %s:%s image\n", (int)i, (int)width, (int)height,
|
||||
GetChannelOrderName( format.image_channel_order ), GetChannelTypeName( format.image_channel_data_type ) );
|
||||
log_error( " Expected: %f %f %f %f\n", (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ] );
|
||||
log_error( " Actual: %f %f %f %f\n", (float)actualResults[ 0 ], (float)actualResults[ 1 ], (float)actualResults[ 2 ], (float)actualResults[ 3 ] );
|
||||
|
||||
// Check real quick a special case error here
|
||||
cl_float error1 = fabsf( expected[ 3 ] - actualResults[ 0 ] );
|
||||
cl_float error2 = fabsf( expected[ 2 ] - actualResults[ 1 ] );
|
||||
cl_float error3 = fabsf( expected[ 1 ] - actualResults[ 2 ] );
|
||||
cl_float error4 = fabsf( expected[ 0 ] - actualResults[ 3 ] );
|
||||
if( error1 <= tolerance && error2 <= tolerance && error3 <= tolerance && error4 <= tolerance )
|
||||
{
|
||||
log_error( "\t(Kernel did not respect change in channel order)\n" );
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Increment and go
|
||||
actualResults += 4;
|
||||
inputData += get_format_type_size( &format ) * 4;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Runs one read_imagef kernel over images of several sizes and formats and
// checks that the values the kernel reads match the host data each image was
// created from.
//
// For every (size, format) pair an input image (CL_MEM_COPY_HOST_PTR from
// random host data) and a float4 output buffer are created; all kernels are
// enqueued first and the results validated afterwards with validate_results().
// On embedded profiles CL_BGRA combinations are skipped unless the context
// reports a CL_BGRA format for CL_MEM_READ_WRITE 2D images.
//
// Returns 0 on success, -1 when validation fails, or the value test_error()
// returns when an OpenCL call fails.  num_elements is unused.
int test_image_param(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    size_t sizes[] = { 64, 100, 128, 250, 512 };
    cl_image_format formats[] = { { CL_RGBA, CL_UNORM_INT8 }, { CL_RGBA, CL_UNORM_INT16 }, { CL_RGBA, CL_FLOAT }, { CL_BGRA, CL_UNORM_INT8 } };
    ExplicitType types[] = { kUChar, kUShort, kFloat, kUChar };
    int error;
    size_t i, j, idx;
    size_t threads[ 2 ];
    MTdata d;
    int supportsBGRA = 0;
    cl_uint numSupportedFormats = 0;

    const size_t numSizes = sizeof( sizes ) / sizeof( sizes[ 0 ] );
    const size_t numFormats = sizeof( formats ) / sizeof( formats[ 0 ] );
    const size_t numAttempts = numSizes * numFormats;


    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ numAttempts ][ 2 ];
    BufferOwningPtr<char> inputs[ numAttempts ];

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    if(gIsEmbedded)
    {
        /* Get the supported image formats to see if BGRA is supported */
        error = clGetSupportedImageFormats (context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &numSupportedFormats);
        test_error( error, "clGetSupportedImageFormats failed" );

        if( numSupportedFormats > 0 )
        {
            cl_image_format *supported_formats = (cl_image_format *) malloc(sizeof(cl_image_format) * numSupportedFormats);
            if( supported_formats == NULL )
            {
                log_error( "ERROR: Unable to allocate supported image format list\n" );
                return -1;
            }

            // Ask for as many entries as were allocated; the loop below
            // inspects all numSupportedFormats of them.
            error = clGetSupportedImageFormats (context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, numSupportedFormats, supported_formats, NULL);
            if( error != CL_SUCCESS )
                free( supported_formats );   // test_error() returns immediately
            test_error( error, "clGetSupportedImageFormats failed" );

            for(i = 0; i < numSupportedFormats; i++)
            {
                if(supported_formats[i].image_channel_order == CL_BGRA)
                {
                    supportsBGRA = 1;
                    break;
                }
            }

            free( supported_formats );
        }
    }
    else
    {
        supportsBGRA = 1;
    }

    d = init_genrand( gRandomSeed );
    for( i = 0, idx = 0; i < numSizes; i++ )
    {
        for( j = 0; j < numFormats; j++, idx++ )
        {
            if(formats[j].image_channel_order == CL_BGRA && !supportsBGRA)
                continue;

            // For each attempt, we create a pair: an input image, whose parameters keep changing, and an output buffer
            // that we can read values from. The output buffer will remain consistent to ensure that any changes we
            // witness are due to the image changes
            inputs[ idx ].reset(create_random_data( types[ j ], d, sizes[ i ] * sizes[ i ] * 4 ));

            streams[ idx ][ 0 ] = create_image_2d( context, CL_MEM_COPY_HOST_PTR, &formats[ j ], sizes[ i ], sizes[ i ], 0, inputs[ idx ], &error );
            {
                char err_str[256];
                sprintf(err_str, "Unable to create input image for order %s type %s" ,
                        GetChannelOrderName( formats[j].image_channel_order ),
                        GetChannelTypeName( formats[j].image_channel_data_type ));
                if( error != CL_SUCCESS )
                    free_mtdata( d );   // test_error() returns immediately
                test_error( error, err_str);
            }

            streams[ idx ][ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizes[ i ] * sizes[ i ] * 4 * sizeof( cl_float ), NULL, &error );
            if( error != CL_SUCCESS )
                free_mtdata( d );       // test_error() returns immediately
            test_error( error, "Unable to create output buffer" );
        }
    }
    free_mtdata(d); d = NULL;

    // Create a single kernel to use for all the tests
    error = create_single_kernel_helper( context, &program, &kernel, 1, param_kernel, "test_fn" );
    test_error( error, "Unable to create testing kernel" );

    // Also create a sampler to use for all the runs
    cl_sampler_properties properties[] = {
        CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
        CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
        CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
        0 };
    clSamplerWrapper sampler = clCreateSamplerWithProperties(context, properties, &error);
    test_error(error, "clCreateSamplerWithProperties failed");

    // Set up the arguments for each and queue
    for( i = 0, idx = 0; i < numSizes; i++ )
    {
        for( j = 0; j < numFormats; j++, idx++ )
        {
            if(formats[j].image_channel_order == CL_BGRA && !supportsBGRA)
                continue;

            error = clSetKernelArg( kernel, 0, sizeof( streams[ idx ][ 0 ] ), &streams[ idx ][ 0 ] );
            error |= clSetKernelArg( kernel, 1, sizeof( sampler ), &sampler );
            error |= clSetKernelArg( kernel, 2, sizeof( streams[ idx ][ 1 ] ), &streams[ idx ][ 1 ]);
            test_error( error, "Unable to set kernel arguments" );

            threads[ 0 ] = threads[ 1 ] = (size_t)sizes[ i ];

            error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
            test_error( error, "clEnqueueNDRangeKernel failed" );
        }
    }

    // Now go through each combo and validate the results
    for( i = 0, idx = 0; i < numSizes; i++ )
    {
        for( j = 0; j < numFormats; j++, idx++ )
        {
            if(formats[j].image_channel_order == CL_BGRA && !supportsBGRA)
                continue;

            BufferOwningPtr<cl_float> output(malloc(sizeof(cl_float) * sizes[ i ] * sizes[ i ] * 4 ));

            error = clEnqueueReadBuffer( queue, streams[ idx ][ 1 ], CL_TRUE, 0, sizes[ i ] * sizes[ i ] * 4 * sizeof( cl_float ), output, 0, NULL, NULL );
            test_error( error, "Unable to read results" );

            error = validate_results( sizes[ i ], sizes[ i ], formats[ j ], inputs[ idx ], output );
            if( error )
                return -1;
        }
    }

    return 0;
}
|
||||
183
test_conformance/basic/test_image_r8.c
Normal file
183
test_conformance/basic/test_image_r8.c
Normal file
@@ -0,0 +1,183 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// OpenCL C source for the "test_r_uint8" kernel: each work-item reads one
// texel of srcimg with read_imageui at its (x, y) global id and stores the
// .x component, narrowed to unsigned char, into dst at y * image_width + x.
static const char *r_uint8_kernel_code =
    "__kernel void test_r_uint8(read_only image2d_t srcimg, __global unsigned char *dst, sampler_t sampler)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
    " uint4 color;\n"
    "\n"
    " color = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n"
    " dst[indx] = (unsigned char)(color.x);\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
static unsigned char *
|
||||
generate_8bit_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(w * h * sizeof(unsigned char));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Compares the first w * h bytes of `outptr` against `image`.
// Logs a failure and returns -1 at the first differing byte; otherwise logs
// success and returns 0.
static int
verify_8bit_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    const int pixel_count = w * h;
    int idx = 0;

    while (idx < pixel_count)
    {
        if (image[idx] != outptr[idx])
        {
            log_error("READ_IMAGE_R_UNSIGNED_INT8 test failed\n");
            return -1;
        }
        idx++;
    }

    log_info("READ_IMAGE_R_UNSIGNED_INT8 test passed\n");
    return 0;
}
|
||||
|
||||
int
|
||||
test_image_r8(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_image_format img_format;
|
||||
cl_uchar *input_ptr, *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[3];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
int err;
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
img_format.image_channel_order = CL_R;
|
||||
img_format.image_channel_data_type = CL_UNSIGNED_INT8;
|
||||
|
||||
// early out if this image type is not supported
|
||||
if( ! is_image_format_supported( context, (cl_mem_flags)(CL_MEM_READ_ONLY), CL_MEM_OBJECT_IMAGE2D, &img_format ) ) {
|
||||
log_info("WARNING: Image type not supported; skipping test.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
input_ptr = generate_8bit_image(img_width, img_height, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
output_ptr = (cl_uchar*)malloc(sizeof(cl_uchar) * img_width * img_height);
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_ONLY), &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uchar) * img_width*img_height, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = {0,0,0}, region[3]={img_width, img_height, 1};
|
||||
err = clEnqueueWriteImage(queue, streams[0], CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
input_ptr,
|
||||
0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteImage failed: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, &r_uint8_kernel_code, "test_r_uint8" );
|
||||
if (err) {
|
||||
log_error("Failed to create kernel and program: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_sampler_properties properties[] = {
|
||||
CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
|
||||
CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
|
||||
CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
|
||||
0 };
|
||||
cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err);
|
||||
test_error(err, "clCreateSamplerWithProperties failed");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (size_t)img_width;
|
||||
threads[1] = (size_t)img_height;
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_uchar)*img_width*img_height, (void *)output_ptr, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verify_8bit_image(input_ptr, output_ptr, img_width, img_height);
|
||||
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
clReleaseSampler(sampler);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
146
test_conformance/basic/test_imagearraycopy.c
Normal file
146
test_conformance/basic/test_imagearraycopy.c
Normal file
@@ -0,0 +1,146 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// Round-trips random data through a 512x512 2D image of the given format:
// writes it with clEnqueueWriteImage, copies image -> buffer with
// clEnqueueCopyImageToBuffer, reads the buffer back (waiting on the copy's
// event) and compares the bytes with what was written.
//
// Returns 0 when the bytes match, -1 on mismatch or host allocation failure,
// or the value test_error() returns when an OpenCL call fails.  Up to six
// mismatching elements are logged.  `device` is unused.
int test_imagearraycopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
{
    cl_uchar *imgptr = NULL, *bufptr = NULL;
    clMemWrapper image, buffer;
    const int img_width = 512;
    const int img_height = 512;
    size_t elem_size;
    size_t buffer_size;
    size_t i;
    cl_int err;
    MTdata d;
    cl_event copyevent = NULL;

    log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));

    image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err);
    test_error(err, "create_image_2d failed");

    err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
    test_error(err, "clGetImageInfo failed");

    buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;

    buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err);
    test_error(err, "clCreateBuffer failed");

    // Host staging buffers: imgptr holds what is written, bufptr what is read back.
    imgptr = (cl_uchar*)malloc(buffer_size);
    bufptr = (cl_uchar*)malloc(buffer_size);
    if (imgptr == NULL || bufptr == NULL) {
        log_error( "ERROR: Unable to allocate host buffers\n" );
        free(imgptr);
        free(bufptr);
        return -1;
    }

    d = init_genrand( gRandomSeed );
    for (i = 0; i < buffer_size; i++) {
        imgptr[i] = (cl_uchar)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    size_t origin[3]={0,0,0}, region[3]={(size_t)img_width,(size_t)img_height,1};

    // test_error() returns immediately on failure, so host memory is released
    // just before each call that could trigger it.
    err = clEnqueueWriteImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 0, NULL, NULL );
    if (err != CL_SUCCESS) { free(imgptr); free(bufptr); }
    test_error(err, "clEnqueueWriteImage failed");

    err = clEnqueueCopyImageToBuffer( queue, image, buffer, origin, region, 0, 0, NULL, &copyevent );
    if (err != CL_SUCCESS) { free(imgptr); free(bufptr); }
    test_error(err, "clEnqueueCopyImageToBuffer failed");

    err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 1, &copyevent, NULL);
    if (err != CL_SUCCESS) { clReleaseEvent(copyevent); free(imgptr); free(bufptr); }
    test_error(err, "clEnqueueReadBuffer failed");

    err = clReleaseEvent(copyevent);
    if (err != CL_SUCCESS) { free(imgptr); free(bufptr); }
    test_error(err, "clReleaseEvent failed");

    if (memcmp(imgptr, bufptr, buffer_size) != 0) {
        log_error( "ERROR: Results did not validate!\n" );
        const unsigned char * expectedBytes = (const unsigned char*)imgptr;   // data written to the image
        const unsigned char * actualBytes = (const unsigned char*)bufptr;     // data read back from the buffer
        int failuresPrinted = 0;

        for (i = 0; i < buffer_size; i += elem_size) {
            size_t j;
            char values[4096];

            if (memcmp(expectedBytes + i, actualBytes + i, elem_size) == 0)
                continue;

            values[0] = 0;
            sprintf(values + strlen(values), "%d(0x%x) -> actual [", (int)i, (unsigned int)i);
            for (j = 0; j < elem_size; j++)
                sprintf(values + strlen( values), "0x%02x ", actualBytes[i+j]);
            sprintf(values + strlen(values), "] != expected [");
            for (j = 0; j < elem_size; j++)
                sprintf(values + strlen( values), "0x%02x ", expectedBytes[i+j]);
            sprintf(values + strlen(values), "]");
            log_error("%s\n", values);

            if (++failuresPrinted > 5) {
                log_error("Not printing further failures...\n");
                break;
            }
        }
        err = -1;
    }

    free(imgptr);
    free(bufptr);

    if (err)
        log_error("IMAGE to ARRAY copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
                  (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);

    return err;
}
|
||||
|
||||
// Runs test_imagearraycopy_single_format() for every 2D image format the
// context reports for CL_MEM_READ_WRITE and ORs the results together.
//
// Returns 0 when every format passes, a non-zero value when any format
// fails, -1 when no formats are reported or the format list cannot be
// allocated, or the value test_error() returns when a format query fails.
// num_elements is unused.
int test_imagearraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int err;
    cl_image_format *formats;
    cl_uint num_formats = 0;
    cl_uint i;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats);
    test_error(err, "clGetSupportedImageFormats failed");

    if (num_formats == 0) {
        log_error("clGetSupportedImageFormats reported no image formats\n");
        return -1;
    }

    formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
    if (formats == NULL) {
        log_error("Unable to allocate image format list\n");
        return -1;
    }

    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL);
    if (err != CL_SUCCESS)
        free(formats);      // test_error() returns immediately
    test_error(err, "clGetSupportedImageFormats failed");

    for (i = 0; i < num_formats; i++) {
        err |= test_imagearraycopy_single_format(device, context, queue, &formats[i]);
    }

    free(formats);
    if (err)
        log_error("IMAGE to ARRAY copy test failed\n");
    else
        log_info("IMAGE to ARRAY copy test passed\n");

    return err;
}
|
||||
143
test_conformance/basic/test_imagearraycopy3d.c
Normal file
143
test_conformance/basic/test_imagearraycopy3d.c
Normal file
@@ -0,0 +1,143 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
int test_imagearraycopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
|
||||
{
|
||||
cl_uchar *imgptr, *bufptr;
|
||||
clMemWrapper image, buffer;
|
||||
int img_width = 128;
|
||||
int img_height = 128;
|
||||
int img_depth = 32;
|
||||
size_t elem_size;
|
||||
size_t buffer_size;
|
||||
int i;
|
||||
cl_int err;
|
||||
MTdata d;
|
||||
|
||||
log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
|
||||
|
||||
image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
|
||||
err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
|
||||
test_error(err, "clGetImageInfo failed");
|
||||
|
||||
buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
|
||||
|
||||
buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
imgptr = (cl_uchar*)malloc(buffer_size);
|
||||
for (i=0; i<(int)buffer_size; i++) {
|
||||
imgptr[i] = (cl_uchar)genrand_int32(d);
|
||||
}
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
size_t origin[3]={0,0,0}, region[3]={img_width,img_height,img_depth};
|
||||
err = clEnqueueWriteImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
|
||||
err = clEnqueueCopyImageToBuffer( queue, image, buffer, origin, region, 0, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueCopyImageToBuffer failed");
|
||||
|
||||
bufptr = (cl_uchar*)malloc(buffer_size);
|
||||
|
||||
err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
|
||||
if (memcmp(imgptr, bufptr, buffer_size) != 0) {
|
||||
log_error( "ERROR: Results did not validate!\n" );
|
||||
unsigned char * inchar = (unsigned char*)imgptr;
|
||||
unsigned char * outchar = (unsigned char*)bufptr;
|
||||
int failuresPrinted = 0;
|
||||
int i;
|
||||
for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
|
||||
int failed = 0;
|
||||
int j;
|
||||
for (j=0; j<(int)elem_size; j++)
|
||||
if (inchar[i+j] != outchar[i+j])
|
||||
failed = 1;
|
||||
char values[4096];
|
||||
values[0] = 0;
|
||||
if (failed) {
|
||||
sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
|
||||
int j;
|
||||
for (j=0; j<(int)elem_size; j++)
|
||||
sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
|
||||
sprintf(values + strlen(values), "] != expected [");
|
||||
for (j=0; j<(int)elem_size; j++)
|
||||
sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
|
||||
sprintf(values + strlen(values), "]");
|
||||
log_error("%s\n", values);
|
||||
failuresPrinted++;
|
||||
}
|
||||
if (failuresPrinted > 5) {
|
||||
log_error("Not printing further failures...\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
err = -1;
|
||||
}
|
||||
|
||||
free(imgptr);
|
||||
free(bufptr);
|
||||
|
||||
if (err)
|
||||
log_error("IMAGE3D to ARRAY copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
|
||||
(unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
// Runs the image -> buffer copy test for every 3D image format the context
// reports as supported with CL_MEM_READ_WRITE.
//
// Returns 0 when every format passes (or when the device-support check macro
// skips the test); otherwise a non-zero value built by OR-ing the per-format
// results.
int test_imagearraycopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int err;
    cl_image_format *formats;
    cl_uint num_formats;
    cl_uint i;

    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )

    // First query only the number of supported formats.
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
    test_error(err, "clGetSupportedImageFormats failed");

    formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
    if (formats == NULL) {
        log_error("ERROR: Unable to allocate the list of %u image formats\n", (unsigned int)num_formats);
        return -1;
    }

    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL);
    if (err != CL_SUCCESS)
        free(formats); // test_error below returns, so release the list first
    test_error(err, "clGetSupportedImageFormats failed");

    for (i = 0; i < num_formats; i++) {
        err |= test_imagearraycopy3d_single_format(device, context, queue, &formats[i]);
    }

    free(formats);

    if (err)
        log_error("IMAGE3D to ARRAY copy test failed\n");
    else
        log_info("IMAGE3D to ARRAY copy test passed\n");

    return err;
}
|
||||
235
test_conformance/basic/test_imagecopy.c
Normal file
235
test_conformance/basic/test_imagecopy.c
Normal file
@@ -0,0 +1,235 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
static unsigned char *
|
||||
generate_rgba8_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Compares the w*h*4 bytes of two RGBA8 images.
// Returns 0 when they are identical and -1 at the first differing byte.
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    const int total = w * h * 4;
    int idx = 0;

    while (idx < total)
    {
        if (image[idx] != outptr[idx])
            return -1;
        ++idx;
    }

    return 0;
}
|
||||
|
||||
|
||||
static unsigned short *
|
||||
generate_rgba16_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned short *ptr = (unsigned short *)malloc(w * h * 4 * sizeof(unsigned short));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (unsigned short)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Compares the w*h*4 16-bit channels of two RGBA16 images.
// Returns 0 when they are identical and -1 at the first differing channel.
static int
verify_rgba16_image(unsigned short *image, unsigned short *outptr, int w, int h)
{
    const int total = w * h * 4;
    int idx = 0;

    while (idx < total)
    {
        if (image[idx] != outptr[idx])
            return -1;
        ++idx;
    }

    return 0;
}
|
||||
|
||||
|
||||
static float *
|
||||
generate_rgbafp_image(int w, int h, MTdata d)
|
||||
{
|
||||
float *ptr = (float*)malloc(w * h * 4 * sizeof(float));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Compares the w*h*4 float channels of two RGBA float images using the
// floating-point != operator.
// Returns 0 when every channel compares equal and -1 at the first difference.
static int
verify_rgbafp_image(float *image, float *outptr, int w, int h)
{
    const int total = w * h * 4;
    int idx = 0;

    while (idx < total)
    {
        if (image[idx] != outptr[idx])
            return -1;
        ++idx;
    }

    return 0;
}
|
||||
|
||||
|
||||
int
|
||||
test_imagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_image_format img_format;
|
||||
unsigned char *rgba8_inptr, *rgba8_outptr;
|
||||
unsigned short *rgba16_inptr, *rgba16_outptr;
|
||||
float *rgbafp_inptr, *rgbafp_outptr;
|
||||
clMemWrapper streams[6];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
int i, err;
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
|
||||
rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
|
||||
rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
|
||||
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
|
||||
rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT16;
|
||||
streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_FLOAT;
|
||||
streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
void *p, *outp;
|
||||
int x, y, delta_w = img_width/8, delta_h = img_height/16;
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
p = (void *)rgba8_inptr;
|
||||
outp = (void *)rgba8_outptr;
|
||||
log_info("Testing CL_RGBA CL_UNORM_INT8\n");
|
||||
break;
|
||||
case 1:
|
||||
p = (void *)rgba16_inptr;
|
||||
outp = (void *)rgba16_outptr;
|
||||
log_info("Testing CL_RGBA CL_UNORM_INT16\n");
|
||||
break;
|
||||
case 2:
|
||||
p = (void *)rgbafp_inptr;
|
||||
outp = (void *)rgbafp_outptr;
|
||||
log_info("Testing CL_RGBA CL_FLOAT\n");
|
||||
break;
|
||||
}
|
||||
|
||||
size_t origin[3] = {0,0,0}, region[3] = {img_width, img_height, 1};
|
||||
err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
int copy_number = 0;
|
||||
for (y=0; y<img_height; y+=delta_h)
|
||||
{
|
||||
for (x=0; x<img_width; x+=delta_w)
|
||||
{
|
||||
copy_number++;
|
||||
size_t copy_origin[3] = {x,y,0}, copy_region[3]={delta_w, delta_h, 1};
|
||||
err = clEnqueueCopyImage(queue, streams[i*2], streams[i*2+1],
|
||||
copy_origin, copy_origin, copy_region,
|
||||
0, NULL, NULL);
|
||||
if (err) {
|
||||
log_error("Copy %d (origin [%d, %d], size [%d, %d], image size [%d x %d]) Failed\n", copy_number, x, y, delta_w, delta_h, img_width, img_height);
|
||||
}
|
||||
test_error(err, "clEnqueueCopyImage failed");
|
||||
}
|
||||
}
|
||||
|
||||
err = clEnqueueReadImage(queue, streams[i*2+1], CL_TRUE, origin, region, 0, 0, outp, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueReadImage failed");
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, img_width, img_height);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, img_width, img_height);
|
||||
break;
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
free(rgba8_inptr);
|
||||
free(rgba16_inptr);
|
||||
free(rgbafp_inptr);
|
||||
free(rgba8_outptr);
|
||||
free(rgba16_outptr);
|
||||
free(rgbafp_outptr);
|
||||
|
||||
if (err)
|
||||
log_error("IMAGE copy test failed\n");
|
||||
else
|
||||
log_info("IMAGE copy test passed\n");
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
238
test_conformance/basic/test_imagecopy3d.c
Normal file
238
test_conformance/basic/test_imagecopy3d.c
Normal file
@@ -0,0 +1,238 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
static unsigned char *
|
||||
generate_uint8_image(unsigned num_elements, MTdata d)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(num_elements);
|
||||
unsigned i;
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Compares num_elements bytes of two images.
// Returns 0 when they are identical and -1 at the first differing byte.
static int
verify_uint8_image(unsigned char *image, unsigned char *outptr, unsigned num_elements)
{
    unsigned idx = 0;

    while (idx < num_elements)
    {
        if (image[idx] != outptr[idx])
            return -1;
        ++idx;
    }

    return 0;
}
|
||||
|
||||
|
||||
static unsigned short *
|
||||
generate_uint16_image(unsigned num_elements, MTdata d)
|
||||
{
|
||||
unsigned short *ptr = (unsigned short *)malloc(num_elements * sizeof(unsigned short));
|
||||
unsigned i;
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
ptr[i] = (unsigned short)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Compares num_elements 16-bit values of two images.
// Returns 0 when they are identical and -1 at the first differing value.
static int
verify_uint16_image(unsigned short *image, unsigned short *outptr, unsigned num_elements)
{
    unsigned idx = 0;

    while (idx < num_elements)
    {
        if (image[idx] != outptr[idx])
            return -1;
        ++idx;
    }

    return 0;
}
|
||||
|
||||
|
||||
static float *
|
||||
generate_float_image(unsigned num_elements, MTdata d)
|
||||
{
|
||||
float *ptr = (float*)malloc(num_elements * sizeof(float));
|
||||
unsigned i;
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Compares num_elements floats of two images using the floating-point
// != operator.
// Returns 0 when every value compares equal and -1 at the first difference.
static int
verify_float_image(float *image, float *outptr, unsigned num_elements)
{
    unsigned idx = 0;

    while (idx < num_elements)
    {
        if (image[idx] != outptr[idx])
            return -1;
        ++idx;
    }

    return 0;
}
|
||||
|
||||
|
||||
int
|
||||
test_imagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements_ignored)
|
||||
{
|
||||
cl_image_format img_format;
|
||||
unsigned char *rgba8_inptr, *rgba8_outptr;
|
||||
unsigned short *rgba16_inptr, *rgba16_outptr;
|
||||
float *rgbafp_inptr, *rgbafp_outptr;
|
||||
clMemWrapper streams[6];
|
||||
int img_width = 128;
|
||||
int img_height = 128;
|
||||
int img_depth = 64;
|
||||
int i;
|
||||
cl_int err;
|
||||
unsigned num_elements = img_width * img_height * img_depth * 4;
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
rgba8_inptr = (unsigned char *)generate_uint8_image(num_elements, d);
|
||||
rgba16_inptr = (unsigned short *)generate_uint16_image(num_elements, d);
|
||||
rgbafp_inptr = (float *)generate_float_image(num_elements, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * num_elements);
|
||||
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * num_elements);
|
||||
rgbafp_outptr = (float*)malloc(sizeof(float) * num_elements);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
streams[1] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT16;
|
||||
streams[2] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
streams[3] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_FLOAT;
|
||||
streams[4] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
streams[5] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
void *p, *outp;
|
||||
int x, y, z, delta_w = img_width/8, delta_h = img_height/16, delta_d = img_depth/4;
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
p = (void *)rgba8_inptr;
|
||||
outp = (void *)rgba8_outptr;
|
||||
break;
|
||||
case 1:
|
||||
p = (void *)rgba16_inptr;
|
||||
outp = (void *)rgba16_outptr;
|
||||
break;
|
||||
case 2:
|
||||
p = (void *)rgbafp_inptr;
|
||||
outp = (void *)rgbafp_outptr;
|
||||
break;
|
||||
}
|
||||
|
||||
size_t origin[3]={0,0,0}, region[3]={img_width, img_height, img_depth};
|
||||
err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteImage failed");
|
||||
|
||||
for (z=0; z<img_depth; z+=delta_d)
|
||||
{
|
||||
for (y=0; y<img_height; y+=delta_h)
|
||||
{
|
||||
for (x=0; x<img_width; x+=delta_w)
|
||||
{
|
||||
origin[0] = x; origin[1] = y; origin[2] = z;
|
||||
region[0] = delta_w; region[1] = delta_h; region[2] = delta_d;
|
||||
|
||||
err = clEnqueueCopyImage(queue, streams[i*2], streams[i*2+1], origin, origin, region, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueCopyImage failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
origin[0] = 0; origin[1] = 0; origin[2] = 0;
|
||||
region[0] = img_width; region[1] = img_height; region[2] = img_depth;
|
||||
err = clEnqueueReadImage(queue, streams[i*2+1], CL_TRUE, origin, region, 0, 0, outp, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueReadImage failed");
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_uint8_image(rgba8_inptr, rgba8_outptr, num_elements);
|
||||
if (err) log_error("Failed uint8\n");
|
||||
break;
|
||||
case 1:
|
||||
err = verify_uint16_image(rgba16_inptr, rgba16_outptr, num_elements);
|
||||
if (err) log_error("Failed uint16\n");
|
||||
break;
|
||||
case 2:
|
||||
err = verify_float_image(rgbafp_inptr, rgbafp_outptr, num_elements);
|
||||
if (err) log_error("Failed float\n");
|
||||
break;
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
free(rgba8_inptr);
|
||||
free(rgba16_inptr);
|
||||
free(rgbafp_inptr);
|
||||
free(rgba8_outptr);
|
||||
free(rgba16_outptr);
|
||||
free(rgbafp_outptr);
|
||||
|
||||
if (err)
|
||||
log_error("IMAGE3D copy test failed\n");
|
||||
else
|
||||
log_info("IMAGE3D copy test passed\n");
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
524
test_conformance/basic/test_imagedim.c
Normal file
524
test_conformance/basic/test_imagedim.c
Normal file
@@ -0,0 +1,524 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// OpenCL C source of the kernel used by the image dimension tests: each
// work-item reads the texel at its 2D global id from srcimg through the
// supplied sampler and writes the same colour to that position in dstimg.
static const char *image_dim_kernel_code =
    "\n"
    "__kernel void test_image_dim(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " float4 color;\n"
    "\n"
    " color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
    " write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
static unsigned char *
|
||||
generate_8888_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Compares two w x h images with four 8-bit channels per pixel.
//
// All w*h*4 bytes are compared; checking only w*h bytes would cover just the
// first quarter of the pixel data. The byte count is computed in size_t so
// large images do not overflow int arithmetic.
// Returns 0 when the images are identical (or when w or h is not positive)
// and -1 at the first differing byte.
static int
verify_8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    size_t num_bytes;
    size_t i;

    if (w <= 0 || h <= 0)
        return 0;

    num_bytes = (size_t)w * (size_t)h * 4;

    for (i=0; i<num_bytes; i++)
    {
        if (outptr[i] != image[i])
            return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
int
|
||||
test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_image_format img_format;
|
||||
unsigned char *input_ptr, *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[2];
|
||||
cl_ulong max_mem_size;
|
||||
int img_width, max_img_width;
|
||||
int img_height, max_img_height;
|
||||
int max_img_dim;
|
||||
int i, j, i2, j2, err=0;
|
||||
size_t max_image2d_width, max_image2d_height;
|
||||
int total_errors = 0;
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
err = create_single_kernel_helper( context, &program, &kernel, 1, &image_dim_kernel_code, "test_image_dim" );
|
||||
if (err)
|
||||
{
|
||||
log_error("create_program_and_kernel_with_sources failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,sizeof(max_mem_size), &max_mem_size, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_image2d_width), &max_image2d_width, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_WIDTH failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_image2d_width), &max_image2d_height, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_HEIGHT failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
log_info("Device reported max image sizes of %lu x %lu, and max mem size of %gMB.\n",
|
||||
max_image2d_width, max_image2d_height, max_mem_size/(1024.0*1024.0));
|
||||
|
||||
if (max_mem_size > (cl_ulong)SIZE_MAX) {
|
||||
max_mem_size = (cl_ulong)SIZE_MAX;
|
||||
}
|
||||
|
||||
cl_sampler_properties properties[] = {
|
||||
CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
|
||||
CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
|
||||
CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
|
||||
0 };
|
||||
cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err);
|
||||
test_error(err, "clCreateSamplerWithProperties failed");
|
||||
|
||||
max_img_width = (int)max_image2d_width;
|
||||
max_img_height = (int)max_image2d_height;
|
||||
|
||||
// determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel,
|
||||
// and we want to consume 1/4 of global memory (this is the minimum required to be
|
||||
// supported by the spec)
|
||||
max_mem_size /= 4; // use 1/4
|
||||
max_mem_size /= 4; // 4 bytes per pixel
|
||||
max_img_dim = (int)sqrt((double)max_mem_size);
|
||||
// convert to a power of 2
|
||||
{
|
||||
unsigned int n = (unsigned int)max_img_dim;
|
||||
unsigned int m = 0x80000000;
|
||||
|
||||
// round-down to the nearest power of 2
|
||||
while (m > n)
|
||||
m >>= 1;
|
||||
|
||||
max_img_dim = (int)m;
|
||||
}
|
||||
|
||||
if (max_img_width > max_img_dim)
|
||||
max_img_width = max_img_dim;
|
||||
if (max_img_height > max_img_dim)
|
||||
max_img_height = max_img_dim;
|
||||
|
||||
log_info("Adjusted maximum image size to test is %d x %d, which is a max mem size of %gMB.\n",
|
||||
max_img_width, max_img_height, (max_img_width*max_img_height*4)/(1024.0*1024.0));
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
input_ptr = generate_8888_image(max_img_width, max_img_height, d);
|
||||
output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height);
|
||||
|
||||
// test power of 2 width, height starting at 1 to 4K
|
||||
for (i=1,i2=0; i<=max_img_height; i<<=1,i2++)
|
||||
{
|
||||
img_height = (1 << i2);
|
||||
for (j=1,j2=0; j<=max_img_width; j<<=1,j2++)
|
||||
{
|
||||
img_width = (1 << j2);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("create_image_2d failed. width = %d, height = %d\n", img_width, img_height);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("create_image_2d failed. width = %d, height = %d\n", img_width, img_height);
|
||||
clReleaseMemObject(streams[0]);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = {0,0,0};
|
||||
size_t region[3] = {img_width, img_height, 1};
|
||||
err = clEnqueueWriteImage(queue, streams[0], CL_FALSE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteImage failed\n");
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (size_t)img_width;
|
||||
threads[1] = (size_t)img_height;
|
||||
log_info("Testing image dimensions %d x %d with local threads NULL.\n", img_width, img_height);
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
log_error("Image Dimension test failed. image width = %d, image height = %d, local NULL\n",
|
||||
img_width, img_height);
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueReadImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clReadImage failed\n");
|
||||
log_error("Image Dimension test failed. image width = %d, image height = %d, local NULL\n",
|
||||
img_width, img_height);
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
err = verify_8888_image(input_ptr, output_ptr, img_width, img_height);
|
||||
if (err)
|
||||
{
|
||||
total_errors++;
|
||||
log_error("Image Dimension test failed. image width = %d, image height = %d\n", img_width, img_height);
|
||||
}
|
||||
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
}
|
||||
}
|
||||
|
||||
// cleanup
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
clReleaseSampler(sampler);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
|
||||
return total_errors;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int
|
||||
test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_image_format img_format;
|
||||
unsigned char *input_ptr, *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[2], local_threads[2];
|
||||
cl_ulong max_mem_size;
|
||||
int img_width, max_img_width;
|
||||
int img_height, max_img_height;
|
||||
int max_img_dim;
|
||||
int i, j, i2, j2, err=0;
|
||||
size_t max_image2d_width, max_image2d_height;
|
||||
int total_errors = 0;
|
||||
size_t max_local_workgroup_size[3];
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
err = create_single_kernel_helper( context, &program, &kernel, 1, &image_dim_kernel_code, "test_image_dim" );
|
||||
if (err)
|
||||
{
|
||||
log_error("create_program_and_kernel_with_sources failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t work_group_size = 0;
|
||||
err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(work_group_size), &work_group_size, NULL);
|
||||
test_error(err, "clGetKerenlWorkgroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE");
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
|
||||
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,sizeof(max_mem_size), &max_mem_size, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_image2d_width), &max_image2d_width, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_WIDTH failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_image2d_width), &max_image2d_height, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_HEIGHT failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
log_info("Device reported max image sizes of %lu x %lu, and max mem size of %gMB.\n",
|
||||
max_image2d_width, max_image2d_height, max_mem_size/(1024.0*1024.0));
|
||||
|
||||
cl_sampler_properties properties[] = {
|
||||
CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
|
||||
CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
|
||||
CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
|
||||
0 };
|
||||
cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err);
|
||||
test_error(err, "clCreateSamplerWithProperties failed");
|
||||
|
||||
max_img_width = (int)max_image2d_width;
|
||||
max_img_height = (int)max_image2d_height;
|
||||
|
||||
if (max_mem_size > (cl_ulong)SIZE_MAX) {
|
||||
max_mem_size = (cl_ulong)SIZE_MAX;
|
||||
}
|
||||
|
||||
// determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel,
|
||||
// and we want to consume 1/4 of global memory (this is the minimum required to be
|
||||
// supported by the spec)
|
||||
max_mem_size /= 4; // use 1/4
|
||||
max_mem_size /= 4; // 4 bytes per pixel
|
||||
max_img_dim = (int)sqrt((double)max_mem_size);
|
||||
// convert to a power of 2
|
||||
{
|
||||
unsigned int n = (unsigned int)max_img_dim;
|
||||
unsigned int m = 0x80000000;
|
||||
|
||||
// round-down to the nearest power of 2
|
||||
while (m > n)
|
||||
m >>= 1;
|
||||
|
||||
max_img_dim = (int)m;
|
||||
}
|
||||
|
||||
if (max_img_width > max_img_dim)
|
||||
max_img_width = max_img_dim;
|
||||
if (max_img_height > max_img_dim)
|
||||
max_img_height = max_img_dim;
|
||||
|
||||
log_info("Adjusted maximum image size to test is %d x %d, which is a max mem size of %gMB.\n",
|
||||
max_img_width, max_img_height, (max_img_width*max_img_height*4)/(1024.0*1024.0));
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
input_ptr = generate_8888_image(max_img_width, max_img_height, d);
|
||||
output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height);
|
||||
|
||||
int plus_minus;
|
||||
for (plus_minus=0; plus_minus < 3; plus_minus++)
|
||||
{
|
||||
|
||||
// test power of 2 width, height starting at 1 to 4K
|
||||
for (i=2,i2=1; i<=max_img_height; i<<=1,i2++)
|
||||
{
|
||||
img_height = (1 << i2);
|
||||
for (j=2,j2=1; j<=max_img_width; j<<=1,j2++)
|
||||
{
|
||||
img_width = (1 << j2);
|
||||
|
||||
int effective_img_height = img_height;
|
||||
int effective_img_width = img_width;
|
||||
|
||||
local_threads[0] = 1;
|
||||
local_threads[1] = 1;
|
||||
|
||||
switch (plus_minus) {
|
||||
case 0:
|
||||
effective_img_height--;
|
||||
local_threads[0] = work_group_size > max_local_workgroup_size[0] ? max_local_workgroup_size[0] : work_group_size;
|
||||
while (img_width%local_threads[0] != 0)
|
||||
local_threads[0]--;
|
||||
break;
|
||||
case 1:
|
||||
effective_img_width--;
|
||||
local_threads[1] = work_group_size > max_local_workgroup_size[1] ? max_local_workgroup_size[1] : work_group_size;
|
||||
while (img_height%local_threads[1] != 0)
|
||||
local_threads[1]--;
|
||||
break;
|
||||
case 2:
|
||||
effective_img_width--;
|
||||
effective_img_height--;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, effective_img_width, effective_img_height, 0, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("create_image_2d failed. width = %d, height = %d\n", effective_img_width, effective_img_height);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, effective_img_width, effective_img_height, 0, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("create_image_2d failed. width = %d, height = %d\n", effective_img_width, effective_img_height);
|
||||
clReleaseMemObject(streams[0]);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = {0,0,0};
|
||||
size_t region[3] = {effective_img_width, effective_img_height, 1};
|
||||
err = clEnqueueWriteImage(queue, streams[0], CL_FALSE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteImage failed\n");
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (size_t)effective_img_width;
|
||||
threads[1] = (size_t)effective_img_height;
|
||||
log_info("Testing image dimensions %d x %d with local threads %d x %d.\n",
|
||||
effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]);
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, local_threads, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
log_error("Image Dimension test failed. image width = %d, image height = %d, local %d x %d\n",
|
||||
effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]);
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueReadImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clReadImage failed\n");
|
||||
log_error("Image Dimension test failed. image width = %d, image height = %d, local %d x %d\n",
|
||||
effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]);
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
err = verify_8888_image(input_ptr, output_ptr, effective_img_width, effective_img_height);
|
||||
if (err)
|
||||
{
|
||||
total_errors++;
|
||||
log_error("Image Dimension test failed. image width = %d, image height = %d\n", effective_img_width, effective_img_height);
|
||||
}
|
||||
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// cleanup
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
clReleaseSampler(sampler);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
|
||||
return total_errors;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
226
test_conformance/basic/test_imagenpot.c
Normal file
226
test_conformance/basic/test_imagenpot.c
Normal file
@@ -0,0 +1,226 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * OpenCL C source of the copy kernel: each work-item whose global id lies
 * inside dstimg reads one texel of srcimg through the sampler and writes it
 * to the same coordinate of dstimg; work-items outside the image return
 * without touching it.
 */
static const char *rgba8888_kernel_code =
    "\n__kernel void test_rgba8888(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " float4 color;\n\n"
    " if ( (tid_x >= get_image_width(dstimg)) || (tid_y >= get_image_height(dstimg)) )\n"
    " return;\n"
    " color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
    " write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n\n"
    "}\n";
|
||||
|
||||
|
||||
static unsigned char *
|
||||
generate_8888_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Compare the first w*h*4 bytes of dst against src.
 * Logs the outcome and returns 0 when all bytes match, -1 on the first
 * mismatch.
 */
static int
verify_rgba8888_image(unsigned char *src, unsigned char *dst, int w, int h)
{
    const int total = w * h * 4;
    int pos = 0;

    while (pos < total)
    {
        if (src[pos] != dst[pos])
        {
            log_error("NPOT_IMAGE_RGBA_UNORM_INT8 test for width = %d, height = %d failed\n", w, h);
            return -1;
        }
        pos++;
    }

    log_info("NPOT_IMAGE_RGBA_UNORM_INT8 test for width = %d, height = %d passed\n", w, h);
    return 0;
}
|
||||
|
||||
|
||||
int img_width_selection[] = { 97, 111, 322, 479 };
|
||||
int img_height_selection[] = { 149, 222, 754, 385 };
|
||||
|
||||
int
|
||||
test_imagenpot(cl_device_id device_id, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_image_format img_format;
|
||||
unsigned char *input_ptr, *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t global_threads[3], local_threads[3];
|
||||
size_t local_workgroup_size;
|
||||
int img_width;
|
||||
int img_height;
|
||||
int err;
|
||||
cl_uint m;
|
||||
size_t max_local_workgroup_size[3];
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device_id )
|
||||
|
||||
cl_device_type device_type;
|
||||
err = clGetDeviceInfo(device_id, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
|
||||
if (err) {
|
||||
log_error("Failed to get device type: %d\n",err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (m=0; m<sizeof(img_width_selection)/sizeof(int); m++)
|
||||
{
|
||||
img_width = img_width_selection[m];
|
||||
img_height = img_height_selection[m];
|
||||
input_ptr = generate_8888_image(img_width, img_height, d);
|
||||
output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format,
|
||||
img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format,
|
||||
img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = {0,0,0}, region[3] = {img_width, img_height, 1};
|
||||
err = clEnqueueWriteImage(queue, streams[0], CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
input_ptr,
|
||||
0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteImage failed\n");
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, &rgba8888_kernel_code, "test_rgba8888" );
|
||||
if (err)
|
||||
{
|
||||
log_error("Failed to create kernel and program: %d\n", err);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_sampler_properties properties[] = {
|
||||
CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
|
||||
CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
|
||||
CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
|
||||
0 };
|
||||
cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err);
|
||||
test_error(err, "clCreateSamplerWithProperties failed");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local_workgroup_size), &local_workgroup_size, NULL);
|
||||
test_error(err, "clGetKernelWorkGroupInfo for CL_KERNEL_WORK_GROUP_SIZE failed");
|
||||
|
||||
err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
|
||||
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
|
||||
|
||||
// Pick the minimum of the device and the kernel
|
||||
if (local_workgroup_size > max_local_workgroup_size[0])
|
||||
local_workgroup_size = max_local_workgroup_size[0];
|
||||
|
||||
global_threads[0] = ((img_width + local_workgroup_size - 1) / local_workgroup_size) * local_workgroup_size;
|
||||
global_threads[1] = img_height;
|
||||
local_threads[0] = local_workgroup_size;
|
||||
local_threads[1] = 1;
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, global_threads, local_threads, 0, NULL, NULL );
|
||||
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("%s clEnqueueNDRangeKernel failed\n", __FUNCTION__);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueReadImage(queue, streams[1], CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
(void *)output_ptr,
|
||||
0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verify_rgba8888_image(input_ptr, output_ptr, img_width, img_height);
|
||||
|
||||
// cleanup
|
||||
clReleaseSampler(sampler);
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
free_mtdata(d);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
270
test_conformance/basic/test_imagerandomcopy.c
Normal file
270
test_conformance/basic/test_imagerandomcopy.c
Normal file
@@ -0,0 +1,270 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
static unsigned char *
|
||||
generate_rgba8_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Compare the w x h pixel rectangle whose top-left corner is (x, y) in two
 * RGBA8 images that are both img_width pixels wide.
 * Returns 0 when every channel inside the rectangle matches, -1 otherwise.
 */
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int x, int y, int w, int h, int img_width)
{
    const int first_chan = x * 4;
    const int end_chan = (x + w) * 4;
    int row = y;

    while (row < (y + h))
    {
        const int row_base = row * img_width * 4;
        int chan;

        for (chan = first_chan; chan < end_chan; ++chan)
        {
            if (image[row_base + chan] != outptr[row_base + chan])
                return -1;
        }
        ++row;
    }

    return 0;
}
|
||||
|
||||
|
||||
static unsigned short *
|
||||
generate_rgba16_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned short *ptr = (unsigned short*)malloc(w * h * 4 * sizeof(unsigned short));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (unsigned short)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Compare the w x h pixel rectangle whose top-left corner is (x, y) in two
 * RGBA16 images that are both img_width pixels wide.
 * Returns 0 when every channel inside the rectangle matches, -1 otherwise.
 */
static int
verify_rgba16_image(unsigned short *image, unsigned short *outptr, int x, int y, int w, int h, int img_width)
{
    const int first_chan = x * 4;
    const int end_chan = (x + w) * 4;
    int row = y;

    while (row < (y + h))
    {
        const int row_base = row * img_width * 4;
        int chan;

        for (chan = first_chan; chan < end_chan; ++chan)
        {
            if (image[row_base + chan] != outptr[row_base + chan])
                return -1;
        }
        ++row;
    }

    return 0;
}
|
||||
|
||||
|
||||
static float *
|
||||
generate_rgbafp_image(int w, int h, MTdata d)
|
||||
{
|
||||
float *ptr = (float*)malloc(w * h * 4 * sizeof(float));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Compare the w x h pixel rectangle whose top-left corner is (x, y) in two
 * RGBA float images that are both img_width pixels wide. Channels are
 * compared with !=, so the values must be exactly equal.
 * Returns 0 when every channel inside the rectangle matches, -1 otherwise.
 */
static int
verify_rgbafp_image(float *image, float *outptr, int x, int y, int w, int h, int img_width)
{
    const int first_chan = x * 4;
    const int end_chan = (x + w) * 4;
    int row = y;

    while (row < (y + h))
    {
        const int row_base = row * img_width * 4;
        int chan;

        for (chan = first_chan; chan < end_chan; ++chan)
        {
            if (image[row_base + chan] != outptr[row_base + chan])
                return -1;
        }
        ++row;
    }

    return 0;
}
|
||||
|
||||
|
||||
#define NUM_COPIES 10
|
||||
static const char *test_str_names[] = { "CL_RGBA CL_UNORM_INT8", "CL_RGBA CL_UNORM_INT16", "CL_RGBA CL_FLOAT" };
|
||||
|
||||
int
|
||||
test_imagerandomcopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_image_format img_format;
|
||||
unsigned char *rgba8_inptr, *rgba8_outptr;
|
||||
unsigned short *rgba16_inptr, *rgba16_outptr;
|
||||
float *rgbafp_inptr, *rgbafp_outptr;
|
||||
clMemWrapper streams[6];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
int i, j;
|
||||
cl_int err;
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
log_info("Testing with image %d x %d.\n", img_width, img_height);
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
|
||||
rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
|
||||
rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
|
||||
|
||||
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
|
||||
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
|
||||
rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT16;
|
||||
streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_FLOAT;
|
||||
streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
void *p, *outp;
|
||||
unsigned int x[2], y[2], delta_w, delta_h ;
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
p = (void *)rgba8_inptr;
|
||||
outp = (void *)rgba8_outptr;
|
||||
break;
|
||||
case 1:
|
||||
p = (void *)rgba16_inptr;
|
||||
outp = (void *)rgba16_outptr;
|
||||
break;
|
||||
case 2:
|
||||
p = (void *)rgbafp_inptr;
|
||||
outp = (void *)rgbafp_outptr;
|
||||
break;
|
||||
}
|
||||
|
||||
size_t origin[3]={0,0,0}, region[3]={img_width, img_height,1};
|
||||
err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
|
||||
// err = clWriteImage(context, streams[i*2], false, 0, 0, 0, img_width, img_height, 0, NULL, 0, 0, p, NULL);
|
||||
test_error(err, "clEnqueueWriteImage failed");
|
||||
|
||||
for (j=0; j<NUM_COPIES; j++)
|
||||
{
|
||||
x[0] = (int)get_random_float(0, img_width, d);
|
||||
do
|
||||
{
|
||||
x[1] = (int)get_random_float(0, img_width, d);
|
||||
} while (x[1] <= x[0]);
|
||||
|
||||
y[0] = (int)get_random_float(0, img_height, d);
|
||||
do
|
||||
{
|
||||
y[1] = (int)get_random_float(0, img_height, d);
|
||||
} while (y[1] <= y[0]);
|
||||
|
||||
delta_w = x[1] - x[0];
|
||||
delta_h = y[1] - y[0];
|
||||
log_info("Testing clCopyImage for %s: x = %d, y = %d, w = %d, h = %d\n", test_str_names[i], x[0], y[0], delta_w, delta_h);
|
||||
origin[0] = x[0];
|
||||
origin[1] = y[0];
|
||||
origin[2] = 0;
|
||||
region[0] = delta_w;
|
||||
region[1] = delta_h;
|
||||
region[2] = 1;
|
||||
err = clEnqueueCopyImage(queue, streams[i*2], streams[i*2+1], origin, origin, region, 0, NULL, NULL);
|
||||
// err = clCopyImage(context, streams[i*2], streams[i*2+1],
|
||||
// x[0], y[0], 0, x[0], y[0], 0, delta_w, delta_h, 0, NULL);
|
||||
test_error(err, "clEnqueueCopyImage failed");
|
||||
|
||||
origin[0] = 0;
|
||||
origin[1] = 0;
|
||||
origin[2] = 0;
|
||||
region[0] = img_width;
|
||||
region[1] = img_height;
|
||||
region[2] = 1;
|
||||
err = clEnqueueReadImage(queue, streams[i*2+1], CL_TRUE, origin, region, 0, 0, outp, 0, NULL, NULL);
|
||||
// err = clReadImage(context, streams[i*2+1], false, 0, 0, 0, img_width, img_height, 0, 0, 0, outp, NULL);
|
||||
test_error(err, "clEnqueueReadImage failed");
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, x[0], y[0], delta_w, delta_h, img_width);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, x[0], y[0], delta_w, delta_h, img_width);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, x[0], y[0], delta_w, delta_h, img_width);
|
||||
break;
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
free_mtdata(d); d = NULL;
|
||||
free(rgba8_inptr);
|
||||
free(rgba16_inptr);
|
||||
free(rgbafp_inptr);
|
||||
free(rgba8_outptr);
|
||||
free(rgba16_outptr);
|
||||
free(rgbafp_outptr);
|
||||
|
||||
if (err)
|
||||
log_error("IMAGE random copy test failed\n");
|
||||
else
|
||||
log_info("IMAGE random copy test passed\n");
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
418
test_conformance/basic/test_imagereadwrite.c
Normal file
418
test_conformance/basic/test_imagereadwrite.c
Normal file
@@ -0,0 +1,418 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
static unsigned char *
|
||||
generate_rgba8_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
update_rgba8_image(unsigned char *p, int x, int y, int w, int h, int img_width, MTdata d)
|
||||
{
|
||||
int i, j, indx;
|
||||
|
||||
for (i=y; i<y+h; i++)
|
||||
{
|
||||
indx = (i * img_width + x) * 4;
|
||||
for (j=x; j<x+w; j++,indx+=4)
|
||||
{
|
||||
p[indx+0] = (unsigned char)genrand_int32(d);
|
||||
p[indx+1] = (unsigned char)genrand_int32(d);
|
||||
p[indx+2] = (unsigned char)genrand_int32(d);
|
||||
p[indx+3] = (unsigned char)genrand_int32(d);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Copy a tightly packed w x h block of pixels from 'in' into the rectangle
 * at (x, y) of the image 'out', which is img_width pixels wide. Each pixel
 * occupies elem_size bytes; 'in' is consumed sequentially, byte by byte.
 */
static void
update_image_from_image(void *out, void *in, int x, int y, int w, int h, int img_width, int elem_size)
{
    char *dst = (char*)out;
    char *src = (char*)in;
    int src_pos = 0;
    int row, col, b;

    for (row = y; row < y + h; row++)
    {
        int dst_pos = (row * img_width + x) * elem_size;

        for (col = x; col < x + w; col++)
        {
            for (b = 0; b < elem_size; b++)
                dst[dst_pos++] = src[src_pos++];
        }
    }
}
|
||||
|
||||
/*
 * Compare two w x h RGBA8 images channel by channel.
 *
 * On the first mismatch the index of the differing channel is logged
 * together with all four channels of the pixel that contains it, and -1 is
 * returned. Returns 0 when the images are identical.
 */
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    int i;

    for (i=0; i<w*h*4; i++)
    {
        if (outptr[i] != image[i])
        {
            // Report the whole pixel that contains channel i. Indexing from
            // i itself would read past the end of both buffers when the
            // mismatch is in one of the last three channels.
            int px = i - (i % 4);
            log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i, image[px], image[px+1], image[px+2], image[px+3], outptr[px], outptr[px+1], outptr[px+2], outptr[px+3]);
            return -1;
        }
    }

    return 0;
}
|
||||
|
||||
|
||||
static unsigned short *
|
||||
generate_rgba16_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned short *ptr = (unsigned short*)malloc(w * h * 4 * sizeof(unsigned short));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (unsigned short)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
update_rgba16_image(unsigned short *p, int x, int y, int w, int h, int img_width, MTdata d)
|
||||
{
|
||||
int i, j, indx;
|
||||
|
||||
for (i=y; i<y+h; i++)
|
||||
{
|
||||
indx = (i * img_width + x) * 4;
|
||||
for (j=x; j<x+w; j++,indx+=4)
|
||||
{
|
||||
p[indx+0] = (unsigned short)genrand_int32(d);
|
||||
p[indx+1] = (unsigned short)genrand_int32(d);
|
||||
p[indx+2] = (unsigned short)genrand_int32(d);
|
||||
p[indx+3] = (unsigned short)genrand_int32(d);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Compare two w x h RGBA16 images channel by channel.
 *
 * On the first mismatch the index of the differing channel is logged
 * together with all four channels of the pixel that contains it, and -1 is
 * returned. Returns 0 when the images are identical.
 */
static int
verify_rgba16_image(unsigned short *image, unsigned short *outptr, int w, int h)
{
    int i;

    for (i=0; i<w*h*4; i++)
    {
        if (outptr[i] != image[i])
        {
            // Report the whole pixel that contains channel i. Indexing from
            // i itself would read past the end of both buffers when the
            // mismatch is in one of the last three channels.
            int px = i - (i % 4);
            log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i, image[px], image[px+1], image[px+2], image[px+3], outptr[px], outptr[px+1], outptr[px+2], outptr[px+3]);
            return -1;
        }
    }

    return 0;
}
|
||||
|
||||
|
||||
static float *
|
||||
generate_rgbafp_image(int w, int h, MTdata d)
|
||||
{
|
||||
float *ptr = (float*)malloc(w * h * 4 * sizeof(float));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
update_rgbafp_image(float *p, int x, int y, int w, int h, int img_width, MTdata d)
|
||||
{
|
||||
int i, j, indx;
|
||||
|
||||
for (i=y; i<y+h; i++)
|
||||
{
|
||||
indx = (i * img_width + x) * 4;
|
||||
for (j=x; j<x+w; j++,indx+=4)
|
||||
{
|
||||
p[indx+0] = get_random_float(-0x40000000, 0x40000000, d);
|
||||
p[indx+1] = get_random_float(-0x40000000, 0x40000000, d);
|
||||
p[indx+2] = get_random_float(-0x40000000, 0x40000000, d);
|
||||
p[indx+3] = get_random_float(-0x40000000, 0x40000000, d);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Compare two w x h RGBA float images channel by channel; values must be
 * exactly equal.
 *
 * On the first mismatch the index of the differing channel is logged
 * together with all four channels of the pixel that contains it, and -1 is
 * returned. Returns 0 when the images are identical.
 */
static int
verify_rgbafp_image(float *image, float *outptr, int w, int h)
{
    int i;

    for (i=0; i<w*h*4; i++)
    {
        if (outptr[i] != image[i])
        {
            // Report the whole pixel that contains channel i. Indexing from
            // i itself would read past the end of both buffers when the
            // mismatch is in one of the last three channels.
            int px = i - (i % 4);
            log_error("i = %d. Expected (%f %f %f %f), got (%f %f %f %f)\n", i, image[px], image[px+1], image[px+2], image[px+3], outptr[px], outptr[px+1], outptr[px+2], outptr[px+3]);
            return -1;
        }
    }

    return 0;
}
|
||||
|
||||
|
||||
int
|
||||
test_imagereadwrite(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_image_format img_format;
|
||||
unsigned char *rgba8_inptr, *rgba8_outptr;
|
||||
unsigned short *rgba16_inptr, *rgba16_outptr;
|
||||
float *rgbafp_inptr, *rgbafp_outptr;
|
||||
clMemWrapper streams[3];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
int num_tries = 200;
|
||||
int i, j, err;
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
|
||||
rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
|
||||
rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
|
||||
|
||||
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
|
||||
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
|
||||
rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT16;
|
||||
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_FLOAT;
|
||||
streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
void *p;
|
||||
|
||||
if (i == 0)
|
||||
p = (void *)rgba8_inptr;
|
||||
else if (i == 1)
|
||||
p = (void *)rgba16_inptr;
|
||||
else
|
||||
p = (void *)rgbafp_inptr;
|
||||
size_t origin[3] = {0,0,0}, region[3] = {img_width, img_height, 1};
|
||||
err = clEnqueueWriteImage(queue, streams[i], CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
p, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteImage2D failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
for (i=0,j=0; i<num_tries*3; i++,j++)
|
||||
{
|
||||
int x = (int)get_random_float(0, img_width, d);
|
||||
int y = (int)get_random_float(0, img_height, d);
|
||||
int w = (int)get_random_float(1, (img_width - x), d);
|
||||
int h = (int)get_random_float(1, (img_height - y), d);
|
||||
size_t input_pitch;
|
||||
int set_input_pitch = (int)(genrand_int32(d) & 0x01);
|
||||
int packed_update = (int)(genrand_int32(d) & 0x01);
|
||||
void *p, *outp;
|
||||
int elem_size;
|
||||
|
||||
if (j == 3)
|
||||
j = 0;
|
||||
|
||||
switch (j)
|
||||
{
|
||||
case 0:
|
||||
//if ((w<=10) || (h<=10)) continue;
|
||||
elem_size = 4;
|
||||
if(packed_update)
|
||||
{
|
||||
p = generate_rgba8_image(w, h, d);
|
||||
update_image_from_image(rgba8_inptr, p, x, y, w, h, img_width, elem_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
update_rgba8_image(rgba8_inptr, x, y, w, h, img_width, d);
|
||||
p = (void *)(rgba8_inptr + ((y * img_width + x) * 4));
|
||||
}
|
||||
outp = (void *)rgba8_outptr;
|
||||
break;
|
||||
case 1:
|
||||
//if ((w<=8) || (h<=8)) continue;
|
||||
elem_size = 2*4;
|
||||
if(packed_update)
|
||||
{
|
||||
p = generate_rgba16_image(w, h, d);
|
||||
update_image_from_image(rgba16_inptr, p, x, y, w, h, img_width, elem_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
update_rgba16_image(rgba16_inptr, x, y, w, h, img_width, d);
|
||||
p = (void *)(rgba16_inptr + ((y * img_width + x) * 4));
|
||||
}
|
||||
outp = (void *)rgba16_outptr;
|
||||
break;
|
||||
case 2:
|
||||
//if ((w<=8) || (h<=8)) continue;
|
||||
elem_size = 4*4;
|
||||
if(packed_update)
|
||||
{
|
||||
p = generate_rgbafp_image(w, h, d);
|
||||
update_image_from_image(rgbafp_inptr, p, x, y, w, h, img_width, elem_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
update_rgbafp_image(rgbafp_inptr, x, y, w, h, img_width, d);
|
||||
p = (void *)(rgbafp_inptr + ((y * img_width + x) * 4));
|
||||
}
|
||||
outp = (void *)rgbafp_outptr;
|
||||
break;
|
||||
}
|
||||
|
||||
const char* update_packed_pitch_name = "";
|
||||
if(packed_update)
|
||||
{
|
||||
if(set_input_pitch)
|
||||
{
|
||||
// for packed updates the pitch does not need to be calculated here (but can be)
|
||||
update_packed_pitch_name = "'packed with pitch'";
|
||||
input_pitch = w*elem_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
// for packed updates the pitch does not need to be calculated here
|
||||
update_packed_pitch_name = "'packed without pitch'";
|
||||
input_pitch = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// for unpacked updates the pitch is required
|
||||
update_packed_pitch_name = "'unpacked with pitch'";
|
||||
input_pitch = img_width*elem_size;
|
||||
}
|
||||
|
||||
size_t origin[3] = {x,y,0}, region[3] = {w, h, 1};
|
||||
err = clEnqueueWriteImage(queue, streams[j], CL_TRUE,
|
||||
origin, region, input_pitch, 0, p,
|
||||
0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteImage update failed for %s %s: %d\n",
|
||||
(packed_update) ? "packed" : "unpacked",
|
||||
(set_input_pitch) ? "set pitch" : "unset pitch", err);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(packed_update)
|
||||
{
|
||||
free(p);
|
||||
p = NULL;
|
||||
}
|
||||
|
||||
memset(outp, 0x7, img_width*img_height*elem_size);
|
||||
|
||||
origin[0]=0; origin[1]=0; origin[2]=0;
|
||||
region[0]=img_width; region[1]=img_height; region[2]=1;
|
||||
err = clEnqueueReadImage(queue, streams[j], CL_TRUE,
|
||||
origin, region, 0,0,
|
||||
outp, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clReadImage failed\n");
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
|
||||
switch (j)
|
||||
{
|
||||
case 0:
|
||||
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
|
||||
if (err)
|
||||
{
|
||||
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
|
||||
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, img_width, img_height);
|
||||
if (err)
|
||||
{
|
||||
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
|
||||
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, img_width, img_height);
|
||||
if (err)
|
||||
{
|
||||
log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
|
||||
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (err) break;
|
||||
}
|
||||
|
||||
free_mtdata(d);
|
||||
free(rgba8_inptr);
|
||||
free(rgba16_inptr);
|
||||
free(rgbafp_inptr);
|
||||
free(rgba8_outptr);
|
||||
free(rgba16_outptr);
|
||||
free(rgbafp_outptr);
|
||||
|
||||
if (!err)
|
||||
log_info("IMAGE read, write test passed\n");
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
418
test_conformance/basic/test_imagereadwrite3d.c
Normal file
418
test_conformance/basic/test_imagereadwrite3d.c
Normal file
@@ -0,0 +1,418 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
static unsigned char *
|
||||
generate_rgba8_image(int w, int h, int d, MTdata mtData)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(w * h * d *4);
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*d*4; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(mtData);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
update_rgba8_image(unsigned char *p, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, MTdata mtData)
|
||||
{
|
||||
int i, j, k, indx;
|
||||
int img_slice = img_width * img_height;
|
||||
|
||||
for (k=z; k<z+d; k++)
|
||||
for (j=y; j<y+h; j++)
|
||||
{
|
||||
indx = (k * img_slice + j * img_width + x) * 4;
|
||||
for (i=x; i<x+w; i++,indx+=4)
|
||||
{
|
||||
p[indx+0] = (unsigned char)genrand_int32(mtData);
|
||||
p[indx+1] = (unsigned char)genrand_int32(mtData);
|
||||
p[indx+2] = (unsigned char)genrand_int32(mtData);
|
||||
p[indx+3] = (unsigned char)genrand_int32(mtData);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Copies a tightly packed w x h x d block of texels from `in` into the
// sub-box starting at texel (x, y, z) of the larger image `out`.
//
//   out        destination image, img_width texels per row and
//              img_height rows per slice
//   in         source block; rows and slices are stored back to back
//   elem_size  size of one texel in bytes
//   img_depth  not read; kept so the signature mirrors the other helpers
//
// Nothing is copied when any of w, h, d or elem_size is not positive.
static void
update_image_from_image(void *out, void *in, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, int elem_size)
{
    unsigned char *dst = (unsigned char *)out;
    const unsigned char *src = (const unsigned char *)in;
    size_t row_bytes, texels_per_slice;
    int j, k;

    (void)img_depth;

    if (w <= 0 || h <= 0 || d <= 0 || elem_size <= 0)
        return;

    // Offsets are kept in size_t so large images cannot overflow int.
    row_bytes = (size_t)w * (size_t)elem_size;
    texels_per_slice = (size_t)img_width * (size_t)img_height;

    for (k = z; k < z + d; k++)
    {
        for (j = y; j < y + h; j++)
        {
            const size_t first_texel = (size_t)k * texels_per_slice
                                     + (size_t)j * (size_t)img_width
                                     + (size_t)x;

            // One source row is contiguous, so copy it in a single call.
            memcpy(dst + first_texel * (size_t)elem_size, src, row_bytes);
            src += row_bytes;
        }
    }
}
|
||||
|
||||
// Compares two RGBA8 volumes of w x h x d texels byte by byte.
//
// Returns 0 when every channel matches. On the first mismatch it logs the
// mismatching byte index together with the expected and actual values of
// the texel containing that byte, then returns -1.
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h, int d)
{
    const int total = w * h * d * 4;
    int i;

    for (i = 0; i < total; i++)
    {
        if (outptr[i] != image[i])
        {
            // Report the whole texel that contains byte i. Indexing from i
            // itself would print a misaligned tuple and read past the end
            // of both buffers when the mismatch is in the last texel.
            const int t = i - (i % 4);

            log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i, image[t], image[t+1], image[t+2], image[t+3], outptr[t], outptr[t+1], outptr[t+2], outptr[t+3]);
            return -1;
        }
    }

    return 0;
}
|
||||
|
||||
|
||||
static unsigned short *
|
||||
generate_rgba16_image(int w, int h, int d, MTdata mtData)
|
||||
{
|
||||
unsigned short *ptr = (unsigned short*)malloc(w * h * d * 4 * sizeof(unsigned short));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*d*4; i++)
|
||||
ptr[i] = (unsigned short)genrand_int32(mtData);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
update_rgba16_image(unsigned short *p, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, MTdata mtData)
|
||||
{
|
||||
int i, j, k, indx;
|
||||
int img_slice = img_width * img_height;
|
||||
|
||||
for (k=z; k<z+d; k++)
|
||||
for (j=y; j<y+h; j++)
|
||||
{
|
||||
indx = (k * img_slice + j * img_width + x) * 4;
|
||||
for (i=x; i<x+w; i++,indx+=4)
|
||||
{
|
||||
p[indx+0] = (unsigned short)genrand_int32(mtData);
|
||||
p[indx+1] = (unsigned short)genrand_int32(mtData);
|
||||
p[indx+2] = (unsigned short)genrand_int32(mtData);
|
||||
p[indx+3] = (unsigned short)genrand_int32(mtData);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compares two RGBA16 volumes of w x h x d texels channel by channel.
//
// Returns 0 when every channel matches. On the first mismatch it logs the
// mismatching channel index together with the expected and actual values of
// the texel containing that channel, then returns -1.
static int
verify_rgba16_image(unsigned short *image, unsigned short *outptr, int w, int h, int d)
{
    const int total = w * h * d * 4;
    int i;

    for (i = 0; i < total; i++)
    {
        if (outptr[i] != image[i])
        {
            // Report the whole texel that contains channel i. Indexing from
            // i itself would print a misaligned tuple and read past the end
            // of both buffers when the mismatch is in the last texel.
            const int t = i - (i % 4);

            log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i, image[t], image[t+1], image[t+2], image[t+3], outptr[t], outptr[t+1], outptr[t+2], outptr[t+3]);
            return -1;
        }
    }

    return 0;
}
|
||||
|
||||
|
||||
static float *
|
||||
generate_rgbafp_image(int w, int h, int d, MTdata mtData)
|
||||
{
|
||||
float *ptr = (float*)malloc(w * h * d *4 * sizeof(float));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*d*4; i++)
|
||||
ptr[i] = get_random_float(-0x40000000, 0x40000000, mtData);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
update_rgbafp_image(float *p, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, MTdata mtData)
|
||||
{
|
||||
int i, j, k, indx;
|
||||
int img_slice = img_width * img_height;
|
||||
|
||||
for (k=z; k<z+d; k++)
|
||||
for (j=y; j<y+h; j++)
|
||||
{
|
||||
indx = (k * img_slice + j * img_width + x) * 4;
|
||||
for (i=x; i<x+w; i++,indx+=4)
|
||||
{
|
||||
p[indx+0] = get_random_float(-0x40000000, 0x40000000, mtData);
|
||||
p[indx+1] = get_random_float(-0x40000000, 0x40000000, mtData);
|
||||
p[indx+2] = get_random_float(-0x40000000, 0x40000000, mtData);
|
||||
p[indx+3] = get_random_float(-0x40000000, 0x40000000, mtData);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compares two RGBA float volumes of w x h x d texels channel by channel
// using exact equality.
//
// Returns 0 when every channel matches. On the first mismatch it logs the
// mismatching channel index together with the expected and actual values of
// the texel containing that channel, then returns -1.
static int
verify_rgbafp_image(float *image, float *outptr, int w, int h, int d)
{
    const int total = w * h * d * 4;
    int i;

    for (i = 0; i < total; i++)
    {
        if (outptr[i] != image[i])
        {
            // Report the whole texel that contains channel i. Indexing from
            // i itself would print a misaligned tuple and read past the end
            // of both buffers when the mismatch is in the last texel.
            const int t = i - (i % 4);

            log_error("i = %d. Expected (%f %f %f %f), got (%f %f %f %f)\n", i, image[t], image[t+1], image[t+2], image[t+3], outptr[t], outptr[t+1], outptr[t+2], outptr[t+3]);
            return -1;
        }
    }

    return 0;
}
|
||||
|
||||
|
||||
int
|
||||
test_imagereadwrite3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_image_format img_format;
|
||||
unsigned char *rgba8_inptr, *rgba8_outptr;
|
||||
unsigned short *rgba16_inptr, *rgba16_outptr;
|
||||
float *rgbafp_inptr, *rgbafp_outptr;
|
||||
clMemWrapper streams[3];
|
||||
int img_width = 64;
|
||||
int img_height = 64;
|
||||
int img_depth = 32;
|
||||
int img_slice = img_width * img_height;
|
||||
int num_tries = 30;
|
||||
int i, j, err;
|
||||
MTdata mtData;
|
||||
|
||||
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
|
||||
|
||||
mtData = init_genrand( gRandomSeed );
|
||||
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, img_depth, mtData);
|
||||
rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, img_depth, mtData);
|
||||
rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, img_depth, mtData);
|
||||
|
||||
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height * img_depth);
|
||||
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height * img_depth);
|
||||
rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height * img_depth);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT16;
|
||||
streams[1] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_FLOAT;
|
||||
streams[2] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
void *p;
|
||||
|
||||
if (i == 0)
|
||||
p = (void *)rgba8_inptr;
|
||||
else if (i == 1)
|
||||
p = (void *)rgba16_inptr;
|
||||
else
|
||||
p = (void *)rgbafp_inptr;
|
||||
|
||||
size_t origin[3] = {0,0,0}, region[3] = {img_width, img_height, img_depth};
|
||||
err = clEnqueueWriteImage(queue, streams[i], CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
p,
|
||||
0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteImage failed");
|
||||
}
|
||||
|
||||
for (i=0,j=0; i<num_tries*3; i++,j++)
|
||||
{
|
||||
int x = (int)get_random_float(0, (float)img_width - 1, mtData);
|
||||
int y = (int)get_random_float(0, (float)img_height - 1, mtData);
|
||||
int z = (int)get_random_float(0, (float)img_depth - 1, mtData);
|
||||
int w = (int)get_random_float(1, (float)(img_width - x), mtData);
|
||||
int h = (int)get_random_float(1, (float)(img_height - y), mtData);
|
||||
int d = (int)get_random_float(1, (float)(img_depth - z), mtData);
|
||||
size_t input_pitch, input_slice_pitch;
|
||||
int set_input_pitch = (int)(genrand_int32(mtData) & 0x01);
|
||||
int packed_update = (int)(genrand_int32(mtData) & 0x01);
|
||||
void *p, *outp;
|
||||
int elem_size;
|
||||
|
||||
if (j == 3)
|
||||
j = 0;
|
||||
|
||||
// packed: the source image for the write is a whole image .
|
||||
// unpacked: the source image for the write is a subset within a larger image
|
||||
switch (j)
|
||||
{
|
||||
case 0:
|
||||
elem_size = 4;
|
||||
if(packed_update)
|
||||
{
|
||||
p = generate_rgba8_image(w, h, d, mtData);
|
||||
update_image_from_image(rgba8_inptr, p, x, y, z, w, h, d, img_width, img_height, img_depth, elem_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
update_rgba8_image(rgba8_inptr, x, y, z, w, h, d, img_width, img_height, img_depth, mtData);
|
||||
p = (void *)(rgba8_inptr + ((z * img_slice + y * img_width + x) * 4));
|
||||
}
|
||||
outp = (void *)rgba8_outptr;
|
||||
break;
|
||||
case 1:
|
||||
elem_size = 2*4;
|
||||
if(packed_update)
|
||||
{
|
||||
p = generate_rgba16_image(w, h, d, mtData);
|
||||
update_image_from_image(rgba16_inptr, p, x, y, z, w, h, d, img_width, img_height, img_depth, elem_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
update_rgba16_image(rgba16_inptr, x, y, z, w, h, d, img_width, img_height, img_depth, mtData);
|
||||
p = (void *)(rgba16_inptr + ((z * img_slice + y * img_width + x) * 4));
|
||||
}
|
||||
outp = (void *)rgba16_outptr;
|
||||
break;
|
||||
case 2:
|
||||
elem_size = 4*4;
|
||||
if(packed_update)
|
||||
{
|
||||
p = generate_rgbafp_image(w, h, d, mtData);
|
||||
update_image_from_image(rgbafp_inptr, p, x, y, z, w, h, d, img_width, img_height, img_depth, elem_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
update_rgbafp_image(rgbafp_inptr, x, y, z, w, h, d, img_width, img_height, img_depth, mtData);
|
||||
p = (void *)(rgbafp_inptr + ((z * img_slice + y * img_width + x) * 4));
|
||||
}
|
||||
outp = (void *)rgbafp_outptr;
|
||||
break;
|
||||
}
|
||||
|
||||
const char* update_packed_pitch_name = "";
|
||||
if(packed_update)
|
||||
{
|
||||
if(set_input_pitch)
|
||||
{
|
||||
// for packed updates the pitch does not need to be calculated here (but can be)
|
||||
update_packed_pitch_name = "'packed with pitch'";
|
||||
input_pitch = w*elem_size;
|
||||
input_slice_pitch = w*h*elem_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
// for packed updates the pitch does not need to be calculated here
|
||||
update_packed_pitch_name = "'packed without pitch'";
|
||||
input_pitch = 0;
|
||||
input_slice_pitch = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// for unpacked updates the pitch is required
|
||||
update_packed_pitch_name = "'unpacked with pitch'";
|
||||
input_pitch = img_width*elem_size;
|
||||
input_slice_pitch = input_pitch*img_height;
|
||||
}
|
||||
|
||||
size_t origin[3] = {x,y,z}, region[3] = {w, h, d};
|
||||
err = clEnqueueWriteImage(queue, streams[j], CL_TRUE,
|
||||
origin, region, input_pitch, input_slice_pitch,
|
||||
p, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteImage failed");
|
||||
|
||||
if(packed_update)
|
||||
{
|
||||
free(p);
|
||||
p = NULL;
|
||||
}
|
||||
|
||||
memset(outp, 0x7, img_width*img_height*img_depth*elem_size);
|
||||
|
||||
origin[0]=0; origin[1]=0; origin[2]=0; region[0]=img_width; region[1]=img_height; region[2]=img_depth;
|
||||
err = clEnqueueReadImage(queue, streams[j], CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
outp, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueReadImage failed");
|
||||
|
||||
switch (j)
|
||||
{
|
||||
case 0:
|
||||
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height, img_depth);
|
||||
if (err)
|
||||
{
|
||||
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
|
||||
log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, img_width, img_height, img_depth);
|
||||
if (err)
|
||||
{
|
||||
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
|
||||
log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, img_width, img_height, img_depth);
|
||||
if (err)
|
||||
{
|
||||
log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
|
||||
log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
free_mtdata(mtData);
|
||||
free(rgba8_inptr);
|
||||
free(rgba16_inptr);
|
||||
free(rgbafp_inptr);
|
||||
free(rgba8_outptr);
|
||||
free(rgba16_outptr);
|
||||
free(rgbafp_outptr);
|
||||
|
||||
if (!err)
|
||||
log_info("IMAGE read, write test passed\n");
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
160
test_conformance/basic/test_int2float.c
Normal file
160
test_conformance/basic/test_int2float.c
Normal file
@@ -0,0 +1,160 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// OpenCL C source of the kernel under test: work-item `tid` converts
// src[tid] from int to float and stores the result in dst[tid].
const char *int2float_kernel_code =
    "__kernel void test_int2float(__global int *src, __global float *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n\n"
    " dst[tid] = (float)src[tid];\n\n"
    "}\n";
|
||||
|
||||
|
||||
int
|
||||
verify_int2float(cl_int *inptr, cl_float *outptr, int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
if (outptr[i] != (float)inptr[i])
|
||||
{
|
||||
log_error("INT2FLOAT test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("INT2FLOAT test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
test_int2float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_int *input_ptr;
|
||||
cl_float *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
void *values[2];
|
||||
size_t lengths[1];
|
||||
size_t threads[1];
|
||||
int err;
|
||||
int i;
|
||||
MTdata d;
|
||||
|
||||
input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
|
||||
output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (cl_int)get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
lengths[0] = strlen(int2float_kernel_code);
|
||||
program = clCreateProgramWithSource(context, 1, &int2float_kernel_code, lengths, NULL);
|
||||
if (!program)
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgramExecutable failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel = clCreateKernel(program, "test_int2float", NULL);
|
||||
if (!kernel)
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
values[0] = streams[0];
|
||||
values[1] = streams[1];
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (size_t)num_elements;
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verify_int2float(input_ptr, output_ptr, num_elements);
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
390
test_conformance/basic/test_intmath_int.c
Normal file
390
test_conformance/basic/test_intmath_int.c
Normal file
@@ -0,0 +1,390 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// OpenCL C sources for the scalar int arithmetic kernels. In each kernel
// work-item `tid` combines the tid-th elements of the source buffers and
// stores the result in dst[tid].

// dst = srcA + srcB
const char *int_add_kernel_code =
    "__kernel void test_int_add(__global int *srcA, __global int *srcB, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n\n"
    " dst[tid] = srcA[tid] + srcB[tid];\n"
    "}\n";

// dst = srcA - srcB
const char *int_sub_kernel_code =
    "__kernel void test_int_sub(__global int *srcA, __global int *srcB, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n\n"
    " dst[tid] = srcA[tid] - srcB[tid];\n"
    "}\n";

// dst = srcA * srcB
const char *int_mul_kernel_code =
    "__kernel void test_int_mul(__global int *srcA, __global int *srcB, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n\n"
    " dst[tid] = srcA[tid] * srcB[tid];\n"
    "}\n";

// dst = srcA * srcB + srcC
const char *int_mad_kernel_code =
    "__kernel void test_int_mad(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n\n"
    " dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
    "}\n";

// NOTE(review): not referenced by the integer verifiers in this file, which
// compare exactly - confirm before removing.
static const float MAX_ERR = 1e-5f;
|
||||
|
||||
// Checks outptr[i] == inptrA[i] + inptrB[i] for all n elements.
// Logs the outcome and returns 0 when all match, -1 on the first mismatch.
int
verify_int_add(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        // The reference is computed in unsigned arithmetic so that it wraps
        // modulo 2^N instead of invoking signed-overflow undefined behavior
        // on the host.
        const int r = (int)((unsigned int)inptrA[i] + (unsigned int)inptrB[i]);

        if (r != outptr[i])
        {
            log_error("INT_ADD int test failed\n");
            return -1;
        }
    }

    log_info("INT_ADD int test passed\n");
    return 0;
}
|
||||
|
||||
// Checks outptr[i] == inptrA[i] - inptrB[i] for all n elements.
// Logs the outcome and returns 0 when all match, -1 on the first mismatch.
int
verify_int_sub(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        // The reference is computed in unsigned arithmetic so that it wraps
        // modulo 2^N instead of invoking signed-overflow undefined behavior
        // on the host.
        const int r = (int)((unsigned int)inptrA[i] - (unsigned int)inptrB[i]);

        if (r != outptr[i])
        {
            log_error("INT_SUB int test failed\n");
            return -1;
        }
    }

    log_info("INT_SUB int test passed\n");
    return 0;
}
|
||||
|
||||
// Checks outptr[i] == inptrA[i] * inptrB[i] for all n elements.
// Logs the outcome and returns 0 when all match, -1 on the first mismatch.
int
verify_int_mul(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        // The reference is computed in unsigned arithmetic so that it wraps
        // modulo 2^N instead of invoking signed-overflow undefined behavior
        // on the host.
        const int r = (int)((unsigned int)inptrA[i] * (unsigned int)inptrB[i]);

        if (r != outptr[i])
        {
            log_error("INT_MUL int test failed\n");
            return -1;
        }
    }

    log_info("INT_MUL int test passed\n");
    return 0;
}
|
||||
|
||||
// Checks outptr[i] == inptrA[i] * inptrB[i] + inptrC[i] for all n elements.
// Logs the outcome and returns 0 when all match, -1 on the first mismatch.
int
verify_int_mad(int *inptrA, int *inptrB, int *inptrC, int *outptr, int n)
{
    int i;

    for (i=0; i<n; i++)
    {
        // The reference is computed in unsigned arithmetic so that it wraps
        // modulo 2^N instead of invoking signed-overflow undefined behavior
        // on the host.
        const int r = (int)((unsigned int)inptrA[i] * (unsigned int)inptrB[i]
                            + (unsigned int)inptrC[i]);

        if (r != outptr[i])
        {
            log_error("INT_MAD int test failed\n");
            return -1;
        }
    }

    log_info("INT_MAD int test passed\n");
    return 0;
}
|
||||
|
||||
int
|
||||
test_intmath_int(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[4];
|
||||
cl_program program[4];
|
||||
cl_kernel kernel[4];
|
||||
|
||||
cl_int *input_ptr[3], *output_ptr, *p;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
|
||||
size_t length = sizeof(cl_int) * num_elements;
|
||||
|
||||
input_ptr[0] = (cl_int*)malloc(length);
|
||||
input_ptr[1] = (cl_int*)malloc(length);
|
||||
input_ptr[2] = (cl_int*)malloc(length);
|
||||
output_ptr = (cl_int*)malloc(length);
|
||||
|
||||
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[2])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[3])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] = (int)genrand_int32(d);
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] = (int)genrand_int32(d);
|
||||
p = input_ptr[2];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] = (int)genrand_int32(d);
|
||||
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[0] = clCreateProgramWithSource(context, 1, &int_add_kernel_code, NULL, NULL);
|
||||
if (!program[0])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[0] = clCreateKernel(program[0], "test_int_add", NULL);
|
||||
if (!kernel[0])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[1] = clCreateProgramWithSource(context, 1, &int_sub_kernel_code, NULL, NULL);
|
||||
if (!program[1])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[1], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[1] = clCreateKernel(program[1], "test_int_sub", NULL);
|
||||
if (!kernel[1])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[2] = clCreateProgramWithSource(context, 1, &int_mul_kernel_code, NULL, NULL);
|
||||
if (!program[2])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[2], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[2] = clCreateKernel(program[2], "test_int_mul", NULL);
|
||||
if (!kernel[2])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[3] = clCreateProgramWithSource(context, 1, &int_mad_kernel_code, NULL, NULL);
|
||||
if (!program[3])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[3], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[3] = clCreateKernel(program[3], "test_int_mad", NULL);
|
||||
if (!kernel[3])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[3], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[3], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[3], 2, sizeof streams[2], &streams[2]);
|
||||
err |= clSetKernelArg(kernel[3], 3, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (unsigned int)num_elements;
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_int_add(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_int_sub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_int_mul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 3:
|
||||
err = verify_int_mad(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
clReleaseKernel(kernel[i]);
|
||||
clReleaseProgram(program[i]);
|
||||
}
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(input_ptr[2]);
|
||||
free(output_ptr);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
389
test_conformance/basic/test_intmath_int2.c
Normal file
389
test_conformance/basic/test_intmath_int2.c
Normal file
@@ -0,0 +1,389 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/* OpenCL C source for test_int_add2: each work-item adds one int2 from srcA
 * to the int2 at the same index in srcB and stores the sum in dst. */
const char *int_add2_kernel_code =
    "__kernel void test_int_add2("
        "__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] + srcB[tid];\n"
    "}\n";
|
||||
|
||||
/* OpenCL C source for test_int_sub2: each work-item subtracts the int2 in
 * srcB from the int2 at the same index in srcA and stores the result in dst. */
const char *int_sub2_kernel_code =
    "__kernel void test_int_sub2("
        "__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] - srcB[tid];\n"
    "}\n";
|
||||
|
||||
/* OpenCL C source for test_int_mul2: each work-item multiplies the int2
 * values at its index in srcA and srcB and stores the product in dst. */
const char *int_mul2_kernel_code =
    "__kernel void test_int_mul2("
        "__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] * srcB[tid];\n"
    "}\n";
|
||||
|
||||
/* OpenCL C source for test_int_mad2: each work-item computes
 * srcA * srcB + srcC on the int2 values at its index and stores it in dst. */
const char *int_mad2_kernel_code =
    "__kernel void test_int_mad2("
        "__global int2 *srcA, __global int2 *srcB, "
        "__global int2 *srcC, __global int2 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
    "}\n";
|
||||
|
||||
/*
 * Host-side reference check for the int2 addition kernel.
 *
 * Compares the first n scalar ints of outptr with inptrA[i] + inptrB[i].
 * n counts scalar ints, not int2 vectors.  The reference sum is formed in
 * unsigned arithmetic so that overflow wraps modulo 2^32 instead of being
 * signed-overflow undefined behaviour; the comparison is done on the
 * unsigned bit patterns as well.
 *
 * Returns 0 (and logs a pass message) when every compared value matches,
 * or logs a failure and returns -1 at the first mismatch.
 */
int
verify_int_add2(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        unsigned int expected = (unsigned int)inptrA[i] + (unsigned int)inptrB[i];

        if (expected != (unsigned int)outptr[i])
        {
            log_error("INT_ADD int2 test failed\n");
            return -1;
        }
    }

    log_info("INT_ADD int2 test passed\n");
    return 0;
}
|
||||
|
||||
/*
 * Host-side reference check for the int2 subtraction kernel.
 *
 * Compares the first n scalar ints of outptr with inptrA[i] - inptrB[i].
 * n counts scalar ints, not int2 vectors.  The reference difference is
 * formed in unsigned arithmetic so that it wraps modulo 2^32 instead of
 * being signed-overflow undefined behaviour.
 *
 * Returns 0 (and logs a pass message) when every compared value matches,
 * or logs a failure and returns -1 at the first mismatch.
 */
int
verify_int_sub2(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        unsigned int expected = (unsigned int)inptrA[i] - (unsigned int)inptrB[i];

        if (expected != (unsigned int)outptr[i])
        {
            log_error("INT_SUB int2 test failed\n");
            return -1;
        }
    }

    log_info("INT_SUB int2 test passed\n");
    return 0;
}
|
||||
|
||||
/*
 * Host-side reference check for the int2 multiplication kernel.
 *
 * Compares the first n scalar ints of outptr with inptrA[i] * inptrB[i].
 * n counts scalar ints, not int2 vectors.  The reference product is formed
 * in unsigned arithmetic: the low 32 bits are the same as for a wrapping
 * signed multiply, but overflow is well defined.
 *
 * Returns 0 (and logs a pass message) when every compared value matches,
 * or logs a failure and returns -1 at the first mismatch.
 */
int
verify_int_mul2(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        unsigned int expected = (unsigned int)inptrA[i] * (unsigned int)inptrB[i];

        if (expected != (unsigned int)outptr[i])
        {
            log_error("INT_MUL int2 test failed\n");
            return -1;
        }
    }

    log_info("INT_MUL int2 test passed\n");
    return 0;
}
|
||||
|
||||
/*
 * Host-side reference check for the int2 multiply-add kernel.
 *
 * Compares the first n scalar ints of outptr with
 * inptrA[i] * inptrB[i] + inptrC[i].  n counts scalar ints, not int2
 * vectors.  The reference value is formed in unsigned arithmetic so that
 * both the product and the sum wrap modulo 2^32 instead of being
 * signed-overflow undefined behaviour.
 *
 * Returns 0 (and logs a pass message) when every compared value matches,
 * or logs a failure and returns -1 at the first mismatch.
 */
int
verify_int_mad2(int *inptrA, int *inptrB, int *inptrC, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        unsigned int expected = (unsigned int)inptrA[i] * (unsigned int)inptrB[i]
                              + (unsigned int)inptrC[i];

        if (expected != (unsigned int)outptr[i])
        {
            log_error("INT_MAD int2 test failed\n");
            return -1;
        }
    }

    log_info("INT_MAD int2 test passed\n");
    return 0;
}
|
||||
|
||||
int
|
||||
test_intmath_int2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[4];
|
||||
cl_program program[4];
|
||||
cl_kernel kernel[4];
|
||||
|
||||
cl_int *input_ptr[3], *output_ptr, *p;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
|
||||
size_t length = sizeof(cl_int) * 2 * num_elements;
|
||||
|
||||
input_ptr[0] = (cl_int*)malloc(length);
|
||||
input_ptr[1] = (cl_int*)malloc(length);
|
||||
input_ptr[2] = (cl_int*)malloc(length);
|
||||
output_ptr = (cl_int*)malloc(length);
|
||||
|
||||
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[2])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[3])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<num_elements*2; i++)
|
||||
p[i] = (int)genrand_int32(d);
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<num_elements*2; i++)
|
||||
p[i] = (int)genrand_int32(d);
|
||||
p = input_ptr[2];
|
||||
for (i=0; i<num_elements*2; i++)
|
||||
p[i] = (int)genrand_int32(d);
|
||||
|
||||
free_mtdata( d );
|
||||
d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[0] = clCreateProgramWithSource(context, 1, &int_add2_kernel_code, NULL, NULL);
|
||||
if (!program[0])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[0] = clCreateKernel(program[0], "test_int_add2", NULL);
|
||||
if (!kernel[0])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[1] = clCreateProgramWithSource(context, 1, &int_sub2_kernel_code, NULL, NULL);
|
||||
if (!program[1])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[1], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[1] = clCreateKernel(program[1], "test_int_sub2", NULL);
|
||||
if (!kernel[1])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[2] = clCreateProgramWithSource(context, 1, &int_mul2_kernel_code, NULL, NULL);
|
||||
if (!program[2])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[2], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[2] = clCreateKernel(program[2], "test_int_mul2", NULL);
|
||||
if (!kernel[2])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[3] = clCreateProgramWithSource(context, 1, &int_mad2_kernel_code, NULL, NULL);
|
||||
if (!program[3])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[3], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[3] = clCreateKernel(program[3], "test_int_mad2", NULL);
|
||||
if (!kernel[3])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[3], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[3], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[3], 2, sizeof streams[2], &streams[2]);
|
||||
err |= clSetKernelArg(kernel[3], 3, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (unsigned int)num_elements;
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_int_add2(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_int_sub2(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_int_mul2(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 3:
|
||||
err = verify_int_mad2(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
clReleaseKernel(kernel[i]);
|
||||
clReleaseProgram(program[i]);
|
||||
}
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(input_ptr[2]);
|
||||
free(output_ptr);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
388
test_conformance/basic/test_intmath_int4.c
Normal file
388
test_conformance/basic/test_intmath_int4.c
Normal file
@@ -0,0 +1,388 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/* OpenCL C source for test_int_add4: each work-item adds one int4 from srcA
 * to the int4 at the same index in srcB and stores the sum in dst. */
const char *int_add4_kernel_code =
    "__kernel void test_int_add4("
        "__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] + srcB[tid];\n"
    "}\n";
|
||||
|
||||
/* OpenCL C source for test_int_sub4: each work-item subtracts the int4 in
 * srcB from the int4 at the same index in srcA and stores the result in dst. */
const char *int_sub4_kernel_code =
    "__kernel void test_int_sub4("
        "__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] - srcB[tid];\n"
    "}\n";
|
||||
|
||||
/* OpenCL C source for test_int_mul4: each work-item multiplies the int4
 * values at its index in srcA and srcB and stores the product in dst. */
const char *int_mul4_kernel_code =
    "__kernel void test_int_mul4("
        "__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] * srcB[tid];\n"
    "}\n";
|
||||
|
||||
/* OpenCL C source for test_int_mad4: each work-item computes
 * srcA * srcB + srcC on the int4 values at its index and stores it in dst. */
const char *int_mad4_kernel_code =
    "__kernel void test_int_mad4("
        "__global int4 *srcA, __global int4 *srcB, "
        "__global int4 *srcC, __global int4 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
    "}\n";
|
||||
|
||||
/*
 * Host-side reference check for the int4 addition kernel.
 *
 * Compares the first n scalar ints of outptr with inptrA[i] + inptrB[i].
 * n counts scalar ints, not int4 vectors.  The reference sum is formed in
 * unsigned arithmetic so that overflow wraps modulo 2^32 instead of being
 * signed-overflow undefined behaviour.
 *
 * Returns 0 (and logs a pass message) when every compared value matches,
 * or logs a failure and returns -1 at the first mismatch.
 */
int
verify_int_add4(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        unsigned int expected = (unsigned int)inptrA[i] + (unsigned int)inptrB[i];

        if (expected != (unsigned int)outptr[i])
        {
            log_error("INT_ADD int4 test failed\n");
            return -1;
        }
    }

    log_info("INT_ADD int4 test passed\n");
    return 0;
}
|
||||
|
||||
/*
 * Host-side reference check for the int4 subtraction kernel.
 *
 * Compares the first n scalar ints of outptr with inptrA[i] - inptrB[i].
 * n counts scalar ints, not int4 vectors.  The reference difference is
 * formed in unsigned arithmetic so that it wraps modulo 2^32 instead of
 * being signed-overflow undefined behaviour.
 *
 * Returns 0 (and logs a pass message) when every compared value matches,
 * or logs a failure and returns -1 at the first mismatch.
 */
int
verify_int_sub4(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        unsigned int expected = (unsigned int)inptrA[i] - (unsigned int)inptrB[i];

        if (expected != (unsigned int)outptr[i])
        {
            log_error("INT_SUB int4 test failed\n");
            return -1;
        }
    }

    log_info("INT_SUB int4 test passed\n");
    return 0;
}
|
||||
|
||||
/*
 * Host-side reference check for the int4 multiplication kernel.
 *
 * Compares the first n scalar ints of outptr with inptrA[i] * inptrB[i].
 * n counts scalar ints, not int4 vectors.  The reference product is formed
 * in unsigned arithmetic: the low 32 bits are the same as for a wrapping
 * signed multiply, but overflow is well defined.
 *
 * Returns 0 (and logs a pass message) when every compared value matches,
 * or logs a failure and returns -1 at the first mismatch.
 */
int
verify_int_mul4(int *inptrA, int *inptrB, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        unsigned int expected = (unsigned int)inptrA[i] * (unsigned int)inptrB[i];

        if (expected != (unsigned int)outptr[i])
        {
            log_error("INT_MUL int4 test failed\n");
            return -1;
        }
    }

    log_info("INT_MUL int4 test passed\n");
    return 0;
}
|
||||
|
||||
/*
 * Host-side reference check for the int4 multiply-add kernel.
 *
 * Compares the first n scalar ints of outptr with
 * inptrA[i] * inptrB[i] + inptrC[i].  n counts scalar ints, not int4
 * vectors.  The reference value is formed in unsigned arithmetic so that
 * both the product and the sum wrap modulo 2^32 instead of being
 * signed-overflow undefined behaviour.
 *
 * Returns 0 (and logs a pass message) when every compared value matches,
 * or logs a failure and returns -1 at the first mismatch.
 */
int
verify_int_mad4(int *inptrA, int *inptrB, int *inptrC, int *outptr, int n)
{
    int i;

    for (i = 0; i < n; i++)
    {
        unsigned int expected = (unsigned int)inptrA[i] * (unsigned int)inptrB[i]
                              + (unsigned int)inptrC[i];

        if (expected != (unsigned int)outptr[i])
        {
            log_error("INT_MAD int4 test failed\n");
            return -1;
        }
    }

    log_info("INT_MAD int4 test passed\n");
    return 0;
}
|
||||
|
||||
int
|
||||
test_intmath_int4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[4];
|
||||
cl_program program[4];
|
||||
cl_kernel kernel[4];
|
||||
|
||||
cl_int *input_ptr[3], *output_ptr, *p;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
|
||||
size_t length = sizeof(cl_int) * 4 * num_elements;
|
||||
|
||||
input_ptr[0] = (cl_int*)malloc(length);
|
||||
input_ptr[1] = (cl_int*)malloc(length);
|
||||
input_ptr[2] = (cl_int*)malloc(length);
|
||||
output_ptr = (cl_int*)malloc(length);
|
||||
|
||||
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[2])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[3])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<num_elements*4; i++)
|
||||
p[i] = (int)genrand_int32(d);
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<num_elements*4; i++)
|
||||
p[i] = (int)genrand_int32(d);
|
||||
p = input_ptr[2];
|
||||
for (i=0; i<num_elements*4; i++)
|
||||
p[i] = (int)genrand_int32(d);
|
||||
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[0] = clCreateProgramWithSource(context, 1, &int_add4_kernel_code, NULL, NULL);
|
||||
if (!program[0])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[0] = clCreateKernel(program[0], "test_int_add4", NULL);
|
||||
if (!kernel[0])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[1] = clCreateProgramWithSource(context, 1, &int_sub4_kernel_code, NULL, NULL);
|
||||
if (!program[1])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[1], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[1] = clCreateKernel(program[1], "test_int_sub4", NULL);
|
||||
if (!kernel[1])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[2] = clCreateProgramWithSource(context, 1, &int_mul4_kernel_code, NULL, NULL);
|
||||
if (!program[2])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[2], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[2] = clCreateKernel(program[2], "test_int_mul4", NULL);
|
||||
if (!kernel[2])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[3] = clCreateProgramWithSource(context, 1, &int_mad4_kernel_code, NULL, NULL);
|
||||
if (!program[3])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[3], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[3] = clCreateKernel(program[3], "test_int_mad4", NULL);
|
||||
if (!kernel[3])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[3], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[3], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[3], 2, sizeof streams[2], &streams[2]);
|
||||
err |= clSetKernelArg(kernel[3], 3, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (unsigned int)num_elements;
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_int_add4(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_int_sub4(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_int_mul4(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 3:
|
||||
err = verify_int_mad4(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
clReleaseKernel(kernel[i]);
|
||||
clReleaseProgram(program[i]);
|
||||
}
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(input_ptr[2]);
|
||||
free(output_ptr);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
398
test_conformance/basic/test_intmath_long.c
Normal file
398
test_conformance/basic/test_intmath_long.c
Normal file
@@ -0,0 +1,398 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/* OpenCL C source for test_long_add: each work-item adds the long values at
 * its index in srcA and srcB and stores the sum in dst. */
const char *long_add_kernel_code =
    "__kernel void test_long_add("
        "__global long *srcA, __global long *srcB, __global long *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] + srcB[tid];\n"
    "}\n";
|
||||
|
||||
/* OpenCL C source for test_long_sub: each work-item subtracts the long in
 * srcB from the long at the same index in srcA and stores the result in dst. */
const char *long_sub_kernel_code =
    "__kernel void test_long_sub("
        "__global long *srcA, __global long *srcB, __global long *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] - srcB[tid];\n"
    "}\n";
|
||||
|
||||
/* OpenCL C source for test_long_mul: each work-item multiplies the long
 * values at its index in srcA and srcB and stores the product in dst. */
const char *long_mul_kernel_code =
    "__kernel void test_long_mul("
        "__global long *srcA, __global long *srcB, __global long *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] * srcB[tid];\n"
    "}\n";
|
||||
|
||||
/* OpenCL C source for test_long_mad: each work-item computes
 * srcA * srcB + srcC on the long values at its index and stores it in dst. */
const char *long_mad_kernel_code =
    "__kernel void test_long_mad("
        "__global long *srcA, __global long *srcB, "
        "__global long *srcC, __global long *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
    "}\n";
|
||||
|
||||
/* NOTE(review): not referenced by any of the integer checks in this file,
 * which all compare for exact equality; looks like a leftover from a
 * floating-point test and is a candidate for removal. */
static const float MAX_ERR = 1e-5f;
|
||||
|
||||
int
|
||||
verify_long_add(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
|
||||
{
|
||||
cl_long r;
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r = inptrA[i] + inptrB[i];
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error("LONG_ADD int test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("LONG_ADD int test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
verify_long_sub(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
|
||||
{
|
||||
cl_long r;
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r = inptrA[i] - inptrB[i];
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error("LONG_SUB int test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("LONG_SUB int test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
verify_long_mul(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
|
||||
{
|
||||
cl_long r;
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r = inptrA[i] * inptrB[i];
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error("LONG_MUL int test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("LONG_MUL int test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
verify_long_mad(cl_long *inptrA, cl_long *inptrB, cl_long *inptrC, cl_long *outptr, int n)
|
||||
{
|
||||
cl_long r;
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r = inptrA[i] * inptrB[i] + inptrC[i];
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error("LONG_MAD int test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("LONG_MAD int test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
test_intmath_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[4];
|
||||
cl_program program[4];
|
||||
cl_kernel kernel[4];
|
||||
|
||||
cl_long *input_ptr[3], *output_ptr, *p;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
|
||||
if(! gHasLong )
|
||||
{
|
||||
log_info("64-bit integers are not supported by this device. Skipping test.\n");
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
|
||||
size_t length = sizeof(cl_long) * num_elements;
|
||||
|
||||
input_ptr[0] = (cl_long*)malloc(length);
|
||||
input_ptr[1] = (cl_long*)malloc(length);
|
||||
input_ptr[2] = (cl_long*)malloc(length);
|
||||
output_ptr = (cl_long*)malloc(length);
|
||||
|
||||
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[2])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[3])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
|
||||
p = input_ptr[2];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
|
||||
|
||||
free_mtdata(d);
|
||||
d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[0] = clCreateProgramWithSource(context, 1, &long_add_kernel_code, NULL, NULL);
|
||||
if (!program[0])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[0] = clCreateKernel(program[0], "test_long_add", NULL);
|
||||
if (!kernel[0])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[1] = clCreateProgramWithSource(context, 1, &long_sub_kernel_code, NULL, NULL);
|
||||
if (!program[1])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[1], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[1] = clCreateKernel(program[1], "test_long_sub", NULL);
|
||||
if (!kernel[1])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[2] = clCreateProgramWithSource(context, 1, &long_mul_kernel_code, NULL, NULL);
|
||||
if (!program[2])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[2], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[2] = clCreateKernel(program[2], "test_long_mul", NULL);
|
||||
if (!kernel[2])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[3] = clCreateProgramWithSource(context, 1, &long_mad_kernel_code, NULL, NULL);
|
||||
if (!program[3])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[3], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[3] = clCreateKernel(program[3], "test_long_mad", NULL);
|
||||
if (!kernel[3])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[3], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[3], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[3], 2, sizeof streams[2], &streams[2]);
|
||||
err |= clSetKernelArg(kernel[3], 3, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (unsigned int)num_elements;
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_long_add(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_long_sub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_long_mul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 3:
|
||||
err = verify_long_mad(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
clReleaseKernel(kernel[i]);
|
||||
clReleaseProgram(program[i]);
|
||||
}
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(input_ptr[2]);
|
||||
free(output_ptr);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
396
test_conformance/basic/test_intmath_long2.c
Normal file
396
test_conformance/basic/test_intmath_long2.c
Normal file
@@ -0,0 +1,396 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// OpenCL C source for the long2 addition kernel: each work-item writes
// srcA[tid] + srcB[tid] to dst[tid].
const char *long_add2_kernel_code =
    "__kernel void test_long_add2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] + srcB[tid];\n"
    "}\n";
|
||||
|
||||
// OpenCL C source for the long2 subtraction kernel: each work-item writes
// srcA[tid] - srcB[tid] to dst[tid].
const char *long_sub2_kernel_code =
    "__kernel void test_long_sub2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] - srcB[tid];\n"
    "}\n";
|
||||
|
||||
// OpenCL C source for the long2 multiplication kernel: each work-item writes
// srcA[tid] * srcB[tid] to dst[tid].
const char *long_mul2_kernel_code =
    "__kernel void test_long_mul2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] * srcB[tid];\n"
    "}\n";
|
||||
|
||||
// OpenCL C source for the long2 multiply-add kernel: each work-item writes
// srcA[tid] * srcB[tid] + srcC[tid] to dst[tid].
const char *long_mad2_kernel_code =
    "__kernel void test_long_mad2(__global long2 *srcA, __global long2 *srcB, __global long2 *srcC, __global long2 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
    "}\n";
|
||||
|
||||
int
|
||||
verify_long_add2(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
|
||||
{
|
||||
cl_long r;
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r = inptrA[i] + inptrB[i];
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error("LONG_ADD long2 test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("LONG_ADD long2 test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
verify_long_sub2(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
|
||||
{
|
||||
cl_long r;
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r = inptrA[i] - inptrB[i];
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error("LONG_SUB long2 test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("LONG_SUB long2 test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
verify_long_mul2(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
|
||||
{
|
||||
cl_long r;
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r = inptrA[i] * inptrB[i];
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error("LONG_MUL long2 test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("LONG_MUL long2 test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
verify_long_mad2(cl_long *inptrA, cl_long *inptrB, cl_long *inptrC, cl_long *outptr, int n)
|
||||
{
|
||||
cl_long r;
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r = inptrA[i] * inptrB[i] + inptrC[i];
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error("LONG_MAD long2 test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("LONG_MAD long2 test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
test_intmath_long2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[4];
|
||||
cl_program program[4];
|
||||
cl_kernel kernel[4];
|
||||
|
||||
cl_long *input_ptr[3], *output_ptr, *p;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
|
||||
if(! gHasLong)
|
||||
{
|
||||
log_info("64-bit integers are not supported in this device. Skipping test.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
|
||||
size_t length = sizeof(cl_long) * 2* num_elements;
|
||||
|
||||
input_ptr[0] = (cl_long*)malloc(length);
|
||||
input_ptr[1] = (cl_long*)malloc(length);
|
||||
input_ptr[2] = (cl_long*)malloc(length);
|
||||
output_ptr = (cl_long*)malloc(length);
|
||||
|
||||
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[2])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[3])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<num_elements * 2; i++)
|
||||
p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<num_elements * 2; i++)
|
||||
p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
|
||||
p = input_ptr[2];
|
||||
for (i=0; i<num_elements * 2; i++)
|
||||
p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
|
||||
|
||||
free_mtdata(d);
|
||||
d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[0] = clCreateProgramWithSource(context, 1, &long_add2_kernel_code, NULL, NULL);
|
||||
if (!program[0])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[0] = clCreateKernel(program[0], "test_long_add2", NULL);
|
||||
if (!kernel[0])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[1] = clCreateProgramWithSource(context, 1, &long_sub2_kernel_code, NULL, NULL);
|
||||
if (!program[1])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[1], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[1] = clCreateKernel(program[1], "test_long_sub2", NULL);
|
||||
if (!kernel[1])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[2] = clCreateProgramWithSource(context, 1, &long_mul2_kernel_code, NULL, NULL);
|
||||
if (!program[2])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[2], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[2] = clCreateKernel(program[2], "test_long_mul2", NULL);
|
||||
if (!kernel[2])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[3] = clCreateProgramWithSource(context, 1, &long_mad2_kernel_code, NULL, NULL);
|
||||
if (!program[3])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[3], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[3] = clCreateKernel(program[3], "test_long_mad2", NULL);
|
||||
if (!kernel[3])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[3], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[3], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[3], 2, sizeof streams[2], &streams[2]);
|
||||
err |= clSetKernelArg(kernel[3], 3, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (unsigned int)num_elements;
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_long_add2(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_long_sub2(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_long_mul2(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 3:
|
||||
err = verify_long_mad2(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
clReleaseKernel(kernel[i]);
|
||||
clReleaseProgram(program[i]);
|
||||
}
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(input_ptr[2]);
|
||||
free(output_ptr);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
396
test_conformance/basic/test_intmath_long4.c
Normal file
396
test_conformance/basic/test_intmath_long4.c
Normal file
@@ -0,0 +1,396 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// OpenCL C source for the long4 addition kernel: each work-item writes
// srcA[tid] + srcB[tid] to dst[tid].
const char *long_add4_kernel_code =
    "__kernel void test_long_add4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] + srcB[tid];\n"
    "}\n";
|
||||
|
||||
// OpenCL C source for the long4 subtraction kernel: each work-item writes
// srcA[tid] - srcB[tid] to dst[tid].
const char *long_sub4_kernel_code =
    "__kernel void test_long_sub4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] - srcB[tid];\n"
    "}\n";
|
||||
|
||||
// OpenCL C source for the long4 multiplication kernel: each work-item writes
// srcA[tid] * srcB[tid] to dst[tid].
const char *long_mul4_kernel_code =
    "__kernel void test_long_mul4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] * srcB[tid];\n"
    "}\n";
|
||||
|
||||
// OpenCL C source for the long4 multiply-add kernel: each work-item writes
// srcA[tid] * srcB[tid] + srcC[tid] to dst[tid].
const char *long_mad4_kernel_code =
    "__kernel void test_long_mad4(__global long4 *srcA, __global long4 *srcB, __global long4 *srcC, __global long4 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
    "}\n";
|
||||
|
||||
int
|
||||
verify_long_add4(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
|
||||
{
|
||||
cl_long r;
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r = inptrA[i] + inptrB[i];
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error("LONG_ADD long4 test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("LONG_ADD long4 test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
verify_long_sub4(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
|
||||
{
|
||||
cl_long r;
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r = inptrA[i] - inptrB[i];
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error("LONG_SUB long4 test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("LONG_SUB long4 test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
verify_long_mul4(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
|
||||
{
|
||||
cl_long r;
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r = inptrA[i] * inptrB[i];
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error("LONG_MUL long4 test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("LONG_MUL long4 test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
verify_long_mad4(cl_long *inptrA, cl_long *inptrB, cl_long *inptrC, cl_long *outptr, int n)
|
||||
{
|
||||
cl_long r;
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r = inptrA[i] * inptrB[i] + inptrC[i];
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error("LONG_MAD long4 test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("LONG_MAD long4 test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
test_intmath_long4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[4];
|
||||
cl_program program[4];
|
||||
cl_kernel kernel[4];
|
||||
|
||||
cl_long *input_ptr[3], *output_ptr, *p;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
|
||||
if(! gHasLong )
|
||||
{
|
||||
log_info("64-bit integers are not supported by this device. Skipping test.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
|
||||
size_t length = sizeof(cl_long) * 4 * num_elements;
|
||||
|
||||
input_ptr[0] = (cl_long*)malloc(length);
|
||||
input_ptr[1] = (cl_long*)malloc(length);
|
||||
input_ptr[2] = (cl_long*)malloc(length);
|
||||
output_ptr = (cl_long*)malloc(length);
|
||||
|
||||
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[2])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[3])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<num_elements * 4; i++)
|
||||
p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<num_elements * 4; i++)
|
||||
p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
|
||||
p = input_ptr[2];
|
||||
for (i=0; i<num_elements * 4; i++)
|
||||
p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
|
||||
|
||||
free_mtdata(d);
|
||||
d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[0] = clCreateProgramWithSource(context, 1, &long_add4_kernel_code, NULL, NULL);
|
||||
if (!program[0])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[0] = clCreateKernel(program[0], "test_long_add4", NULL);
|
||||
if (!kernel[0])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[1] = clCreateProgramWithSource(context, 1, &long_sub4_kernel_code, NULL, NULL);
|
||||
if (!program[1])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[1], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[1] = clCreateKernel(program[1], "test_long_sub4", NULL);
|
||||
if (!kernel[1])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[2] = clCreateProgramWithSource(context, 1, &long_mul4_kernel_code, NULL, NULL);
|
||||
if (!program[2])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[2], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[2] = clCreateKernel(program[2], "test_long_mul4", NULL);
|
||||
if (!kernel[2])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
program[3] = clCreateProgramWithSource(context, 1, &long_mad4_kernel_code, NULL, NULL);
|
||||
if (!program[3])
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program[3], 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgram failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel[3] = clCreateKernel(program[3], "test_long_mad4", NULL);
|
||||
if (!kernel[3])
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel[3], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[3], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[3], 2, sizeof streams[2], &streams[2]);
|
||||
err |= clSetKernelArg(kernel[3], 3, sizeof streams[3], &streams[3]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (unsigned int)num_elements;
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_long_add4(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_long_sub4(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_long_mul4(input_ptr[0], input_ptr[1], output_ptr, num_elements);
|
||||
break;
|
||||
case 3:
|
||||
err = verify_long_mad4(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
|
||||
break;
|
||||
}
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
clReleaseKernel(kernel[i]);
|
||||
clReleaseProgram(program[i]);
|
||||
}
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(input_ptr[2]);
|
||||
free(output_ptr);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
253
test_conformance/basic/test_kernel_call_kernel_function.cpp
Normal file
253
test_conformance/basic/test_kernel_call_kernel_function.cpp
Normal file
@@ -0,0 +1,253 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
|
||||
// OpenCL C program used by the kernel-calls-kernel test.  The adjacent string
// literals are not separated by commas, so the array holds a single element
// containing the whole program source.
const char *kernel_call_kernel_code[] = {
    // Forward declaration of the plain helper function defined further down.
    "void test_function_to_call(__global int *output, __global int *input, int where);\n"
    "\n"

    // Kernel that is both launched directly and called from test_call_kernel.
    "__kernel void test_kernel_to_call(__global int *output, __global int *input, int where) \n"
    "{\n"
    " int b;\n"
    " if (where == 0) {\n"
    " output[get_global_id(0)] = 0;\n"
    " }\n"
    " for (b=0; b<where; b++)\n"
    " output[get_global_id(0)] += input[b]; \n"
    "}\n"
    "\n"

    // Kernel that calls another kernel `times` times.
    "__kernel void test_call_kernel(__global int *src, __global int *dst, int times) \n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " int a;\n"
    " dst[tid] = 1;\n"
    " for (a=0; a<times; a++)\n"
    " test_kernel_to_call(dst, src, tid);\n"
    "}\n"

    // Non-kernel function with the same body as test_kernel_to_call.
    "void test_function_to_call(__global int *output, __global int *input, int where) \n"
    "{\n"
    " int b;\n"
    " if (where == 0) {\n"
    " output[get_global_id(0)] = 0;\n"
    " }\n"
    " for (b=0; b<where; b++)\n"
    " output[get_global_id(0)] += input[b]; \n"
    "}\n"
    "\n"

    // Kernel that calls the plain function `times` times.
    "__kernel void test_call_function(__global int *src, __global int *dst, int times) \n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " int a;\n"
    " dst[tid] = 1;\n"
    " for (a=0; a<times; a++)\n"
    " test_function_to_call(dst, src, tid);\n"
    "}\n"
};
|
||||
|
||||
|
||||
|
||||
int test_kernel_call_kernel_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
num_elements = 256;
|
||||
|
||||
int error, errors = 0;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel1, kernel2, kernel_to_call;
|
||||
clMemWrapper streams[2];
|
||||
|
||||
size_t threads[] = {num_elements,1,1};
|
||||
cl_int *input, *output, *expected;
|
||||
cl_int times = 4;
|
||||
int pass = 0;
|
||||
|
||||
input = (cl_int*)malloc(sizeof(cl_int)*num_elements);
|
||||
output = (cl_int*)malloc(sizeof(cl_int)*num_elements);
|
||||
expected = (cl_int*)malloc(sizeof(cl_int)*num_elements);
|
||||
|
||||
for (int i=0; i<num_elements; i++) {
|
||||
input[i] = i;
|
||||
output[i] = i;
|
||||
expected[i] = output[i];
|
||||
}
|
||||
// Calculate the expected results
|
||||
for (int tid=0; tid<num_elements; tid++) {
|
||||
expected[tid] = 1;
|
||||
for (int a=0; a<times; a++) {
|
||||
int where = tid;
|
||||
if (where == 0)
|
||||
expected[tid] = 0;
|
||||
for (int b=0; b<where; b++) {
|
||||
expected[tid] += input[b];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test kernel calling a kernel
|
||||
log_info("Testing kernel calling kernel...\n");
|
||||
// Create the kernel
|
||||
if( create_single_kernel_helper( context, &program, &kernel1, 1, kernel_call_kernel_code, "test_call_kernel" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel_to_call = clCreateKernel(program, "test_kernel_to_call", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
/* Create some I/O streams */
|
||||
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*num_elements, input, &error);
|
||||
test_error( error, "clCreateBuffer failed" );
|
||||
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*num_elements, output, &error);
|
||||
test_error( error, "clCreateBuffer failed" );
|
||||
|
||||
error = clSetKernelArg(kernel1, 0, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "clSetKernelArg failed" );
|
||||
error = clSetKernelArg(kernel1, 1, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "clSetKernelArg failed" );
|
||||
error = clSetKernelArg(kernel1, 2, sizeof( times ), ×);
|
||||
test_error( error, "clSetKernelArg failed" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel1, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error( error, "clEnqueueNDRangeKernel failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, output, 0, NULL, NULL );
|
||||
test_error( error, "clEnqueueReadBuffer failed" );
|
||||
|
||||
// Compare the results
|
||||
pass = 1;
|
||||
for (int i=0; i<num_elements; i++) {
|
||||
if (output[i] != expected[i]) {
|
||||
if (errors > 10)
|
||||
continue;
|
||||
if (errors == 10) {
|
||||
log_error("Suppressing further results...\n");
|
||||
continue;
|
||||
}
|
||||
log_error("Results do not match: output[%d]=%d != expected[%d]=%d\n", i, output[i], i, expected[i]);
|
||||
errors++;
|
||||
pass = 0;
|
||||
}
|
||||
}
|
||||
if (pass) log_info("Passed kernel calling kernel...\n");
|
||||
|
||||
|
||||
|
||||
// Test kernel calling a function
|
||||
log_info("Testing kernel calling function...\n");
|
||||
// Reset the inputs
|
||||
for (int i=0; i<num_elements; i++) {
|
||||
input[i] = i;
|
||||
output[i] = i;
|
||||
}
|
||||
error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, input, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueWriteBuffer failed");
|
||||
error = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, output, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueWriteBuffer failed");
|
||||
|
||||
kernel2 = clCreateKernel(program, "test_call_function", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
error = clSetKernelArg(kernel2, 0, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "clSetKernelArg failed" );
|
||||
error = clSetKernelArg(kernel2, 1, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "clSetKernelArg failed" );
|
||||
error = clSetKernelArg(kernel2, 2, sizeof( times ), ×);
|
||||
test_error( error, "clSetKernelArg failed" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel2, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error( error, "clEnqueueNDRangeKernel failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, output, 0, NULL, NULL );
|
||||
test_error( error, "clEnqueueReadBuffer failed" );
|
||||
|
||||
// Compare the results
|
||||
pass = 1;
|
||||
for (int i=0; i<num_elements; i++) {
|
||||
if (output[i] != expected[i]) {
|
||||
if (errors > 10)
|
||||
continue;
|
||||
if (errors > 10) {
|
||||
log_error("Suppressing further results...\n");
|
||||
continue;
|
||||
}
|
||||
log_error("Results do not match: output[%d]=%d != expected[%d]=%d\n", i, output[i], i, expected[i]);
|
||||
errors++;
|
||||
pass = 0;
|
||||
}
|
||||
}
|
||||
if (pass) log_info("Passed kernel calling function...\n");
|
||||
|
||||
|
||||
// Test calling the kernel we called from another kernel
|
||||
log_info("Testing calling the kernel we called from another kernel before...\n");
|
||||
// Reset the inputs
|
||||
for (int i=0; i<num_elements; i++) {
|
||||
input[i] = i;
|
||||
output[i] = i;
|
||||
expected[i] = output[i];
|
||||
}
|
||||
error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, input, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueWriteBuffer failed");
|
||||
error = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, output, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueWriteBuffer failed");
|
||||
|
||||
// Calculate the expected results
|
||||
int where = times;
|
||||
for (int tid=0; tid<num_elements; tid++) {
|
||||
if (where == 0)
|
||||
expected[tid] = 0;
|
||||
for (int b=0; b<where; b++) {
|
||||
expected[tid] += input[b];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
error = clSetKernelArg(kernel_to_call, 0, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "clSetKernelArg failed" );
|
||||
error = clSetKernelArg(kernel_to_call, 1, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "clSetKernelArg failed" );
|
||||
error = clSetKernelArg(kernel_to_call, 2, sizeof( times ), &times);
|
||||
test_error( error, "clSetKernelArg failed" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel_to_call, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error( error, "clEnqueueNDRangeKernel failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, output, 0, NULL, NULL );
|
||||
test_error( error, "clEnqueueReadBuffer failed" );
|
||||
|
||||
// Compare the results
|
||||
pass = 1;
|
||||
for (int i=0; i<num_elements; i++) {
|
||||
if (output[i] != expected[i]) {
|
||||
if (errors > 10)
|
||||
continue;
|
||||
if (errors > 10) {
|
||||
log_error("Suppressing further results...\n");
|
||||
continue;
|
||||
}
|
||||
log_error("Results do not match: output[%d]=%d != expected[%d]=%d\n", i, output[i], i, expected[i]);
|
||||
errors++;
|
||||
pass = 0;
|
||||
}
|
||||
}
|
||||
if (pass) log_info("Passed calling the kernel we called from another kernel before...\n");
|
||||
|
||||
free( input );
|
||||
free( output );
|
||||
free( expected );
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
|
||||
570
test_conformance/basic/test_kernel_memory_alignment.cpp
Normal file
570
test_conformance/basic/test_kernel_memory_alignment.cpp
Normal file
@@ -0,0 +1,570 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _WIN32
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
// Kernel template for pointer arguments (global, local and constant address
// spaces); this variant reports addresses as ulong.
// Placeholders, in order: one optional pragma line, then six
// (address space, element type) pairs -- one pair per mem* argument.
// The kernel stores the address of each argument's first element in results[].
const char *parameter_kernel_long =
    "%s\n"
    "kernel void test(global ulong *results, "
    "%s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, "
    "%s %s4 *mem4, %s %s8 *mem8, %s %s16 *mem16)\n"
    "{\n"
    " results[0] = (ulong)&mem0[0];\n"
    " results[1] = (ulong)&mem2[0];\n"
    " results[2] = (ulong)&mem3[0];\n"
    " results[3] = (ulong)&mem4[0];\n"
    " results[4] = (ulong)&mem8[0];\n"
    " results[5] = (ulong)&mem16[0];\n"
    "}\n";
|
||||
|
||||
// Kernel template for variables declared inside the kernel body (private and
// local address spaces); this variant reports addresses as ulong.
// Placeholders, in order: one optional pragma line, then six
// (address space, element type) pairs -- one pair per mem* array.
const char *local_kernel_long =
    "%s\nkernel void test(global ulong *results)\n{\n"
    " %s %s mem0[3];\n"
    " %s %s2 mem2[3];\n"
    " %s %s3 mem3[3];\n"
    " %s %s4 mem4[3];\n"
    " %s %s8 mem8[3];\n"
    " %s %s16 mem16[3];\n"
    " results[0] = (ulong)&mem0[0];\n"
    " results[1] = (ulong)&mem2[0];\n"
    " results[2] = (ulong)&mem3[0];\n"
    " results[3] = (ulong)&mem4[0];\n"
    " results[4] = (ulong)&mem8[0];\n"
    " results[5] = (ulong)&mem16[0];\n"
    "}\n";
|
||||
|
||||
// Program template for program-scope constant arrays; this variant reports
// addresses as ulong.
// Placeholders, in order: one optional pragma line, the element type for
// mem0, then the element type twice (declaration and initializer cast) for
// each of mem2, mem3, mem4, mem8 and mem16 -- eleven type names in total.
const char *constant_kernel_long =
    "%s\n"
    " constant %s mem0[3] = {0};\n"
    " constant %s2 mem2[3] = {(%s2)(0)};\n"
    " constant %s3 mem3[3] = {(%s3)(0)};\n"
    " constant %s4 mem4[3] = {(%s4)(0)};\n"
    " constant %s8 mem8[3] = {(%s8)(0)};\n"
    " constant %s16 mem16[3] = {(%s16)(0)};\n"
    "\nkernel void test(global ulong *results)\n{\n"
    " results[0] = (ulong)&mem0;\n"
    " results[1] = (ulong)&mem2;\n"
    " results[2] = (ulong)&mem3;\n"
    " results[3] = (ulong)&mem4;\n"
    " results[4] = (ulong)&mem8;\n"
    " results[5] = (ulong)&mem16;\n"
    "}\n";
|
||||
|
||||
|
||||
// Kernel template for pointer arguments (global, local and constant address
// spaces); this variant reports addresses as uint instead of ulong.
// Placeholders, in order: one optional pragma line, then six
// (address space, element type) pairs -- one pair per mem* argument.
const char *parameter_kernel_no_long =
    "%s\n"
    "kernel void test(global uint *results, "
    "%s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, "
    "%s %s4 *mem4, %s %s8 *mem8, %s %s16 *mem16)\n"
    "{\n"
    " results[0] = (uint)&mem0[0];\n"
    " results[1] = (uint)&mem2[0];\n"
    " results[2] = (uint)&mem3[0];\n"
    " results[3] = (uint)&mem4[0];\n"
    " results[4] = (uint)&mem8[0];\n"
    " results[5] = (uint)&mem16[0];\n"
    "}\n";
|
||||
|
||||
// Kernel template for variables declared inside the kernel body (private and
// local address spaces); this variant reports addresses as uint instead of
// ulong.
// Placeholders, in order: one optional pragma line, then six
// (address space, element type) pairs -- one pair per mem* array.
const char *local_kernel_no_long =
    "%s\nkernel void test(global uint *results)\n{\n"
    " %s %s mem0[3];\n"
    " %s %s2 mem2[3];\n"
    " %s %s3 mem3[3];\n"
    " %s %s4 mem4[3];\n"
    " %s %s8 mem8[3];\n"
    " %s %s16 mem16[3];\n"
    " results[0] = (uint)&mem0[0];\n"
    " results[1] = (uint)&mem2[0];\n"
    " results[2] = (uint)&mem3[0];\n"
    " results[3] = (uint)&mem4[0];\n"
    " results[4] = (uint)&mem8[0];\n"
    " results[5] = (uint)&mem16[0];\n"
    "}\n";
|
||||
|
||||
// Program template for program-scope constant arrays; this variant reports
// addresses as uint instead of ulong.
// Placeholders, in order: one optional pragma line, the element type for
// mem0, then the element type twice (declaration and initializer cast) for
// each of mem2, mem3, mem4, mem8 and mem16 -- eleven type names in total.
const char *constant_kernel_no_long =
    "%s\n"
    " constant %s mem0[3] = {0};\n"
    " constant %s2 mem2[3] = {(%s2)(0)};\n"
    " constant %s3 mem3[3] = {(%s3)(0)};\n"
    " constant %s4 mem4[3] = {(%s4)(0)};\n"
    " constant %s8 mem8[3] = {(%s8)(0)};\n"
    " constant %s16 mem16[3] = {(%s16)(0)};\n"
    "\nkernel void test(global uint *results)\n{\n"
    " results[0] = (uint)&mem0;\n"
    " results[1] = (uint)&mem2;\n"
    " results[2] = (uint)&mem3;\n"
    " results[3] = (uint)&mem4;\n"
    " results[4] = (uint)&mem8;\n"
    " results[5] = (uint)&mem16;\n"
    "}\n";
|
||||
|
||||
// Address spaces whose alignment is tested.  The numeric values are used as
// indices into the name table in get_explicit_address_name(), so the
// enumerators and that table must stay in the same order.
enum AddressSpaces
{
    kGlobal = 0,
    kLocal,
    kConstant,
    kPrivate
};

typedef enum AddressSpaces AddressSpaces;

// Set to 1 to also log every address that passes the alignment check.
#define DEBUG 0

// Returns the OpenCL C qualifier keyword for the given address space.
// A value outside the enum yields a placeholder that is not a valid
// qualifier instead of reading past the end of the table.
const char * get_explicit_address_name( AddressSpaces address )
{
    /* Table lookup instead of branching: must match the enum order above */
    static const char *const sExplicitAddressNames[] = { "global", "local", "constant", "private" };
    const unsigned int index = (unsigned int)address;

    if( index >= sizeof( sExplicitAddressNames ) / sizeof( sExplicitAddressNames[ 0 ] ) )
        return "<invalid address space>";

    return sExplicitAddressNames[ index ];
}
|
||||
|
||||
|
||||
int test_kernel_memory_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, AddressSpaces address )
|
||||
{
|
||||
const char *constant_kernel;
|
||||
const char *parameter_kernel;
|
||||
const char *local_kernel;
|
||||
|
||||
if ( gHasLong )
|
||||
{
|
||||
constant_kernel = constant_kernel_long;
|
||||
parameter_kernel = parameter_kernel_long;
|
||||
local_kernel = local_kernel_long;
|
||||
}
|
||||
else
|
||||
{
|
||||
constant_kernel = constant_kernel_no_long;
|
||||
parameter_kernel = parameter_kernel_no_long;
|
||||
local_kernel = local_kernel_no_long;
|
||||
}
|
||||
|
||||
ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
|
||||
char *kernel_code = (char*)malloc(4096);
|
||||
cl_kernel kernel;
|
||||
cl_program program;
|
||||
int error;
|
||||
int total_errors = 0;
|
||||
cl_mem results;
|
||||
cl_ulong *results_data;
|
||||
cl_mem mem0, mem2, mem3, mem4, mem8, mem16;
|
||||
|
||||
results_data = (cl_ulong*)malloc(sizeof(cl_ulong)*6);
|
||||
results = clCreateBuffer(context, 0, sizeof(cl_ulong)*6, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
mem0 = clCreateBuffer(context, 0, sizeof(cl_long), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
mem2 = clCreateBuffer(context, 0, sizeof(cl_long)*2, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
mem3 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
mem4 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
mem8 = clCreateBuffer(context, 0, sizeof(cl_long)*8, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
mem16 = clCreateBuffer(context, 0, sizeof(cl_long)*16, NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
|
||||
// For each type
|
||||
|
||||
// Calculate alignment mask for each size
|
||||
|
||||
// For global, local, constant, private
|
||||
|
||||
// If global, local or constant -- do parameter_kernel
|
||||
// If private or local -- do local_kernel
|
||||
// If constant -- do constant kernel
|
||||
|
||||
int numConstantArgs;
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof(numConstantArgs), &numConstantArgs, NULL);
|
||||
|
||||
int typeIndex;
|
||||
for (typeIndex = 0; typeIndex < 10; typeIndex++) {
|
||||
// Skip double tests if we don't support doubles
|
||||
if (vecType[typeIndex] == kDouble && !is_extension_available(device, "cl_khr_fp64")) {
|
||||
log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong )
|
||||
continue;
|
||||
|
||||
log_info("Testing %s...\n", get_explicit_type_name(vecType[typeIndex]));
|
||||
|
||||
// Determine the expected alignment masks.
|
||||
// E.g., if it is supposed to be 4 byte aligned, we should get 4-1=3 = ... 000011
|
||||
// We can then and the returned address with that and we should have 0.
|
||||
cl_ulong alignments[6];
|
||||
alignments[0] = get_explicit_type_size(vecType[typeIndex])-1;
|
||||
alignments[1] = (get_explicit_type_size(vecType[typeIndex])<<1)-1;
|
||||
alignments[2] = (get_explicit_type_size(vecType[typeIndex])<<2)-1;
|
||||
alignments[3] = (get_explicit_type_size(vecType[typeIndex])<<2)-1;
|
||||
alignments[4] = (get_explicit_type_size(vecType[typeIndex])<<3)-1;
|
||||
alignments[5] = (get_explicit_type_size(vecType[typeIndex])<<4)-1;
|
||||
|
||||
// Parameter kernel
|
||||
if (address == kGlobal || address == kLocal || address == kConstant) {
|
||||
log_info("\tTesting parameter kernel...\n");
|
||||
|
||||
if ( (gIsEmbedded) && (address == kConstant) && (numConstantArgs < 6)) {
|
||||
sprintf(kernel_code, parameter_kernel,
|
||||
vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex])
|
||||
);
|
||||
}
|
||||
else {
|
||||
sprintf(kernel_code, parameter_kernel,
|
||||
vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex])
|
||||
);
|
||||
}
|
||||
//printf("Kernel is: \n%s\n", kernel_code);
|
||||
|
||||
// Create the kernel
|
||||
error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test");
|
||||
test_error(error, "create_single_kernel_helper failed");
|
||||
|
||||
// Initialize the results
|
||||
memset(results_data, 0, sizeof(cl_long)*5);
|
||||
error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*6, results_data, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueWriteBuffer failed");
|
||||
|
||||
// Set the arguments
|
||||
error = clSetKernelArg(kernel, 0, sizeof(results), &results);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
if (address != kLocal) {
|
||||
error = clSetKernelArg(kernel, 1, sizeof(mem0), &mem0);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel, 2, sizeof(mem2), &mem2);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel, 3, sizeof(mem3), &mem3);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel, 4, sizeof(mem4), &mem4);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel, 5, sizeof(mem8), &mem8);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel, 6, sizeof(mem16), &mem16);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
} else {
|
||||
error = clSetKernelArg(kernel, 1, get_explicit_type_size(vecType[typeIndex]), NULL);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel, 2, get_explicit_type_size(vecType[typeIndex])*2, NULL);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel, 3, get_explicit_type_size(vecType[typeIndex])*4, NULL);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel, 4, get_explicit_type_size(vecType[typeIndex])*4, NULL);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel, 5, get_explicit_type_size(vecType[typeIndex])*8, NULL);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel, 6, get_explicit_type_size(vecType[typeIndex])*16, NULL);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
}
|
||||
|
||||
// Enqueue the kernel
|
||||
size_t global_size = 1;
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
// Read back the results
|
||||
error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*6, results_data, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
// Verify the results
|
||||
if (gHasLong) {
|
||||
for (int i = 0; i < 6; i++) {
|
||||
if ((results_data[i] & alignments[i]) != 0) {
|
||||
total_errors++;
|
||||
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]);
|
||||
} else {
|
||||
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Verify the results on devices that do not support longs
|
||||
else {
|
||||
cl_uint *results_data_no_long = (cl_uint *)results_data;
|
||||
|
||||
for (int i = 0; i < 6; i++) {
|
||||
if ((results_data_no_long[i] & alignments[i]) != 0) {
|
||||
total_errors++;
|
||||
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]);
|
||||
} else {
|
||||
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Local kernel
|
||||
if (address == kLocal || address == kPrivate) {
|
||||
log_info("\tTesting local kernel...\n");
|
||||
sprintf(kernel_code, local_kernel,
|
||||
vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex])
|
||||
);
|
||||
//printf("Kernel is: \n%s\n", kernel_code);
|
||||
|
||||
// Create the kernel
|
||||
error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test");
|
||||
test_error(error, "create_single_kernel_helper failed");
|
||||
|
||||
// Initialize the results
|
||||
memset(results_data, 0, sizeof(cl_long)*5);
|
||||
error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueWriteBuffer failed");
|
||||
|
||||
// Set the arguments
|
||||
error = clSetKernelArg(kernel, 0, sizeof(results), &results);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Enqueue the kernel
|
||||
size_t global_size = 1;
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
// Read back the results
|
||||
error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
// Verify the results
|
||||
if (gHasLong) {
|
||||
for (int i = 0; i < 5; i++) {
|
||||
if ((results_data[i] & alignments[i]) != 0) {
|
||||
total_errors++;
|
||||
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]);
|
||||
} else {
|
||||
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Verify the results on devices that do not support longs
|
||||
else {
|
||||
cl_uint *results_data_no_long = (cl_uint *)results_data;
|
||||
|
||||
for (int i = 0; i < 5; i++) {
|
||||
if ((results_data_no_long[i] & alignments[i]) != 0) {
|
||||
total_errors++;
|
||||
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]);
|
||||
} else {
|
||||
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Constant kernel
|
||||
if (address == kConstant) {
|
||||
log_info("\tTesting constant kernel...\n");
|
||||
sprintf(kernel_code, constant_kernel,
|
||||
vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
|
||||
get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_type_name(vecType[typeIndex]),
|
||||
get_explicit_type_name(vecType[typeIndex])
|
||||
);
|
||||
//printf("Kernel is: \n%s\n", kernel_code);
|
||||
|
||||
// Create the kernel
|
||||
error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test");
|
||||
test_error(error, "create_single_kernel_helper failed");
|
||||
|
||||
// Initialize the results
|
||||
memset(results_data, 0, sizeof(cl_long)*5);
|
||||
error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueWriteBuffer failed");
|
||||
|
||||
// Set the arguments
|
||||
error = clSetKernelArg(kernel, 0, sizeof(results), &results);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Enqueue the kernel
|
||||
size_t global_size = 1;
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
// Read back the results
|
||||
error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueReadBuffer failed");
|
||||
|
||||
// Verify the results
|
||||
if (gHasLong) {
|
||||
for (int i = 0; i < 5; i++) {
|
||||
if ((results_data[i] & alignments[i]) != 0) {
|
||||
total_errors++;
|
||||
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]);
|
||||
} else {
|
||||
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Verify the results on devices that do not support longs
|
||||
else {
|
||||
cl_uint *results_data_no_long = (cl_uint *)results_data;
|
||||
|
||||
for (int i = 0; i < 5; i++) {
|
||||
if ((results_data_no_long[i] & alignments[i]) != 0) {
|
||||
total_errors++;
|
||||
log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]);
|
||||
} else {
|
||||
if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
}
|
||||
}
|
||||
|
||||
clReleaseMemObject(results);
|
||||
clReleaseMemObject(mem0);
|
||||
clReleaseMemObject(mem2);
|
||||
clReleaseMemObject(mem3);
|
||||
clReleaseMemObject(mem4);
|
||||
clReleaseMemObject(mem8);
|
||||
clReleaseMemObject(mem16);
|
||||
free( kernel_code );
|
||||
free( results_data );
|
||||
|
||||
if (total_errors != 0)
|
||||
return -1;
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Test entry point: runs the memory alignment test for the local address space.
int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const AddressSpaces addressSpace = kLocal;

    return test_kernel_memory_alignment( device, context, queue, n_elems, addressSpace );
}
|
||||
|
||||
// Test entry point: runs the memory alignment test for the global address space.
int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const AddressSpaces addressSpace = kGlobal;

    return test_kernel_memory_alignment( device, context, queue, n_elems, addressSpace );
}
|
||||
|
||||
// Test entry point: runs the memory alignment test for the constant address
// space, except on devices that report an OpenCL 1.0 version string, which
// are passed without testing (see below).  Returns 0 on success or skip and
// non-zero on failure.
int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    // There is a class of approved OpenCL 1.0 conformant devices out there that in some circumstances
    // are unable to meaningfully take (or more precisely use) the address of constant data by virtue
    // of limitations in their ISA design. This feature was not tested in 1.0, so they were declared
    // conformant by Khronos. The failure is however caught here.
    //
    // Unfortunately, determining whether or not these devices are 1.0 conformant is not the jurisdiction
    // of the 1.1 tests -- We can't fail them from 1.1 conformance here because they are not 1.1
    // devices. They are merely 1.0 conformant devices that interop with 1.1 devices in a 1.1 platform.
    // To add new binding tests now to conformant 1.0 devices would violate the working group requirement
    // of no new tests for 1.0 devices. So certain allowances have to be made in intractable cases
    // such as this one.
    //
    // There is some precedent. Similar allowances are made for other 1.0 hardware features such as
    // local memory size. The minimum required local memory size grew from 16 kB to 32 kB in OpenCL 1.1.

    // Detect 1.0 devices
    // Get CL_DEVICE_VERSION size
    size_t string_size = 0;
    int err;
    if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, 0, NULL, &string_size ) ) )
    {
        log_error( "FAILURE: Unable to get size of CL_DEVICE_VERSION string!" );
        return -1;
    }

    // Allocate storage to hold the version string
    char *version_string = (char*) malloc(string_size);
    if( NULL == version_string )
    {
        log_error( "FAILURE: Unable to allocate memory to hold CL_DEVICE_VERSION string!" );
        return -1;
    }

    // Get CL_DEVICE_VERSION string
    if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, string_size, version_string, NULL ) ) )
    {
        log_error( "FAILURE: Unable to read CL_DEVICE_VERSION string!" );
        free(version_string);   // do not leak the buffer on this error path
        return -1;
    }

    // easy out for 1.0 devices
    const char *string_1_0 = "OpenCL 1.0 ";
    if( 0 == strncmp( version_string, string_1_0, strlen(string_1_0)) )
    {
        log_info( "WARNING: Allowing device to escape testing of difficult constant memory alignment case.\n\tDevice is not a OpenCL 1.1 device. CL_DEVICE_VERSION: \"%s\"\n", version_string );
        free(version_string);
        return 0;
    }
    log_info( "Device version string: \"%s\"\n", version_string );
    free(version_string);

    // Everyone else is to be ground mercilessly under the wheels of progress
    return test_kernel_memory_alignment( device, context, queue, n_elems, kConstant );
}
|
||||
|
||||
// Test entry point: runs the memory alignment test for the private address space.
int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const AddressSpaces addressSpace = kPrivate;

    return test_kernel_memory_alignment( device, context, queue, n_elems, addressSpace );
}
|
||||
|
||||
|
||||
368
test_conformance/basic/test_local.c
Normal file
368
test_conformance/basic/test_local.c
Normal file
@@ -0,0 +1,368 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * Two variants of the same summation kernel.  Each work-item accumulates a
 * strided partial sum of a[0..n) into tmp_sum[tid]; the partial sums are then
 * combined by repeated halving with a local-memory barrier before each step,
 * and work-item 0 stores the total to *sum.
 *
 *   [0]  tmp_sum is a __local pointer passed as a kernel argument.
 *   [1]  tmp_sum is a __local array declared inside the kernel; its size is
 *        the single %d placeholder of this string.
 */
const char *barrier_with_localmem_kernel_code[] = {
    /* [0]: local scratch memory supplied as an argument */
    "__kernel void compute_sum_with_localmem("
    "__global int *a, int n, __local int *tmp_sum, __global int *sum)\n"
    "{\n"
    " int tid = get_local_id(0);\n"
    " int lsize = get_local_size(0);\n"
    " int i;\n\n"
    " tmp_sum[tid] = 0;\n"
    " for (i=tid; i<n; i+=lsize)\n"
    " tmp_sum[tid] += a[i];\n\n"
    " if( lsize == 1 )\n"
    " {\n"
    " if( tid == 0 )\n"
    " *sum = tmp_sum[0];\n"
    " return;\n"
    " }\n\n"
    " do\n"
    " {\n"
    " barrier(CLK_LOCAL_MEM_FENCE);\n"
    " if (tid < lsize/2)\n"
    " {\n"
    " int sum = tmp_sum[tid];\n"
    " if( (lsize & 1) && tid == 0 )\n"
    " sum += tmp_sum[tid + lsize - 1];\n"
    " tmp_sum[tid] = sum + tmp_sum[tid + lsize/2];\n"
    " }\n"
    " lsize = lsize/2; \n"
    " }while( lsize );\n\n"
    " if( tid == 0 )\n"
    " *sum = tmp_sum[0];\n"
    "}\n",

    /* [1]: local scratch memory declared in the kernel, sized via %d */
    "__kernel void compute_sum_with_localmem("
    "__global int *a, int n, __global int *sum)\n"
    "{\n"
    " __local int tmp_sum[%d];\n"
    " int tid = get_local_id(0);\n"
    " int lsize = get_local_size(0);\n"
    " int i;\n\n"
    " tmp_sum[tid] = 0;\n"
    " for (i=tid; i<n; i+=lsize)\n"
    " tmp_sum[tid] += a[i];\n\n"
    " if( lsize == 1 )\n"
    " {\n"
    " if( tid == 0 )\n"
    " *sum = tmp_sum[0];\n"
    " return;\n"
    " }\n\n"
    " do\n"
    " {\n"
    " barrier(CLK_LOCAL_MEM_FENCE);\n"
    " if (tid < lsize/2)\n"
    " {\n"
    " int sum = tmp_sum[tid];\n"
    " if( (lsize & 1) && tid == 0 )\n"
    " sum += tmp_sum[tid + lsize - 1];\n"
    " tmp_sum[tid] = sum + tmp_sum[tid + lsize/2];\n"
    " }\n"
    " lsize = lsize/2; \n"
    " }while( lsize );\n\n"
    " if( tid == 0 )\n"
    " *sum = tmp_sum[0];\n"
    "}\n"
};
|
||||
|
||||
/*
 * Compares the sum of inptr[0..n) with the kernel's result in outptr[0].
 * Logs the outcome and returns 0 when they match, -1 otherwise.
 *
 * The reference sum is accumulated in unsigned arithmetic: the inputs are
 * arbitrary 32-bit values, so a signed accumulator could overflow, which is
 * undefined behaviour in C.  Unsigned addition wraps modulo 2^32.
 */
static int
verify_sum(int *inptr, int *outptr, int n)
{
    unsigned int sum = 0;
    int i;

    for (i=0; i<n; i++)
    {
        sum += (unsigned int)inptr[i];
    }

    if (sum != (unsigned int)outptr[0])
    {
        log_error("LOCAL test failed: *%d vs %d\n", (int)sum, outptr[0] );
        return -1;
    }

    log_info("LOCAL test passed\n");
    return 0;
}
|
||||
|
||||
/*
 * Runs the summation kernel variant that receives its __local scratch buffer
 * as a kernel argument and checks the result against a host-side sum.
 *
 * A single work-group of half the device's maximum work-group size (at least
 * one work-item) is used, reduced further if the kernel or the device's
 * first work-item dimension allows less.
 *
 * Returns 0 on success and non-zero on failure.  All host and OpenCL
 * resources are released on every path.
 */
int test_local_arg_def(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[2] = { NULL, NULL };
    cl_program program = NULL;
    cl_kernel kernel = NULL;

    cl_int *input_ptr = NULL, *output_ptr = NULL;
    size_t global_threads[1], local_threads[1];
    size_t wgsize, kwgsize;
    size_t max_local_workgroup_size[3];
    size_t in_length, out_length;
    int err, i;
    int result = -1;
    MTdata d = init_genrand( gRandomSeed );

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof wgsize, &wgsize, NULL);
    if (err) {
        log_error("clGetDeviceInfo failed, %d\n\n", err);
        goto cleanup;
    }
    wgsize/=2;
    if (wgsize < 1)
        wgsize = 1;

    in_length = sizeof(cl_int) * num_elements;
    out_length = sizeof(cl_int) * wgsize;

    input_ptr = (cl_int *)malloc(in_length);
    output_ptr = (cl_int *)malloc(out_length);
    if (input_ptr == NULL || output_ptr == NULL)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, in_length, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, out_length, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    for (i=0; i<num_elements; i++)
        input_ptr[i] = (int)genrand_int32(d);

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, in_length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_with_localmem_kernel_code[0], "compute_sum_with_localmem" );
    if (err)
        goto cleanup;

    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof kwgsize, &kwgsize, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE (%d)\n", err);
        result = err;
        goto cleanup;
    }

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES (%d)\n", err);
        result = err;
        goto cleanup;
    }

    // Pick the minimum of the device and the kernel
    if (kwgsize > max_local_workgroup_size[0])
        kwgsize = max_local_workgroup_size[0];

    err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
    // NULL value with a size: allocates the kernel's __local scratch buffer.
    err |= clSetKernelArg(kernel, 2, wgsize * sizeof(cl_int), NULL);
    err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    global_threads[0] = wgsize;
    local_threads[0] = wgsize;

    // Adjust the local thread size to fit and be a nice multiple.
    if (kwgsize < wgsize) {
        // size_t is cast explicitly because it is not unsigned long everywhere.
        log_info("Adjusting wgsize down from %lu to %lu.\n", (unsigned long)wgsize, (unsigned long)kwgsize);
        local_threads[0] = kwgsize;
    }
    while (global_threads[0] % local_threads[0] != 0)
        local_threads[0]--;

    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, out_length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_sum(input_ptr, output_ptr, num_elements);

cleanup:
    // Release whatever was acquired before the first failure (or everything
    // on success).
    if (d != NULL)
        free_mtdata(d);
    if (streams[0] != NULL)
        clReleaseMemObject(streams[0]);
    if (streams[1] != NULL)
        clReleaseMemObject(streams[1]);
    if (kernel != NULL)
        clReleaseKernel(kernel);
    if (program != NULL)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return result;
}
|
||||
|
||||
// Builds the "compute_sum_with_localmem" kernel from the printf-style template
// barrier_with_localmem_kernel_code[1] (the local-memory byte count is
// substituted into the source), runs it as a single work-group over
// num_elements random integers and checks the result with verify_sum().
//
// Returns 0 on success and non-zero on failure. Every exit goes through the
// cleanup label so that host buffers, the generated source, the RNG state and
// all OpenCL objects are released on error paths as well.
int test_local_kernel_def(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[2] = { NULL, NULL };
    cl_program program = NULL;
    cl_kernel kernel = NULL;

    cl_int *input_ptr = NULL, *output_ptr = NULL;
    char *program_source = NULL;
    MTdata d = NULL;
    size_t global_threads[1], local_threads[1];
    size_t wgsize = 0, kwgsize = 0;
    size_t max_local_workgroup_size[3];
    size_t in_length, out_length;
    cl_ulong localMemSize = 0;
    int err, i;
    int result = -1;

    // All locals are declared above so that no "goto cleanup" jumps across an
    // initialisation (this file may be built as C++).
    program_source = (char*)malloc(sizeof(char)*2048);
    d = init_genrand( gRandomSeed );
    if (!program_source)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }
    memset(program_source, 0, 2048);

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof wgsize, &wgsize, NULL);
    if (err) {
        log_error("clGetDeviceInfo failed, %d\n\n", err);
        goto cleanup;
    }
    wgsize/=2;
    if (wgsize < 1)
        wgsize = 1;

    in_length = sizeof(cl_int) * num_elements;
    out_length = sizeof(cl_int) * wgsize;

    input_ptr = (cl_int *)malloc(in_length);
    output_ptr = (cl_int *)malloc(out_length);
    if (!input_ptr || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, in_length, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, out_length, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    for (i=0; i<num_elements; i++)
        input_ptr[i] = (cl_int) genrand_int32(d);

    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, in_length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    // Validate that created kernel doesn't violate local memory size allowed by the device
    err = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(localMemSize), &localMemSize, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clGetDeviceInfo failed\n");
        goto cleanup;
    }
    if ( wgsize > (localMemSize / (sizeof(cl_int)*sizeof(cl_int))) )
    {
        wgsize = localMemSize / (sizeof(cl_int)*sizeof(cl_int));
    }

    // The template expects the size of the kernel-scope local array in bytes.
    sprintf(program_source, barrier_with_localmem_kernel_code[1], (int)(wgsize * sizeof(cl_int)));

    err = create_single_kernel_helper(context, &program, &kernel, 1, (const char**)&program_source, "compute_sum_with_localmem" );
    if (err)
        goto cleanup;

    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof kwgsize, &kwgsize, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE, %d\n", err);
        goto cleanup;
    }

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES, %d\n", err);
        goto cleanup;
    }

    // Pick the minimum of the device and the kernel
    if (kwgsize > max_local_workgroup_size[0])
        kwgsize = max_local_workgroup_size[0];

    err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
    err |= clSetKernelArg(kernel, 2, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    global_threads[0] = wgsize;
    local_threads[0] = wgsize;

    // Adjust the local thread size to fit and be a nice multiple.
    if (kwgsize < wgsize) {
        // size_t is not necessarily unsigned long; cast to match %lu.
        log_info("Adjusting wgsize down from %lu to %lu.\n", (unsigned long)wgsize, (unsigned long)kwgsize);
        local_threads[0] = kwgsize;
    }
    while (global_threads[0] % local_threads[0] != 0)
        local_threads[0]--;

    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, out_length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_sum(input_ptr, output_ptr, num_elements);

cleanup:
    if (d)
        free_mtdata(d);
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(program_source);
    free(input_ptr);
    free(output_ptr);

    return result;
}
|
||||
|
||||
|
||||
|
||||
139
test_conformance/basic/test_local_kernel_scope.cpp
Normal file
139
test_conformance/basic/test_local_kernel_scope.cpp
Normal file
@@ -0,0 +1,139 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// Number of elements in the kernel-scope __local array. The string form is
// pasted into the kernel source below, so the two definitions must agree.
#define MAX_LOCAL_STORAGE_SIZE 256
#define MAX_LOCAL_STORAGE_SIZE_STRING "256"

// Kernel "test": each work-item stores its input value into a __local array
// declared inside the kernel (in reverse order within the group); after a
// barrier, the first work-item of each group writes the group maximum to
// outMaxes[ group id ].
const char *kernelSource[] = {
    "__kernel void test( __global unsigned int * input, __global unsigned int *outMaxes )\n"
    "{\n"
    " __local unsigned int localStorage[ " MAX_LOCAL_STORAGE_SIZE_STRING " ];\n"
    " unsigned int theValue = input[ get_global_id( 0 ) ];\n"
    "\n"
    " // If we just write linearly, there's no verification that the items in a group share local data\n"
    " // So we write reverse-linearly, which requires items to read the local data written by at least one\n"
    " // different item\n"
    " localStorage[ get_local_size( 0 ) - get_local_id( 0 ) - 1 ] = theValue;\n"
    "\n"
    " // The barrier ensures that all local items have written to the local storage\n"
    " barrier( CLK_LOCAL_MEM_FENCE );\n"
    "\n"
    " // Now we loop back through the local storage and look for the max value. We only do this if\n"
    " // we're the first item in a group\n"
    " unsigned int max = 0;\n"
    " if( get_local_id( 0 ) == 0 )\n"
    " {\n"
    " for( size_t i = 0; i < get_local_size( 0 ); i++ )\n"
    " {\n"
    " if( localStorage[ i ] > max )\n"
    " max = localStorage[ i ];\n"
    " }\n"
    " outMaxes[ get_group_id( 0 ) ] = max;\n"
    " }\n"
    "}\n"
};
|
||||
|
||||
int test_local_kernel_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[ 2 ];
|
||||
MTdata randSeed = init_genrand( gRandomSeed );
|
||||
|
||||
// Create a test kernel
|
||||
error = create_single_kernel_helper( context, &program, &kernel, 1, kernelSource, "test" );
|
||||
test_error( error, "Unable to create test kernel" );
|
||||
|
||||
|
||||
// Determine an appropriate test size
|
||||
size_t workGroupSize;
|
||||
error = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workGroupSize ), &workGroupSize, NULL );
|
||||
test_error( error, "Unable to obtain kernel work group size" );
|
||||
|
||||
// Make sure the work group size doesn't overrun our local storage size in the kernel
|
||||
while( workGroupSize > MAX_LOCAL_STORAGE_SIZE )
|
||||
workGroupSize >>= 1;
|
||||
|
||||
size_t testSize = workGroupSize;
|
||||
while( testSize < 1024 )
|
||||
testSize += workGroupSize;
|
||||
size_t numGroups = testSize / workGroupSize;
|
||||
log_info( "\tTesting with %ld groups, %ld elements per group...\n", numGroups, workGroupSize );
|
||||
|
||||
// Create two buffers for operation
|
||||
cl_uint *inputData = (cl_uint*)malloc( testSize * sizeof(cl_uint) );
|
||||
generate_random_data( kUInt, testSize, randSeed, inputData );
|
||||
free_mtdata( randSeed );
|
||||
streams[ 0 ] = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, testSize * sizeof(cl_uint), inputData, &error );
|
||||
test_error( error, "Unable to create input buffer" );
|
||||
|
||||
cl_uint *outputData = (cl_uint*)malloc( numGroups *sizeof(cl_uint) );
|
||||
streams[ 1 ] = clCreateBuffer( context, CL_MEM_WRITE_ONLY, numGroups * sizeof(cl_uint), NULL, &error );
|
||||
test_error( error, "Unable to create output buffer" );
|
||||
|
||||
|
||||
// Set up the kernel args and run
|
||||
error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
|
||||
test_error( error, "Unable to set kernel arg" );
|
||||
error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
|
||||
test_error( error, "Unable to set kernel arg" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &testSize, &workGroupSize, 0, NULL, NULL );
|
||||
test_error( error, "Unable to enqueue kernel" );
|
||||
|
||||
|
||||
// Read results and verify
|
||||
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, numGroups * sizeof(cl_uint), outputData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read output data" );
|
||||
|
||||
// MingW compiler seems to have a bug that otimizes the code below incorrectly.
|
||||
// adding the volatile keyword to size_t decleration to avoid aggressive optimization by the compiler.
|
||||
for( volatile size_t i = 0; i < numGroups; i++ )
|
||||
{
|
||||
// Determine the max in our case
|
||||
cl_uint localMax = 0;
|
||||
for( volatile size_t j = 0; j < workGroupSize; j++ )
|
||||
{
|
||||
if( inputData[ i * workGroupSize + j ] > localMax )
|
||||
localMax = inputData[ i * workGroupSize + j ];
|
||||
}
|
||||
|
||||
if( outputData[ i ] != localMax )
|
||||
{
|
||||
log_error( "ERROR: Local max validation failed! (expected %u, got %u for i=%lu)\n", localMax, outputData[ i ] , i );
|
||||
free(inputData);
|
||||
free(outputData);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
free(inputData);
|
||||
free(outputData);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
122
test_conformance/basic/test_local_linear_id.c
Normal file
122
test_conformance/basic/test_local_linear_id.c
Normal file
@@ -0,0 +1,122 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include "../../test_common/harness/rounding_mode.h"
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// 1-D kernel: each work-item writes 1 to dst[global id] when
// get_local_linear_id() equals get_local_id(0), and 0 otherwise.
static const char *local_linear_id_1d_code =
    "__kernel void test_local_linear_id_1d(global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " int linear_id = get_local_id(0);\n"
    " int result = (linear_id == (int)get_local_linear_id()) ? 0x1 : 0x0;\n"
    " dst[tid] = result;\n"
    "}\n";
|
||||
|
||||
// 2-D kernel: each work-item computes
// get_local_id(1) * get_local_size(0) + get_local_id(0) and writes 1 to its
// row-major slot in dst when that equals get_local_linear_id(), 0 otherwise.
static const char *local_linear_id_2d_code =
    "__kernel void test_local_linear_id_2d(global int *dst)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    "\n"
    " int linear_id = get_local_id(1) * get_local_size(0) + get_local_id(0);\n"
    " int result = (linear_id == (int)get_local_linear_id()) ? 0x1 : 0x0;\n"
    " dst[tid_y * get_global_size(0) + tid_x] = result;\n"
    "}\n";
|
||||
|
||||
|
||||
// Checks that the first n entries of result are all non-zero.
// Logs the outcome and returns 0 when they are, -1 at the first zero entry.
static int
verify_local_linear_id(int *result, int n)
{
    int idx = 0;

    // Advance past every entry flagged as correct.
    while (idx < n && result[idx] != 0)
        idx++;

    if (idx < n)
    {
        log_error("get_local_linear_id failed\n");
        return -1;
    }

    log_info("get_local_linear_id passed\n");
    return 0;
}
|
||||
|
||||
|
||||
int
|
||||
test_local_linear_id(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams;
|
||||
cl_program program[2];
|
||||
cl_kernel kernel[2];
|
||||
|
||||
int *output_ptr;
|
||||
size_t threads[2];
|
||||
int err;
|
||||
num_elements = (int)sqrt((float)num_elements);
|
||||
int length = num_elements * num_elements;
|
||||
|
||||
output_ptr = (cl_int*)malloc(sizeof(int) * length);
|
||||
|
||||
streams = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length*sizeof(int), NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
|
||||
err = create_single_kernel_helper_with_build_options(context, &program[0], &kernel[0], 1, &local_linear_id_1d_code, "test_local_linear_id_1d", "-cl-std=CL2.0");
|
||||
test_error( err, "create_single_kernel_helper failed");
|
||||
err = create_single_kernel_helper_with_build_options(context, &program[1], &kernel[1], 1, &local_linear_id_2d_code, "test_local_linear_id_2d", "-cl-std=CL2.0");
|
||||
test_error( err, "create_single_kernel_helper failed");
|
||||
|
||||
err = clSetKernelArg(kernel[0], 0, sizeof streams, &streams);
|
||||
test_error( err, "clSetKernelArgs failed.");
|
||||
err = clSetKernelArg(kernel[1], 0, sizeof streams, &streams);
|
||||
test_error( err, "clSetKernelArgs failed.");
|
||||
|
||||
threads[0] = (size_t)num_elements;
|
||||
threads[1] = (size_t)num_elements;
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[1], 2, NULL, threads, NULL, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueNDRangeKernel failed.");
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, length*sizeof(int), output_ptr, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueReadBuffer failed.");
|
||||
|
||||
err = verify_local_linear_id(output_ptr, length);
|
||||
|
||||
threads[0] = (size_t)num_elements;
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueNDRangeKernel failed.");
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, num_elements*sizeof(int), output_ptr, 0, NULL, NULL);
|
||||
test_error( err, "clEnqueueReadBuffer failed.");
|
||||
|
||||
err = verify_local_linear_id(output_ptr, num_elements);
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams);
|
||||
clReleaseKernel(kernel[0]);
|
||||
clReleaseKernel(kernel[1]);
|
||||
clReleaseProgram(program[0]);
|
||||
clReleaseProgram(program[1]);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
185
test_conformance/basic/test_loop.c
Normal file
185
test_conformance/basic/test_loop.c
Normal file
@@ -0,0 +1,185 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// Kernel "test_loop": for each work-item tid, sums loopcnt[tid] consecutive
// values of src starting at index loopindx[tid] (wrapping to 0 once the index
// reaches the global size) and stores the sum in dst[tid].
const char *loop_kernel_code =
    "__kernel void test_loop(__global int *src, __global int *loopindx, __global int *loopcnt, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " int n = get_global_size(0);\n"
    " int i, j;\n"
    "\n"
    " dst[tid] = 0;\n"
    " for (i=0,j=loopindx[tid]; i<loopcnt[tid]; i++,j++)\n"
    " {\n"
    " if (j >= n)\n"
    " j = 0;\n"
    " dst[tid] += src[j];\n"
    " }\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
// Recomputes on the host what the test_loop kernel produces and compares.
//
// For each i in [0, n): sums loopcnt[i] consecutive entries of inptr starting
// at loopindx[i], wrapping the index to 0 once it reaches n, and compares the
// sum with outptr[i]. Returns 0 when every entry matches, -1 at the first
// mismatch.
//
// The inputs are arbitrary 32-bit values, so the sum routinely exceeds the
// range of int. It is accumulated in unsigned arithmetic, which wraps modulo
// 2^32, instead of signed arithmetic, where overflow is undefined behaviour.
int
verify_loop(int *inptr, int *loopindx, int *loopcnt, int *outptr, int n)
{
    int i, j, k;

    for (i=0; i<n; i++)
    {
        unsigned int sum = 0;

        for (j=0,k=loopindx[i]; j<loopcnt[i]; j++,k++)
        {
            if (k >= n)
                k = 0;
            sum += (unsigned int)inptr[k];
        }

        // Compare the bit patterns; both sides are 32-bit wrapped sums.
        if (sum != (unsigned int)outptr[i])
        {
            log_error("LOOP test failed: %d found, expected %d\n", outptr[i], (int)sum);
            return -1;
        }
    }

    log_info("LOOP test passed\n");
    return 0;
}
|
||||
|
||||
// Runs the test_loop kernel over num_elements work-items with random source
// data, random start indices in [0, num_elements-1] and random loop counts in
// [0, num_elements/32], then checks the output with verify_loop().
//
// Returns 0 on success and non-zero on failure. Every exit goes through the
// cleanup label so host buffers and OpenCL objects are released on error
// paths as well.
int test_loop(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[4] = { NULL, NULL, NULL, NULL };
    cl_int *input_ptr = NULL, *loop_indx = NULL, *loop_cnt = NULL, *output_ptr = NULL;
    cl_int *host_inputs[3];
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    size_t threads[1];
    size_t length = sizeof(cl_int) * num_elements;
    MTdata d = NULL;
    int err, i;
    int result = -1;

    // All locals are declared above so that no "goto cleanup" jumps across an
    // initialisation (this file may be built as C++).
    input_ptr = (cl_int*)malloc(length);
    loop_indx = (cl_int*)malloc(length);
    loop_cnt = (cl_int*)malloc(length);
    output_ptr = (cl_int*)malloc(length);
    if (!input_ptr || !loop_indx || !loop_cnt || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    // streams[0..2] are the kernel inputs, streams[3] receives the result.
    for (i=0; i<4; i++)
    {
        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[i])
        {
            log_error("clCreateBuffer failed\n");
            goto cleanup;
        }
    }

    d = init_genrand( gRandomSeed );
    for (i=0; i<num_elements; i++)
    {
        input_ptr[i] = (int)genrand_int32(d);
        loop_indx[i] = (int)get_random_float(0, num_elements-1, d);
        loop_cnt[i] = (int)get_random_float(0, num_elements/32, d);
    }
    free_mtdata(d); d = NULL;

    host_inputs[0] = input_ptr;
    host_inputs[1] = loop_indx;
    host_inputs[2] = loop_cnt;
    for (i=0; i<3; i++)
    {
        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, host_inputs[i], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            goto cleanup;
        }
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &loop_kernel_code, "test_loop" );
    if (err)
        goto cleanup;

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel, 3, sizeof streams[3], &streams[3]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (unsigned int)num_elements;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clReadArray failed\n");
        goto cleanup;
    }

    result = verify_loop(input_ptr, loop_indx, loop_cnt, output_ptr, num_elements);

cleanup:
    if (d)
        free_mtdata(d);
    for (i=0; i<4; i++)
    {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(loop_indx);
    free(loop_cnt);
    free(output_ptr);

    return result;
}
|
||||
|
||||
|
||||
236
test_conformance/basic/test_multireadimagemultifmt.c
Normal file
236
test_conformance/basic/test_multireadimagemultifmt.c
Normal file
@@ -0,0 +1,236 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// Kernel "test_multireadimage": reads the same pixel from three 2-D images
// with read_imagef, adds the three float4 values and stores the sum in dst at
// the row-major index computed from the width of img1.
static const char *multireadimage_kernel_code =
    "__kernel void test_multireadimage(read_only image2d_t img0, read_only image2d_t img1, \n"
    " read_only image2d_t img2, __global float4 *dst, sampler_t sampler)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int2 tid = (int2)(tid_x, tid_y);\n"
    " int indx = tid_y * get_image_width(img1) + tid_x;\n"
    " float4 sum;\n"
    "\n"
    " sum = read_imagef(img0, sampler, tid);\n"
    " sum += read_imagef(img1, sampler, tid);\n"
    " sum += read_imagef(img2, sampler, tid);\n"
    "\n"
    " dst[indx] = sum;\n"
    "}\n";
|
||||
|
||||
#define MAX_ERR 1e-7f
|
||||
|
||||
static unsigned char *
|
||||
generate_8888_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static unsigned short *
|
||||
generate_16bit_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned short *ptr = (unsigned short*)malloc(w * h * 4 * sizeof(unsigned short));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (unsigned short)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static float *
|
||||
generate_float_image(int w, int h, MTdata d)
|
||||
{
|
||||
float *ptr = (float*)malloc(w * h * 4 * (int)sizeof(float));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
|
||||
// Compares the kernel output with a host-side reference for w x h RGBA pixels.
//
// image[0] is read as 8-bit data scaled by 1/255, image[1] as 16-bit data
// scaled by 1/65535 and image[2] as float data; the three values are added
// per channel and compared with outptr via Ulp_Error. Returns 0 when the
// largest error seen is within the allowed bound, -1 otherwise.
static int
verify_multireadimage(void *image[], float *outptr, int w, int h)
{
    // ULP error of 1.5 for each read_imagef plus 0.5 for each addition.
    float max_ulp_allowed = (float)(3*1.5+2*0.5);
    float worst_ulp = 0.0f;
    unsigned char *unorm8 = (unsigned char *)image[0];
    unsigned short *unorm16 = (unsigned short *)image[1];
    float *fp32 = (float *)image[2];
    int idx = 0;

    while (idx < w*h*4)
    {
        float expected;
        float err_ulp;

        expected = (float)unorm8[idx] / 255.0f;
        expected += (float)unorm16[idx] / 65535.0f;
        expected += (float)fp32[idx];

        err_ulp = Ulp_Error(outptr[idx], expected);
        if (err_ulp > worst_ulp)
            worst_ulp = err_ulp;

        idx++;
    }

    if (worst_ulp > max_ulp_allowed) {
        log_error("READ_MULTIREADIMAGE_MULTIFORMAT test failed. Max ulp error = %g\n", worst_ulp);
        return -1;
    }

    log_info("READ_MULTIREADIMAGE_MULTIFORMAT test passed. Max ulp error = %g\n", worst_ulp);
    return 0;
}
|
||||
|
||||
|
||||
int
|
||||
test_multireadimagemultifmt(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[4];
|
||||
cl_image_format img_format;
|
||||
void *input_ptr[3], *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[2];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
int i, err;
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
input_ptr[0] = (void *)generate_8888_image(img_width, img_height, d);
|
||||
input_ptr[1] = (void *)generate_16bit_image(img_width, img_height, d);
|
||||
input_ptr[2] = (void *)generate_float_image(img_width, img_height, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
output_ptr = (void *)malloc(sizeof(float) * 4 * img_width * img_height);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
return -1;
|
||||
}
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT16;
|
||||
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
return -1;
|
||||
}
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_FLOAT;
|
||||
streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[2])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(float)*4 * img_width*img_height, NULL, NULL);
|
||||
if (!streams[3])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
size_t origin[3] = {0,0,0}, region[3]={img_width, img_height,1};
|
||||
err = clEnqueueWriteImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, input_ptr[i], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteImage failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
err = create_single_kernel_helper( context, &program, &kernel, 1, &multireadimage_kernel_code, "test_multireadimage");
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
cl_sampler_properties properties[] = {
|
||||
CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
|
||||
CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
|
||||
CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
|
||||
0 };
|
||||
cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err);
|
||||
test_error(err, "clCreateSamplerWithProperties failed");
|
||||
|
||||
for (i=0; i<4; i++)
|
||||
err |= clSetKernelArg(kernel, i,sizeof streams[i], &streams[i]);
|
||||
err |= clSetKernelArg(kernel, 4, sizeof sampler, &sampler);
|
||||
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (size_t)img_width;
|
||||
threads[1] = (size_t)img_height;
|
||||
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, sizeof(float)*4*img_width*img_height, (void *)output_ptr, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verify_multireadimage(input_ptr, (float*)output_ptr, img_width, img_height);
|
||||
|
||||
// cleanup
|
||||
clReleaseSampler(sampler);
|
||||
for (i=0; i<4; i++)
|
||||
clReleaseMemObject(streams[i]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
for (i=0; i<3; i++)
|
||||
free(input_ptr[i]);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
204
test_conformance/basic/test_multireadimageonefmt.c
Normal file
204
test_conformance/basic/test_multireadimageonefmt.c
Normal file
@@ -0,0 +1,204 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * OpenCL C source of the kernel under test.
 *
 * Each work-item reads the texel at its own (x, y) coordinate from seven
 * read-only 2D images through one sampler, adds the seven float4 values
 * together and stores the sum in dst at row-major index y * width + x,
 * where the width is taken from img5.  The n and m arguments are not used
 * by the kernel body.
 */
static const char *multireadimage_kernel_code =
    /* signature */
    "__kernel void test_multireadimage(int n, int m, sampler_t sampler, \n"
    " read_only image2d_t img0, read_only image2d_t img1, \n"
    " read_only image2d_t img2, read_only image2d_t img3, \n"
    " read_only image2d_t img4, read_only image2d_t img5, \n"
    " read_only image2d_t img6, __global float4 *dst)\n"
    "{\n"
    /* per-work-item coordinate and output index */
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int2 tid = (int2)(tid_x, tid_y);\n"
    " int indx = tid_y * get_image_width(img5) + tid_x;\n"
    " float4 sum;\n"
    "\n"
    /* accumulate the same texel from all seven images */
    " sum = read_imagef(img0, sampler, tid);\n"
    " sum += read_imagef(img1, sampler, tid);\n"
    " sum += read_imagef(img2, sampler, tid);\n"
    " sum += read_imagef(img3, sampler, tid);\n"
    " sum += read_imagef(img4, sampler, tid);\n"
    " sum += read_imagef(img5, sampler, tid);\n"
    " sum += read_imagef(img6, sampler, tid);\n"
    "\n"
    " dst[indx] = sum;\n"
    "}\n";
|
||||
|
||||
|
||||
static unsigned char *
|
||||
generate_8888_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Compares the kernel output against a host-computed reference.
 *
 * image      - num_images host buffers, each holding w*h*4 unsigned bytes
 * num_images - number of entries in image[]
 * outptr     - w*h*4 floats read back from the device
 * w, h       - image dimensions in pixels
 *
 * For every channel value the reference is the sum over all images of
 * byte / 255.0f.  Returns 0 when the largest error magnitude (in ULPs) is
 * within the allowed budget, -1 otherwise.
 */
static int
verify_multireadimage(void *image[], int num_images, float *outptr, int w, int h)
{
    int i, j;
    float sum;
    float ulp, max_ulp = 0.0f;

    // ULP error of 1.5 for each read_imagef plus 0.5 for each addition.
    float max_ulp_allowed = (float)(num_images*1.5+0.5*(num_images-1));

    for (i = 0; i < w*h*4; i++)
    {
        sum = 0.0f;
        for (j = 0; j < num_images; j++)
        {
            sum += ((float)((unsigned char *)image[j])[i] / 255.0f);
        }

        ulp = Ulp_Error(outptr[i], sum);

        // Judge the magnitude of the error: without this, a result that is
        // too small would yield a negative value and never raise max_ulp.
        // NOTE(review): assumes Ulp_Error returns a signed error - confirm
        // against the harness; if it is already non-negative this is a no-op.
        if (ulp < 0.0f)
            ulp = -ulp;

        if (ulp > max_ulp)
            max_ulp = ulp;
    }

    if (max_ulp > max_ulp_allowed)
    {
        log_error("READ_MULTIREADIMAGE_RGBA8888 test failed. Max ULP err = %g\n", max_ulp);
        return -1;
    }

    log_info("READ_MULTIREADIMAGE_RGBA8888 test passed. Max ULP err = %g\n", max_ulp);
    return 0;
}
|
||||
|
||||
|
||||
int test_multireadimageonefmt(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[8];
|
||||
cl_image_format img_format;
|
||||
void *input_ptr[7], *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[2];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
int i, err;
|
||||
size_t origin[3] = {0, 0, 0};
|
||||
size_t region[3] = {img_width, img_height, 1};
|
||||
size_t length = img_width * img_height * 4 * sizeof(float);
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
output_ptr = malloc(length);
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<7; i++) {
|
||||
input_ptr[i] = (void *)generate_8888_image(img_width, img_height, d);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[i] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[i])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueWriteImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, input_ptr[i], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteImage failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
|
||||
streams[7] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[7])
|
||||
{
|
||||
log_error("clCreateArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, &multireadimage_kernel_code, "test_multireadimage");
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
cl_sampler_properties properties[] = {
|
||||
CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
|
||||
CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
|
||||
CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
|
||||
0 };
|
||||
cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err);
|
||||
test_error(err, "clCreateSamplerWithProperties failed");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof i, &i);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof err, &err);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
|
||||
for (i=0; i<8; i++)
|
||||
err |= clSetKernelArg(kernel, 3+i, sizeof streams[i], &streams[i]);
|
||||
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (unsigned int)img_width;
|
||||
threads[1] = (unsigned int)img_height;
|
||||
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clExecuteKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueReadBuffer(queue, streams[7], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clReadArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verify_multireadimage(input_ptr, 7, (float *)output_ptr, img_width, img_height);
|
||||
|
||||
// cleanup
|
||||
clReleaseSampler(sampler);
|
||||
for (i=0; i<8; i++)
|
||||
clReleaseMemObject(streams[i]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
for (i=0; i<7; i++)
|
||||
free(input_ptr[i]);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
710
test_conformance/basic/test_numeric_constants.cpp
Normal file
710
test_conformance/basic/test_numeric_constants.cpp
Normal file
@@ -0,0 +1,710 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
|
||||
// Checks that `name` is at least `value`.  On failure logs the stringized
// comparison and increments the caller's local `errors` counter; on success
// logs the relation that held.  Both operands are parenthesized so compound
// expressions compare as a whole.  The expansion is a bare brace block (not
// do/while) because call sites in this file omit the trailing semicolon.
#define TEST_VALUE_POSITIVE( string_name, name, value ) \
{ \
    if ((name) < (value)) { \
        log_error("FAILED: " string_name ": " #name " < " #value "\n"); \
        errors++; \
    } else { \
        log_info("\t" string_name ": " #name " >= " #value "\n"); \
    } \
}
|
||||
|
||||
// Checks that `name` is at most `value`.  On failure logs the stringized
// comparison and increments the caller's local `errors` counter; on success
// logs the relation that held.  Both operands are parenthesized so compound
// expressions compare as a whole.  The expansion is a bare brace block (not
// do/while) because call sites in this file omit the trailing semicolon.
#define TEST_VALUE_NEGATIVE( string_name, name, value ) \
{ \
    if ((name) > (value)) { \
        log_error("FAILED: " string_name ": " #name " > " #value "\n"); \
        errors++; \
    } else { \
        log_info("\t" string_name ": " #name " <= " #value "\n"); \
    } \
}
|
||||
|
||||
// Checks that `name` equals `value`, logging both as source text (the
// stringized macro arguments).  On mismatch increments the caller's local
// `errors` counter.  Both operands are parenthesized so compound expressions
// compare as a whole.  The expansion is a bare brace block (not do/while)
// because call sites in this file omit the trailing semicolon.
#define TEST_VALUE_EQUAL_LITERAL( string_name, name, value ) \
{ \
    if ((name) != (value)) { \
        log_error("FAILED: " string_name ": " #name " != " #value "\n"); \
        errors++; \
    } else { \
        log_info("\t" string_name ": " #name " = " #value "\n"); \
    } \
}
|
||||
|
||||
// Checks that `name` equals `value` and logs the expected value with %a and
// %g, so `value` must be a floating-point expression.  On mismatch increments
// the caller's local `errors` counter.  `value` is evaluated more than once;
// pass expressions without side effects.  Operands are parenthesized so
// compound expressions compare as a whole.  The expansion is a bare brace
// block (not do/while) because call sites omit the trailing semicolon.
#define TEST_VALUE_EQUAL( string_name, name, value ) \
{ \
    if ((name) != (value)) { \
        log_error("FAILED: " string_name ": " #name " != %a (%17.21g)\n", (value), (value)); \
        errors++; \
    } else { \
        log_info("\t" string_name ": " #name " = %a (%17.21g)\n", (value), (value)); \
    } \
}
|
||||
|
||||
// Verifies the host-side CL_* numeric constants against the expected literal
// values.  Only the macros are inspected; the device, context, queue and
// element count arguments are not used.  Returns the number of mismatches.
int test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int errors = 0;

    // ---- integer limits ----
    TEST_VALUE_EQUAL_LITERAL( "CL_CHAR_BIT", CL_CHAR_BIT, 8 )
    TEST_VALUE_EQUAL_LITERAL( "CL_SCHAR_MAX", CL_SCHAR_MAX, 127 )
    TEST_VALUE_EQUAL_LITERAL( "CL_SCHAR_MIN", CL_SCHAR_MIN, (-127-1) )
    TEST_VALUE_EQUAL_LITERAL( "CL_CHAR_MAX", CL_CHAR_MAX, CL_SCHAR_MAX )
    TEST_VALUE_EQUAL_LITERAL( "CL_CHAR_MIN", CL_CHAR_MIN, CL_SCHAR_MIN )
    TEST_VALUE_EQUAL_LITERAL( "CL_UCHAR_MAX", CL_UCHAR_MAX, 255 )
    TEST_VALUE_EQUAL_LITERAL( "CL_SHRT_MAX", CL_SHRT_MAX, 32767 )
    TEST_VALUE_EQUAL_LITERAL( "CL_SHRT_MIN", CL_SHRT_MIN, (-32767-1) )
    TEST_VALUE_EQUAL_LITERAL( "CL_USHRT_MAX", CL_USHRT_MAX, 65535 )
    TEST_VALUE_EQUAL_LITERAL( "CL_INT_MAX", CL_INT_MAX, 2147483647 )
    TEST_VALUE_EQUAL_LITERAL( "CL_INT_MIN", CL_INT_MIN, (-2147483647-1) )
    TEST_VALUE_EQUAL_LITERAL( "CL_UINT_MAX", CL_UINT_MAX, 0xffffffffU )
    TEST_VALUE_EQUAL_LITERAL( "CL_LONG_MAX", CL_LONG_MAX, ((cl_long) 0x7FFFFFFFFFFFFFFFLL) )
    TEST_VALUE_EQUAL_LITERAL( "CL_LONG_MIN", CL_LONG_MIN, ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) )
    TEST_VALUE_EQUAL_LITERAL( "CL_ULONG_MAX", CL_ULONG_MAX, ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) )

    // ---- single-precision characteristics ----
    TEST_VALUE_EQUAL_LITERAL( "CL_FLT_DIG", CL_FLT_DIG, 6 )
    TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MANT_DIG", CL_FLT_MANT_DIG, 24 )
    TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MAX_10_EXP", CL_FLT_MAX_10_EXP, +38 )
    TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MAX_EXP", CL_FLT_MAX_EXP, +128 )
    TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MIN_10_EXP", CL_FLT_MIN_10_EXP, -37 )
    TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MIN_EXP", CL_FLT_MIN_EXP, -125 )
    TEST_VALUE_EQUAL_LITERAL( "CL_FLT_RADIX", CL_FLT_RADIX, 2 )
    TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MAX", CL_FLT_MAX, MAKE_HEX_FLOAT( 0x1.fffffep127f, 0x1fffffeL, 103) )
    TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MIN", CL_FLT_MIN, MAKE_HEX_FLOAT(0x1.0p-126f, 0x1L, -126) )
    TEST_VALUE_EQUAL_LITERAL( "CL_FLT_EPSILON", CL_FLT_EPSILON, MAKE_HEX_FLOAT(0x1.0p-23f, 0x1L, -23) )

    // ---- double-precision characteristics ----
    TEST_VALUE_EQUAL_LITERAL( "CL_DBL_DIG", CL_DBL_DIG, 15 )
    TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MANT_DIG", CL_DBL_MANT_DIG, 53 )
    TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MAX_10_EXP", CL_DBL_MAX_10_EXP, +308 )
    TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MAX_EXP", CL_DBL_MAX_EXP, +1024 )
    TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MIN_10_EXP", CL_DBL_MIN_10_EXP, -307 )
    TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MIN_EXP", CL_DBL_MIN_EXP, -1021 )
    TEST_VALUE_EQUAL_LITERAL( "CL_DBL_RADIX", CL_DBL_RADIX, 2 )
    TEST_VALUE_EQUAL( "CL_DBL_MAX", CL_DBL_MAX, MAKE_HEX_DOUBLE(0x1.fffffffffffffp1023, 0x1fffffffffffffLL, 971) )
    TEST_VALUE_EQUAL( "CL_DBL_MIN", CL_DBL_MIN, MAKE_HEX_DOUBLE(0x1.0p-1022, 0x1LL, -1022) )
    TEST_VALUE_EQUAL( "CL_DBL_EPSILON", CL_DBL_EPSILON, MAKE_HEX_DOUBLE(0x1.0p-52, 0x1LL, -52) )

    // ---- double-precision math constants ----
    TEST_VALUE_EQUAL( "CL_M_E", CL_M_E, MAKE_HEX_DOUBLE(0x1.5bf0a8b145769p+1, 0x15bf0a8b145769LL, -51) )
    TEST_VALUE_EQUAL( "CL_M_LOG2E", CL_M_LOG2E, MAKE_HEX_DOUBLE(0x1.71547652b82fep+0, 0x171547652b82feLL, -52) )
    TEST_VALUE_EQUAL( "CL_M_LOG10E", CL_M_LOG10E, MAKE_HEX_DOUBLE(0x1.bcb7b1526e50ep-2, 0x1bcb7b1526e50eLL, -54) )
    TEST_VALUE_EQUAL( "CL_M_LN2", CL_M_LN2, MAKE_HEX_DOUBLE(0x1.62e42fefa39efp-1, 0x162e42fefa39efLL, -53) )
    TEST_VALUE_EQUAL( "CL_M_LN10", CL_M_LN10, MAKE_HEX_DOUBLE(0x1.26bb1bbb55516p+1, 0x126bb1bbb55516LL, -51) )
    TEST_VALUE_EQUAL( "CL_M_PI", CL_M_PI, MAKE_HEX_DOUBLE(0x1.921fb54442d18p+1, 0x1921fb54442d18LL, -51) )
    TEST_VALUE_EQUAL( "CL_M_PI_2", CL_M_PI_2, MAKE_HEX_DOUBLE(0x1.921fb54442d18p+0, 0x1921fb54442d18LL, -52) )
    TEST_VALUE_EQUAL( "CL_M_PI_4", CL_M_PI_4, MAKE_HEX_DOUBLE(0x1.921fb54442d18p-1, 0x1921fb54442d18LL, -53) )
    TEST_VALUE_EQUAL( "CL_M_1_PI", CL_M_1_PI, MAKE_HEX_DOUBLE(0x1.45f306dc9c883p-2, 0x145f306dc9c883LL, -54) )
    TEST_VALUE_EQUAL( "CL_M_2_PI", CL_M_2_PI, MAKE_HEX_DOUBLE(0x1.45f306dc9c883p-1, 0x145f306dc9c883LL, -53) )
    TEST_VALUE_EQUAL( "CL_M_2_SQRTPI", CL_M_2_SQRTPI, MAKE_HEX_DOUBLE(0x1.20dd750429b6dp+0, 0x120dd750429b6dLL, -52) )
    TEST_VALUE_EQUAL( "CL_M_SQRT2", CL_M_SQRT2, MAKE_HEX_DOUBLE(0x1.6a09e667f3bcdp+0, 0x16a09e667f3bcdLL, -52) )
    TEST_VALUE_EQUAL( "CL_M_SQRT1_2", CL_M_SQRT1_2, MAKE_HEX_DOUBLE(0x1.6a09e667f3bcdp-1, 0x16a09e667f3bcdLL, -53) )

    // ---- single-precision math constants ----
    TEST_VALUE_EQUAL( "CL_M_E_F", CL_M_E_F, MAKE_HEX_FLOAT(0x1.5bf0a8p+1f, 0x15bf0a8L, -23) )
    TEST_VALUE_EQUAL( "CL_M_LOG2E_F", CL_M_LOG2E_F, MAKE_HEX_FLOAT(0x1.715476p+0f, 0x1715476L, -24) )
    TEST_VALUE_EQUAL( "CL_M_LOG10E_F", CL_M_LOG10E_F, MAKE_HEX_FLOAT(0x1.bcb7b2p-2f, 0x1bcb7b2L, -26) )
    TEST_VALUE_EQUAL( "CL_M_LN2_F", CL_M_LN2_F, MAKE_HEX_FLOAT(0x1.62e43p-1f, 0x162e43L, -21) )
    TEST_VALUE_EQUAL( "CL_M_LN10_F", CL_M_LN10_F, MAKE_HEX_FLOAT(0x1.26bb1cp+1f, 0x126bb1cL, -23) )
    TEST_VALUE_EQUAL( "CL_M_PI_F", CL_M_PI_F, MAKE_HEX_FLOAT(0x1.921fb6p+1f, 0x1921fb6L, -23) )
    TEST_VALUE_EQUAL( "CL_M_PI_2_F", CL_M_PI_2_F, MAKE_HEX_FLOAT(0x1.921fb6p+0f, 0x1921fb6L, -24) )
    TEST_VALUE_EQUAL( "CL_M_PI_4_F", CL_M_PI_4_F, MAKE_HEX_FLOAT(0x1.921fb6p-1f, 0x1921fb6L, -25) )
    TEST_VALUE_EQUAL( "CL_M_1_PI_F", CL_M_1_PI_F, MAKE_HEX_FLOAT(0x1.45f306p-2f, 0x145f306L, -26) )
    TEST_VALUE_EQUAL( "CL_M_2_PI_F", CL_M_2_PI_F, MAKE_HEX_FLOAT(0x1.45f306p-1f, 0x145f306L, -25) )
    TEST_VALUE_EQUAL( "CL_M_2_SQRTPI_F", CL_M_2_SQRTPI_F, MAKE_HEX_FLOAT(0x1.20dd76p+0f, 0x120dd76L, -24) )
    TEST_VALUE_EQUAL( "CL_M_SQRT2_F", CL_M_SQRT2_F, MAKE_HEX_FLOAT(0x1.6a09e6p+0f, 0x16a09e6L, -24) )
    TEST_VALUE_EQUAL( "CL_M_SQRT1_2_F", CL_M_SQRT1_2_F, MAKE_HEX_FLOAT(0x1.6a09e6p-1f, 0x16a09e6L, -25) )

    return errors;
}
|
||||
|
||||
|
||||
// Kernel source that stores the device-side integer limits, float
// characteristics and single-precision math constants into output buffers:
//   int_out[0..10]   integer limits (CHAR_BIT .. INT_MIN)
//   uint_out[0]      UINT_MAX
//   int_out[11..17]  FLT_* integer characteristics
//   int_out[18]      __IMAGE_SUPPORT__, or the sentinel 0xf00baa if undefined
//   float_out[0..15] FLT_MAX, FLT_MIN, FLT_EPSILON and the M_*_F constants
const char *kernel_int_float[] = {
    "__kernel void test( __global float *float_out, __global int *int_out, __global uint *uint_out) \n"
    "{\n"
    // integer limits
    " int_out[0] = CHAR_BIT;\n"
    " int_out[1] = SCHAR_MAX;\n"
    " int_out[2] = SCHAR_MIN;\n"
    " int_out[3] = CHAR_MAX;\n"
    " int_out[4] = CHAR_MIN;\n"
    " int_out[5] = UCHAR_MAX;\n"
    " int_out[6] = SHRT_MAX;\n"
    " int_out[7] = SHRT_MIN;\n"
    " int_out[8] = USHRT_MAX;\n"
    " int_out[9] = INT_MAX;\n"
    " int_out[10] = INT_MIN;\n"
    " uint_out[0] = UINT_MAX;\n"
    // float characteristics
    " int_out[11] = FLT_DIG;\n"
    " int_out[12] = FLT_MANT_DIG;\n"
    " int_out[13] = FLT_MAX_10_EXP;\n"
    " int_out[14] = FLT_MAX_EXP;\n"
    " int_out[15] = FLT_MIN_10_EXP;\n"
    " int_out[16] = FLT_MIN_EXP;\n"
    " int_out[17] = FLT_RADIX;\n"
    // image support probe
    "#ifdef __IMAGE_SUPPORT__\n"
    " int_out[18] = __IMAGE_SUPPORT__;\n"
    "#else\n"
    " int_out[18] = 0xf00baa;\n"
    "#endif\n"
    // float limits and math constants
    " float_out[0] = FLT_MAX;\n"
    " float_out[1] = FLT_MIN;\n"
    " float_out[2] = FLT_EPSILON;\n"
    " float_out[3] = M_E_F;\n"
    " float_out[4] = M_LOG2E_F;\n"
    " float_out[5] = M_LOG10E_F;\n"
    " float_out[6] = M_LN2_F;\n"
    " float_out[7] = M_LN10_F;\n"
    " float_out[8] = M_PI_F;\n"
    " float_out[9] = M_PI_2_F;\n"
    " float_out[10] = M_PI_4_F;\n"
    " float_out[11] = M_1_PI_F;\n"
    " float_out[12] = M_2_PI_F;\n"
    " float_out[13] = M_2_SQRTPI_F;\n"
    " float_out[14] = M_SQRT2_F;\n"
    " float_out[15] = M_SQRT1_2_F;\n"
    "}\n"
};
|
||||
|
||||
// Kernel source that stores the 64-bit integer limits: long_out[0] and
// long_out[1] receive LONG_MAX and LONG_MIN, ulong_out[0] receives ULONG_MAX.
const char *kernel_long[] = {
    "__kernel void test(__global long *long_out, __global ulong *ulong_out) \n"
    "{\n"
    " long_out[0] = LONG_MAX;\n"
    " long_out[1] = LONG_MIN;\n"
    " ulong_out[0] = ULONG_MAX;\n"
    "}\n"
};
|
||||
|
||||
// Kernel source (enables cl_khr_fp64) that stores the double-precision
// characteristics and math constants:
//   long_out[0..6]    DBL_* integer characteristics
//   double_out[0..2]  DBL_MAX, DBL_MIN, DBL_EPSILON
//   double_out[3..15] the M_* math constants
const char *kernel_double[] = {
    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
    "__kernel void test( __global double *double_out, __global long *long_out ) \n "
    "{\n"
    // integer characteristics
    " long_out[0] = DBL_DIG;\n"
    " long_out[1] = DBL_MANT_DIG;\n"
    " long_out[2] = DBL_MAX_10_EXP;\n"
    " long_out[3] = DBL_MAX_EXP;\n"
    " long_out[4] = DBL_MIN_10_EXP;\n"
    " long_out[5] = DBL_MIN_EXP;\n"
    " long_out[6] = DBL_RADIX;\n"
    // limits
    " double_out[0] = DBL_MAX;\n"
    " double_out[1] = DBL_MIN;\n"
    " double_out[2] = DBL_EPSILON;\n"
    // math constants
    " double_out[3] = M_E;\n"
    " double_out[4] = M_LOG2E;\n"
    " double_out[5] = M_LOG10E;\n"
    " double_out[6] = M_LN2;\n"
    " double_out[7] = M_LN10;\n"
    " double_out[8] = M_PI;\n"
    " double_out[9] = M_PI_2;\n"
    " double_out[10] = M_PI_4;\n"
    " double_out[11] = M_1_PI;\n"
    " double_out[12] = M_2_PI;\n"
    " double_out[13] = M_2_SQRTPI;\n"
    " double_out[14] = M_SQRT2;\n"
    " double_out[15] = M_SQRT1_2;\n"
    "}\n"
};
|
||||
|
||||
|
||||
int test_kernel_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error, errors = 0;
|
||||
// clProgramWrapper program;
|
||||
// clKernelWrapper kernel;
|
||||
// clMemWrapper streams[3];
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
cl_mem streams[3];
|
||||
|
||||
size_t threads[] = {1,1,1};
|
||||
cl_float float_out[16];
|
||||
cl_int int_out[19];
|
||||
cl_uint uint_out[1];
|
||||
cl_long long_out[7];
|
||||
cl_ulong ulong_out[1];
|
||||
cl_double double_out[16];
|
||||
|
||||
/** INTs and FLOATs **/
|
||||
|
||||
// Create the kernel
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_int_float, "test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Create some I/O streams */
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(float_out), NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(int_out), NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(uint_out), NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 2, sizeof( streams[2] ), &streams[2]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(float_out), (void*)float_out, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(int_out), (void*)int_out, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof(uint_out), (void*)uint_out, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
TEST_VALUE_EQUAL_LITERAL( "CHAR_BIT", int_out[0], 8)
|
||||
TEST_VALUE_EQUAL_LITERAL( "SCHAR_MAX", int_out[1], 127)
|
||||
TEST_VALUE_EQUAL_LITERAL( "SCHAR_MIN", int_out[2], (-127-1))
|
||||
TEST_VALUE_EQUAL_LITERAL( "CHAR_MAX", int_out[3], CL_SCHAR_MAX)
|
||||
TEST_VALUE_EQUAL_LITERAL( "CHAR_MIN", int_out[4], CL_SCHAR_MIN)
|
||||
TEST_VALUE_EQUAL_LITERAL( "UCHAR_MAX", int_out[5], 255)
|
||||
TEST_VALUE_EQUAL_LITERAL( "SHRT_MAX", int_out[6], 32767)
|
||||
TEST_VALUE_EQUAL_LITERAL( "SHRT_MIN",int_out[7], (-32767-1))
|
||||
TEST_VALUE_EQUAL_LITERAL( "USHRT_MAX", int_out[8], 65535)
|
||||
TEST_VALUE_EQUAL_LITERAL( "INT_MAX", int_out[9], 2147483647)
|
||||
TEST_VALUE_EQUAL_LITERAL( "INT_MIN", int_out[10], (-2147483647-1))
|
||||
TEST_VALUE_EQUAL_LITERAL( "UINT_MAX", uint_out[0], 0xffffffffU)
|
||||
|
||||
TEST_VALUE_EQUAL_LITERAL( "FLT_DIG", int_out[11], 6)
|
||||
TEST_VALUE_EQUAL_LITERAL( "FLT_MANT_DIG", int_out[12], 24)
|
||||
TEST_VALUE_EQUAL_LITERAL( "FLT_MAX_10_EXP", int_out[13], +38)
|
||||
TEST_VALUE_EQUAL_LITERAL( "FLT_MAX_EXP", int_out[14], +128)
|
||||
TEST_VALUE_EQUAL_LITERAL( "FLT_MIN_10_EXP", int_out[15], -37)
|
||||
TEST_VALUE_EQUAL_LITERAL( "FLT_MIN_EXP", int_out[16], -125)
|
||||
TEST_VALUE_EQUAL_LITERAL( "FLT_RADIX", int_out[17], 2)
|
||||
TEST_VALUE_EQUAL( "FLT_MAX", float_out[0], MAKE_HEX_FLOAT(0x1.fffffep127f, 0x1fffffeL, 103))
|
||||
TEST_VALUE_EQUAL( "FLT_MIN", float_out[1], MAKE_HEX_FLOAT(0x1.0p-126f, 0x1L, -126))
|
||||
TEST_VALUE_EQUAL( "FLT_EPSILON", float_out[2], MAKE_HEX_FLOAT(0x1.0p-23f, 0x1L, -23))
|
||||
TEST_VALUE_EQUAL( "M_E_F", float_out[3], CL_M_E_F )
|
||||
TEST_VALUE_EQUAL( "M_LOG2E_F", float_out[4], CL_M_LOG2E_F )
|
||||
TEST_VALUE_EQUAL( "M_LOG10E_F", float_out[5], CL_M_LOG10E_F )
|
||||
TEST_VALUE_EQUAL( "M_LN2_F", float_out[6], CL_M_LN2_F )
|
||||
TEST_VALUE_EQUAL( "M_LN10_F", float_out[7], CL_M_LN10_F )
|
||||
TEST_VALUE_EQUAL( "M_PI_F", float_out[8], CL_M_PI_F )
|
||||
TEST_VALUE_EQUAL( "M_PI_2_F", float_out[9], CL_M_PI_2_F )
|
||||
TEST_VALUE_EQUAL( "M_PI_4_F", float_out[10], CL_M_PI_4_F )
|
||||
TEST_VALUE_EQUAL( "M_1_PI_F", float_out[11], CL_M_1_PI_F )
|
||||
TEST_VALUE_EQUAL( "M_2_PI_F", float_out[12], CL_M_2_PI_F )
|
||||
TEST_VALUE_EQUAL( "M_2_SQRTPI_F", float_out[13], CL_M_2_SQRTPI_F )
|
||||
TEST_VALUE_EQUAL( "M_SQRT2_F", float_out[14], CL_M_SQRT2_F )
|
||||
TEST_VALUE_EQUAL( "M_SQRT1_2_F", float_out[15], CL_M_SQRT1_2_F )
|
||||
|
||||
// We need to check these values against what we know is supported on the device
|
||||
if( checkForImageSupport( deviceID ) == 0 )
|
||||
{ // has images
|
||||
// If images are supported, the constant should have been defined to the value 1
|
||||
if( int_out[18] == 0xf00baa )
|
||||
{
|
||||
log_error( "FAILURE: __IMAGE_SUPPORT__ undefined even though images are supported\n" );
|
||||
return -1;
|
||||
}
|
||||
else if( int_out[18] != 1 )
|
||||
{
|
||||
log_error( "FAILURE: __IMAGE_SUPPORT__ defined, but to the wrong value (defined as %d, spec states it should be 1)\n", int_out[18] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ // no images
|
||||
// If images aren't supported, the constant should be undefined
|
||||
if( int_out[18] != 0xf00baa )
|
||||
{
|
||||
log_error( "FAILURE: __IMAGE_SUPPORT__ defined to value %d even though images aren't supported", int_out[18] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
log_info( "\t__IMAGE_SUPPORT__: %d\n", int_out[18]);
|
||||
|
||||
clReleaseMemObject(streams[0]); streams[0] = NULL;
|
||||
clReleaseMemObject(streams[1]); streams[1] = NULL;
|
||||
clReleaseMemObject(streams[2]); streams[2] = NULL;
|
||||
clReleaseKernel(kernel); kernel = NULL;
|
||||
clReleaseProgram(program); program = NULL;
|
||||
|
||||
/** LONGs **/
|
||||
|
||||
if(!gHasLong) {
|
||||
log_info("Longs not supported; skipping long tests.\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
// Create the kernel
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_long, "test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(long_out), NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(ulong_out), NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(long_out), &long_out, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(ulong_out), &ulong_out, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
TEST_VALUE_EQUAL_LITERAL( "LONG_MAX", long_out[0], ((cl_long) 0x7FFFFFFFFFFFFFFFLL))
|
||||
TEST_VALUE_EQUAL_LITERAL( "LONG_MIN", long_out[1], ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL))
|
||||
TEST_VALUE_EQUAL_LITERAL( "ULONG_MAX", ulong_out[0], ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL))
|
||||
|
||||
clReleaseMemObject(streams[0]); streams[0] = NULL;
|
||||
clReleaseMemObject(streams[1]); streams[1] = NULL;
|
||||
clReleaseKernel(kernel); kernel = NULL;
|
||||
clReleaseProgram(program); program = NULL;
|
||||
}
|
||||
|
||||
/** DOUBLEs **/
|
||||
|
||||
if(!is_extension_available(deviceID, "cl_khr_fp64")) {
|
||||
log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
// Create the kernel
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_double, "test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(double_out), NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(long_out), NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(double_out), &double_out, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(long_out), &long_out, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
TEST_VALUE_EQUAL_LITERAL( "DBL_DIG", long_out[0], 15)
|
||||
TEST_VALUE_EQUAL_LITERAL( "DBL_MANT_DIG", long_out[1], 53)
|
||||
TEST_VALUE_EQUAL_LITERAL( "DBL_MAX_10_EXP", long_out[2], +308)
|
||||
TEST_VALUE_EQUAL_LITERAL( "DBL_MAX_EXP", long_out[3], +1024)
|
||||
TEST_VALUE_EQUAL_LITERAL( "DBL_MIN_10_EXP", long_out[4], -307)
|
||||
TEST_VALUE_EQUAL_LITERAL( "DBL_MIN_EXP", long_out[5], -1021)
|
||||
TEST_VALUE_EQUAL_LITERAL( "DBL_RADIX", long_out[6], 2)
|
||||
TEST_VALUE_EQUAL( "DBL_MAX", double_out[0], MAKE_HEX_DOUBLE(0x1.fffffffffffffp1023, 0x1fffffffffffffLL, 971))
|
||||
TEST_VALUE_EQUAL( "DBL_MIN", double_out[1], MAKE_HEX_DOUBLE(0x1.0p-1022, 0x1LL, -1022))
|
||||
TEST_VALUE_EQUAL( "DBL_EPSILON", double_out[2], MAKE_HEX_DOUBLE(0x1.0p-52, 0x1LL, -52))
|
||||
//TEST_VALUE_EQUAL( "M_E", double_out[3], CL_M_E )
|
||||
TEST_VALUE_EQUAL( "M_LOG2E", double_out[4], CL_M_LOG2E )
|
||||
TEST_VALUE_EQUAL( "M_LOG10E", double_out[5], CL_M_LOG10E )
|
||||
TEST_VALUE_EQUAL( "M_LN2", double_out[6], CL_M_LN2 )
|
||||
TEST_VALUE_EQUAL( "M_LN10", double_out[7], CL_M_LN10 )
|
||||
TEST_VALUE_EQUAL( "M_PI", double_out[8], CL_M_PI )
|
||||
TEST_VALUE_EQUAL( "M_PI_2", double_out[9], CL_M_PI_2 )
|
||||
TEST_VALUE_EQUAL( "M_PI_4", double_out[10], CL_M_PI_4 )
|
||||
TEST_VALUE_EQUAL( "M_1_PI", double_out[11], CL_M_1_PI )
|
||||
TEST_VALUE_EQUAL( "M_2_PI", double_out[12], CL_M_2_PI )
|
||||
TEST_VALUE_EQUAL( "M_2_SQRTPI", double_out[13], CL_M_2_SQRTPI )
|
||||
TEST_VALUE_EQUAL( "M_SQRT2", double_out[14], CL_M_SQRT2 )
|
||||
TEST_VALUE_EQUAL( "M_SQRT1_2", double_out[15], CL_M_SQRT1_2 )
|
||||
|
||||
clReleaseMemObject(streams[0]); streams[0] = NULL;
|
||||
clReleaseMemObject(streams[1]); streams[1] = NULL;
|
||||
clReleaseKernel(kernel); kernel = NULL;
|
||||
clReleaseProgram(program); program = NULL;
|
||||
}
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
|
||||
// Kernel source probing MAXFLOAT: classification results (isinf, isnormal,
// isnan), its size, and equality comparisons are written to intOut, and the
// value itself to floatOut[0].  intOut[5] is left unwritten because its
// statement is commented out below.
const char *kernel_constant_limits[] = {
    "__kernel void test( __global int *intOut, __global float *floatOut ) \n"
    "{\n"
    " intOut[0] = isinf( MAXFLOAT ) ? 1 : 0;\n"
    " intOut[1] = isnormal( MAXFLOAT ) ? 1 : 0;\n"
    " intOut[2] = isnan( MAXFLOAT ) ? 1 : 0;\n"
    " intOut[3] = sizeof( MAXFLOAT );\n"
    " intOut[4] = ( MAXFLOAT == FLT_MAX ) ? 1 : 0;\n"
    // Disabled in the original source:
    // " intOut[5] = ( MAXFLOAT == CL_FLT_MAX ) ? 1 : 0;\n"
    " intOut[6] = ( MAXFLOAT == MAXFLOAT ) ? 1 : 0;\n"
    " intOut[7] = ( MAXFLOAT == 0x1.fffffep127f ) ? 1 : 0;\n"
    " floatOut[0] = MAXFLOAT;\n"
    "}\n"
};
|
||||
|
||||
// Kernel source probing INFINITY, HUGE_VALF and NAN:
//   intOut[0..11]   INFINITY properties, floatOut[0] = INFINITY
//   intOut[12..13]  HUGE_VALF properties, floatOut[1] = HUGE_VALF
//   intOut[14..20]  NAN properties, floatOut[2] = NAN
//   intOut[21..31]  NaN-producing operations and INFINITY/NAN comparisons
const char *kernel_constant_extended_limits[] = {
    "__kernel void test( __global int *intOut, __global float *floatOut ) \n"
    "{\n"
    // INFINITY
    " intOut[0] = ( INFINITY == HUGE_VALF ) ? 1 : 0;\n"
    " intOut[1] = sizeof( INFINITY );\n"
    " intOut[2] = isinf( INFINITY ) ? 1 : 0;\n"
    " intOut[3] = isnormal( INFINITY ) ? 1 : 0;\n"
    " intOut[4] = isnan( INFINITY ) ? 1 : 0;\n"
    " intOut[5] = ( INFINITY > MAXFLOAT ) ? 1 : 0;\n"
    " intOut[6] = ( -INFINITY < -MAXFLOAT ) ? 1 : 0;\n"
    " intOut[7] = ( ( MAXFLOAT + MAXFLOAT ) == INFINITY ) ? 1 : 0;\n"
    " intOut[8] = ( nextafter( MAXFLOAT, INFINITY ) == INFINITY ) ? 1 : 0;\n"
    " intOut[9] = ( nextafter( -MAXFLOAT, -INFINITY ) == -INFINITY ) ? 1 : 0;\n"
    " intOut[10] = ( INFINITY == INFINITY ) ? 1 : 0;\n"
    " intOut[11] = ( as_uint( INFINITY ) == 0x7f800000 ) ? 1 : 0;\n"
    " floatOut[0] = INFINITY;\n"
    "\n"
    // HUGE_VALF
    " intOut[12] = sizeof( HUGE_VALF );\n"
    " intOut[13] = ( HUGE_VALF == INFINITY ) ? 1 : 0;\n"
    " floatOut[1] = HUGE_VALF;\n"
    "\n"
    // NAN
    " intOut[14] = ( NAN == NAN ) ? 1 : 0;\n"
    " intOut[15] = ( NAN != NAN ) ? 1 : 0;\n"
    " intOut[16] = isnan( NAN ) ? 1 : 0;\n"
    " intOut[17] = isinf( NAN ) ? 1 : 0;\n"
    " intOut[18] = isnormal( NAN ) ? 1 : 0;\n"
    " intOut[19] = ( ( as_uint( NAN ) & 0x7fffffff ) > 0x7f800000 ) ? 1 : 0;\n"
    " intOut[20] = sizeof( NAN );\n"
    " floatOut[2] = NAN;\n"
    "\n"
    // operations that must yield NaN, and INFINITY vs NAN comparisons
    " intOut[21] = isnan( INFINITY / INFINITY ) ? 1 : 0;\n"
    " intOut[22] = isnan( INFINITY - INFINITY ) ? 1 : 0;\n"
    " intOut[23] = isnan( 0.f / 0.f ) ? 1 : 0;\n"
    " intOut[24] = isnan( INFINITY * 0.f ) ? 1 : 0;\n"
    " intOut[25] = ( INFINITY == NAN ); \n"
    " intOut[26] = ( -INFINITY == NAN ); \n"
    " intOut[27] = ( INFINITY > NAN ); \n"
    " intOut[28] = ( -INFINITY < NAN ); \n"
    " intOut[29] = ( INFINITY != NAN ); \n"
    " intOut[30] = ( NAN > INFINITY ); \n"
    " intOut[31] = ( NAN < -INFINITY ); \n"
    "}\n"
};
|
||||
|
||||
// OpenCL C program used by the double-precision stage of
// test_kernel_limit_constants.  The array has exactly one element: all of the
// adjacent literals below concatenate into a single source string.
//
// Kernel outputs: intOut[0..6] hold the individual HUGE_VAL checks and
// doubleOut[0] holds the raw HUGE_VAL value.
const char *kernel_constant_double_limits[] = {
    // Doubles are optional, so the extension has to be enabled explicitly.
    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
    "__kernel void test( __global int *intOut, __global double *doubleOut ) \n"
    "{\n"
    // Size and classification of HUGE_VAL.
    " intOut[0] = sizeof( HUGE_VAL );\n"
    " intOut[1] = ( HUGE_VAL == INFINITY ) ? 1 : 0;\n"
    " intOut[2] = isinf( HUGE_VAL ) ? 1 : 0;\n"
    " intOut[3] = isnormal( HUGE_VAL ) ? 1 : 0;\n"
    " intOut[4] = isnan( HUGE_VAL ) ? 1 : 0;\n"
    // Relationship to the float constant and the exact bit pattern.
    " intOut[5] = ( HUGE_VAL == HUGE_VALF ) ? 1 : 0;\n"
    " intOut[6] = ( as_ulong( HUGE_VAL ) == 0x7ff0000000000000UL ) ? 1 : 0;\n"
    // Raw value, re-checked bit-for-bit on the host.
    " doubleOut[0] = HUGE_VAL;\n"
    "}\n"
};
|
||||
|
||||
// Assertion helpers for the host-side result checks.
//
//   a   - condition that must hold
//   msg - human readable description of the requirement
//   f   - lvalue whose raw bits are printed when the requirement fails
//
// On failure the macro logs the message plus the bit pattern of `f` and makes
// the ENCLOSING FUNCTION return -1.
//
// The bits of `f` are copied with memcpy (at most the size of the printed
// integer) instead of being read through a casted pointer.  This avoids the
// strict-aliasing violation and, for TEST_DOUBLE_ASSERTION, avoids reading 8
// bytes from a 4-byte object when a cl_int is passed.  When `f` is narrower
// than the printed integer the remaining bytes stay zero (where they end up in
// the printed number depends on host endianness).
//
// NOTE: these intentionally remain a bare `if { }` rather than
// do { } while( 0 ): existing call sites invoke them WITHOUT a trailing
// semicolon.  Do not use them as the body of an unbraced if/else.
#define TEST_FLOAT_ASSERTION( a, msg, f ) \
    if( !( a ) ) \
    { \
        uint32_t assertion_bits_ = 0; \
        memcpy( &assertion_bits_, &( f ), sizeof( f ) < sizeof( assertion_bits_ ) ? sizeof( f ) : sizeof( assertion_bits_ ) ); \
        log_error( "ERROR: Float constant failed requirement: %s (bitwise value is 0x%8.8x)\n", msg, (unsigned int)assertion_bits_ ); \
        return -1; \
    }

#define TEST_DOUBLE_ASSERTION( a, msg, f ) \
    if( !( a ) ) \
    { \
        uint64_t assertion_bits_ = 0; \
        memcpy( &assertion_bits_, &( f ), sizeof( f ) < sizeof( assertion_bits_ ) ? sizeof( f ) : sizeof( assertion_bits_ ) ); \
        log_error( "ERROR: Double constant failed requirement: %s (bitwise value is 0x%16.16llx)\n", msg, (unsigned long long)assertion_bits_ ); \
        return -1; \
    }
|
||||
|
||||
// Checks the floating-point limit constants exposed by OpenCL C on `deviceID`.
//
// The test runs up to three single-work-item kernels, each of which stores the
// outcome of device-side comparisons into an int buffer plus the raw constant
// values into a float/double buffer:
//   Stage 1: MAXFLOAT (kernel_constant_limits, defined earlier in this file).
//   Stage 2: INFINITY, HUGE_VALF and NAN.  Skipped on EMBEDDED_PROFILE devices
//            that do not report CL_FP_INF_NAN for single precision.
//   Stage 3: HUGE_VAL (double).  Skipped without cl_khr_fp64 or without
//            CL_FP_INF_NAN for double precision.
//
// Returns 0 when every executed stage passes, a non-zero value otherwise.
// `num_elements` is unused.
int test_kernel_limit_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    size_t threads[] = {1,1,1};
    clMemWrapper intStream, floatStream, doubleStream;
    cl_int intOut[ 32 ];
    cl_float floatOut[ 3 ];
    cl_double doubleOut[ 1 ];

    /* Create some I/O streams */
    intStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(intOut), NULL, &error );
    test_error( error, "Creating test array failed" );
    floatStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(floatOut), NULL, &error );
    test_error( error, "Creating test array failed" );

    // Stage 1: basic limits on MAXFLOAT
    {
        clProgramWrapper program;
        clKernelWrapper kernel;

        if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_constant_limits, "test" ) != 0 )
        {
            return -1;
        }

        error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream );
        test_error( error, "Unable to set indexed kernel arguments" );
        error = clSetKernelArg( kernel, 1, sizeof( floatStream ), &floatStream );
        test_error( error, "Unable to set indexed kernel arguments" );

        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
        test_error( error, "Kernel execution failed" );

        error = clEnqueueReadBuffer( queue, intStream, CL_TRUE, 0, sizeof(intOut), intOut, 0, NULL, NULL );
        test_error( error, "Unable to get result data" );
        error = clEnqueueReadBuffer( queue, floatStream, CL_TRUE, 0, sizeof(floatOut), floatOut, 0, NULL, NULL );
        test_error( error, "Unable to get result data" );

        // Test MAXFLOAT properties.  Every failure reports the bits of the
        // MAXFLOAT value the device produced (floatOut[0]).
        TEST_FLOAT_ASSERTION( intOut[0] == 0, "isinf( MAXFLOAT ) = false", floatOut[0] )
        TEST_FLOAT_ASSERTION( intOut[1] == 1, "isnormal( MAXFLOAT ) = true", floatOut[0] )
        TEST_FLOAT_ASSERTION( intOut[2] == 0, "isnan( MAXFLOAT ) = false", floatOut[0] )
        TEST_FLOAT_ASSERTION( intOut[3] == 4, "sizeof( MAXFLOAT ) = 4", floatOut[0] )
        TEST_FLOAT_ASSERTION( intOut[4] == 1, "MAXFLOAT = FLT_MAX", floatOut[0] )
        TEST_FLOAT_ASSERTION( floatOut[0] == CL_FLT_MAX, "MAXFLOAT = CL_FLT_MAX", floatOut[0] )
        TEST_FLOAT_ASSERTION( intOut[6] == 1, "MAXFLOAT = MAXFLOAT", floatOut[0] )
        TEST_FLOAT_ASSERTION( floatOut[0] == MAKE_HEX_FLOAT( 0x1.fffffep127f, 0x1fffffeL, 103), "MAXFLOAT = 0x1.fffffep127f", floatOut[0] )
    }

    // Stage 2: INFINITY and NAN
    char profileStr[128] = "";
    error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof( profileStr ), profileStr, NULL );
    test_error( error, "Unable to run INFINITY/NAN tests (unable to get CL_DEVICE_PROFILE)" );

    bool testInfNan = true;
    if( strcmp( profileStr, "EMBEDDED_PROFILE" ) == 0 )
    {
        // We test if we're not an embedded profile, OR if the inf/nan flag in the config is set
        cl_device_fp_config single = 0;
        error = clGetDeviceInfo( deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL );
        test_error( error, "Unable to run INFINITY/NAN tests (unable to get FP_CONFIG bits)" );

        if( ( single & CL_FP_INF_NAN ) == 0 )
        {
            log_info( "Skipping INFINITY and NAN tests on embedded device (INF/NAN not supported on this device)\n" );
            testInfNan = false;
        }
    }

    if( testInfNan )
    {
        clProgramWrapper program;
        clKernelWrapper kernel;

        if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_constant_extended_limits, "test" ) != 0 )
        {
            return -1;
        }

        error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream );
        test_error( error, "Unable to set indexed kernel arguments" );
        error = clSetKernelArg( kernel, 1, sizeof( floatStream ), &floatStream );
        test_error( error, "Unable to set indexed kernel arguments" );

        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
        test_error( error, "Kernel execution failed" );

        error = clEnqueueReadBuffer( queue, intStream, CL_TRUE, 0, sizeof(intOut), intOut, 0, NULL, NULL );
        test_error( error, "Unable to get result data" );
        error = clEnqueueReadBuffer( queue, floatStream, CL_TRUE, 0, sizeof(floatOut), floatOut, 0, NULL, NULL );
        test_error( error, "Unable to get result data" );

        // Bit patterns of the raw values written by the kernel
        // (floatOut[0] = INFINITY, floatOut[1] = HUGE_VALF, floatOut[2] = NAN).
        // memcpy is used so the floats are not read through an integer pointer.
        uint32_t infinityBits = 0;
        uint32_t nanBits = 0;
        memcpy( &infinityBits, &floatOut[0], sizeof( infinityBits ) );
        memcpy( &nanBits, &floatOut[2], sizeof( nanBits ) );

        // INFINITY
        TEST_FLOAT_ASSERTION( intOut[0] == 1, "INFINITY == HUGE_VALF", intOut[0] )
        TEST_FLOAT_ASSERTION( intOut[1] == 4, "sizeof( INFINITY ) == 4", intOut[1] )
        TEST_FLOAT_ASSERTION( intOut[2] == 1, "isinf( INFINITY ) == true", intOut[2] )
        TEST_FLOAT_ASSERTION( intOut[3] == 0, "isnormal( INFINITY ) == false", intOut[3] )
        TEST_FLOAT_ASSERTION( intOut[4] == 0, "isnan( INFINITY ) == false", intOut[4] )
        TEST_FLOAT_ASSERTION( intOut[5] == 1, "INFINITY > MAXFLOAT", intOut[5] )
        TEST_FLOAT_ASSERTION( intOut[6] == 1, "-INFINITY < -MAXFLOAT", intOut[6] )
        TEST_FLOAT_ASSERTION( intOut[7] == 1, "( MAXFLOAT + MAXFLOAT ) == INFINITY", intOut[7] )
        TEST_FLOAT_ASSERTION( intOut[8] == 1, "nextafter( MAXFLOAT, INFINITY ) == INFINITY", intOut[8] )
        TEST_FLOAT_ASSERTION( intOut[9] == 1, "nextafter( -MAXFLOAT, -INFINITY ) == -INFINITY", intOut[9] )
        TEST_FLOAT_ASSERTION( intOut[10] == 1, "INFINITY = INFINITY", intOut[10] )
        TEST_FLOAT_ASSERTION( intOut[11] == 1, "asuint( INFINITY ) == 0x7f800000", intOut[11] )
        TEST_FLOAT_ASSERTION( infinityBits == 0x7f800000, "asuint( INFINITY ) == 0x7f800000", floatOut[0] )
        // floatOut[0] is the INFINITY value; the original checked floatOut[1]
        // (HUGE_VALF) here, which is covered by the HUGE_VALF checks below.
        TEST_FLOAT_ASSERTION( floatOut[0] == INFINITY, "INFINITY == INFINITY", floatOut[0] )

        // HUGE_VALF
        TEST_FLOAT_ASSERTION( intOut[12] == 4, "sizeof( HUGE_VALF ) == 4", intOut[12] )
        TEST_FLOAT_ASSERTION( intOut[13] == 1, "HUGE_VALF == INFINITY", intOut[13] )
        TEST_FLOAT_ASSERTION( floatOut[1] == HUGE_VALF, "HUGE_VALF == HUGE_VALF", floatOut[1] )

        // NAN
        TEST_FLOAT_ASSERTION( intOut[14] == 0, "(NAN == NAN) = false", intOut[14] )
        TEST_FLOAT_ASSERTION( intOut[15] == 1, "(NAN != NAN) = true", intOut[15] )
        TEST_FLOAT_ASSERTION( intOut[16] == 1, "isnan( NAN ) = true", intOut[16] )
        TEST_FLOAT_ASSERTION( intOut[17] == 0, "isinf( NAN ) = false", intOut[17] )
        TEST_FLOAT_ASSERTION( intOut[18] == 0, "isnormal( NAN ) = false", intOut[18] )
        TEST_FLOAT_ASSERTION( intOut[19] == 1, "( as_uint( NAN ) & 0x7fffffff ) > 0x7f800000", intOut[19] )
        TEST_FLOAT_ASSERTION( intOut[20] == 4, "sizeof( NAN ) = 4", intOut[20] )
        TEST_FLOAT_ASSERTION( ( nanBits & 0x7fffffff ) > 0x7f800000, "( as_uint( NAN ) & 0x7fffffff ) > 0x7f800000", floatOut[2] )

        // Arithmetic that must produce NAN, and INFINITY/NAN comparisons.
        // The messages mirror the expressions evaluated by the kernel.
        TEST_FLOAT_ASSERTION( intOut[ 21 ] == 1, "isnan( INFINITY / INFINITY ) = true", intOut[ 21 ] )
        TEST_FLOAT_ASSERTION( intOut[ 22 ] == 1, "isnan( INFINITY - INFINITY ) = true", intOut[ 22 ] )
        TEST_FLOAT_ASSERTION( intOut[ 23 ] == 1, "isnan( 0.f / 0.f ) = true", intOut[ 23 ] )
        TEST_FLOAT_ASSERTION( intOut[ 24 ] == 1, "isnan( INFINITY * 0.f ) = true", intOut[ 24 ] )
        TEST_FLOAT_ASSERTION( intOut[ 25 ] == 0, "( INFINITY == NAN ) = false", intOut[ 25 ] )
        TEST_FLOAT_ASSERTION( intOut[ 26 ] == 0, "(-INFINITY == NAN ) = false", intOut[ 26 ] )
        TEST_FLOAT_ASSERTION( intOut[ 27 ] == 0, "( INFINITY > NAN ) = false", intOut[ 27 ] )
        TEST_FLOAT_ASSERTION( intOut[ 28 ] == 0, "(-INFINITY < NAN ) = false", intOut[ 28 ] )
        TEST_FLOAT_ASSERTION( intOut[ 29 ] == 1, "( INFINITY != NAN ) = true", intOut[ 29 ] )
        TEST_FLOAT_ASSERTION( intOut[ 30 ] == 0, "( NAN > INFINITY ) = false", intOut[ 30 ] )
        TEST_FLOAT_ASSERTION( intOut[ 31 ] == 0, "( NAN < -INFINITY ) = false", intOut[ 31 ] )
    }

    // Stage 3: limits on HUGE_VAL (double)
    if( !is_extension_available( deviceID, "cl_khr_fp64" ) )
    {
        log_info( "Note: Skipping double HUGE_VAL tests (doubles unsupported on device)\n" );
    }
    else
    {
        cl_device_fp_config config = 0;
        error = clGetDeviceInfo( deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof( config ), &config, NULL );
        test_error( error, "Unable to run INFINITY/NAN tests (unable to get double FP_CONFIG bits)" );

        if( ( config & CL_FP_INF_NAN ) == 0 )
        {
            log_info( "Skipping HUGE_VAL tests (INF/NAN not supported on this device)\n" );
        }
        else
        {
            clProgramWrapper program;
            clKernelWrapper kernel;

            if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_constant_double_limits, "test" ) != 0 )
            {
                return -1;
            }

            doubleStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(doubleOut), NULL, &error );
            test_error( error, "Creating test array failed" );

            error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream );
            test_error( error, "Unable to set indexed kernel arguments" );
            error = clSetKernelArg( kernel, 1, sizeof( doubleStream ), &doubleStream );
            test_error( error, "Unable to set indexed kernel arguments" );

            error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
            test_error( error, "Kernel execution failed" );

            error = clEnqueueReadBuffer( queue, intStream, CL_TRUE, 0, sizeof(intOut), intOut, 0, NULL, NULL );
            test_error( error, "Unable to get result data" );
            error = clEnqueueReadBuffer( queue, doubleStream, CL_TRUE, 0, sizeof(doubleOut), doubleOut, 0, NULL, NULL );
            test_error( error, "Unable to get result data" );

            uint64_t hugeValBits = 0;
            memcpy( &hugeValBits, &doubleOut[0], sizeof( hugeValBits ) );

            // As in stage 1, failures report the bits of the value the device
            // produced (doubleOut[0]).  Passing the 4-byte intOut entries to
            // the 64-bit diagnostic would read past the element.
            TEST_DOUBLE_ASSERTION( intOut[0] == 8, "sizeof( HUGE_VAL ) = 8", doubleOut[0] )
            TEST_DOUBLE_ASSERTION( intOut[1] == 1, "HUGE_VAL = INFINITY", doubleOut[0] )
            TEST_DOUBLE_ASSERTION( intOut[2] == 1, "isinf( HUGE_VAL ) = true", doubleOut[0] )
            TEST_DOUBLE_ASSERTION( intOut[3] == 0, "isnormal( HUGE_VAL ) = false", doubleOut[0] )
            TEST_DOUBLE_ASSERTION( intOut[4] == 0, "isnan( HUGE_VAL ) = false", doubleOut[0] )
            TEST_DOUBLE_ASSERTION( intOut[5] == 1, "HUGE_VAL = HUGE_VALF", doubleOut[0] )
            TEST_DOUBLE_ASSERTION( intOut[6] == 1, "as_ulong( HUGE_VAL ) = 0x7ff0000000000000UL", doubleOut[0] )
            TEST_DOUBLE_ASSERTION( hugeValBits == 0x7ff0000000000000ULL, "as_ulong( HUGE_VAL ) = 0x7ff0000000000000UL", doubleOut[0] )
        }
    }

    return 0;
}
|
||||
|
||||
|
||||
141
test_conformance/basic/test_pointercast.c
Normal file
141
test_conformance/basic/test_pointercast.c
Normal file
@@ -0,0 +1,141 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * Kernel source for the pointer-cast test: each work-item reinterprets the
 * byte buffer `src` as an array of unsigned ints and copies element `tid`
 * into `dst`.
 */
static const char *pointer_cast_kernel_code =
    "__kernel void test_pointer_cast(__global unsigned char *src, __global unsigned int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " __global unsigned int *p = (__global unsigned int *)src;\n\n"
    " dst[tid] = p[tid];\n\n"
    "}\n";
|
||||
|
||||
|
||||
int
|
||||
verify_pointer_cast(unsigned char *inptr, unsigned int *outptr, int n)
|
||||
{
|
||||
unsigned int *p = (unsigned int *)inptr;
|
||||
int i;
|
||||
cl_uint r;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r = p[i];
|
||||
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error("POINTER_CAST test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("POINTER_CAST test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Runs the pointer-cast kernel over `num_elements` unsigned ints and verifies
 * the device output against the host interpretation of the same bytes.
 *
 * The input buffer is filled with random bytes seeded from gRandomSeed,
 * uploaded, processed by one work-item per element, read back and checked by
 * verify_pointer_cast().
 *
 * Returns 0 on success and -1 on any failure.  Every resource acquired here
 * (host buffers, cl_mem objects, program, kernel, random generator) is
 * released on all exit paths through the single `cleanup` label.
 */
int test_pointer_cast(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    /* Everything is declared and initialised up front so that `goto cleanup`
     * never jumps over an initialisation and cleanup can test for NULL. */
    cl_mem streams[2] = { NULL, NULL };
    unsigned char *input_ptr = NULL;
    unsigned int *output_ptr = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    MTdata d = NULL;
    size_t threads[1];
    size_t length = sizeof(int) * num_elements;
    size_t i;
    int err;
    int result = -1;

    input_ptr = (unsigned char*)malloc(length);
    output_ptr = (unsigned int*)malloc(length);
    if (!input_ptr || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    /* Fill every byte of the input buffer; the generator is only needed here. */
    d = init_genrand( gRandomSeed );
    for (i = 0; i < length; i++)
    {
        input_ptr[i] = (unsigned char)genrand_int32(d);
    }
    free_mtdata(d);
    d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &pointer_cast_kernel_code, "test_pointer_cast" );
    if (err)
    {
        goto cleanup;
    }

    /* Check each call on its own: OR-ing OpenCL status codes together can
     * produce a value that is not a meaningful error code. */
    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    if (err == CL_SUCCESS)
    {
        err = clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    }
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (size_t)num_elements;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clReadArray failed\n");
        goto cleanup;
    }

    result = verify_pointer_cast(input_ptr, output_ptr, num_elements);

cleanup:
    if (kernel)
    {
        clReleaseKernel(kernel);
    }
    if (program)
    {
        clReleaseProgram(program);
    }
    if (streams[1])
    {
        clReleaseMemObject(streams[1]);
    }
    if (streams[0])
    {
        clReleaseMemObject(streams[0]);
    }
    free(output_ptr);
    free(input_ptr);

    return result;
}
|
||||
|
||||
|
||||
393
test_conformance/basic/test_preprocessors.cpp
Normal file
393
test_conformance/basic/test_preprocessors.cpp
Normal file
@@ -0,0 +1,393 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include <ctype.h>
|
||||
|
||||
// Test __FILE__, __LINE__, __OPENCL_VERSION__, __OPENCL_C_VERSION__, __ENDIAN_LITTLE__, __ROUNDING_MODE__, __IMAGE_SUPPORT__, __FAST_RELAXED_MATH__
|
||||
// __kernel_exec
|
||||
|
||||
// printf-style template for the preprocessor test kernel.  The single "%s" is
// replaced with the (escaped) host file name so that the "#line" directive
// sets the kernel's __FILE__.
//
// Kernel outputs:
//   results[0..4] - __IMAGE_SUPPORT__, __ENDIAN_LITTLE__, __OPENCL_VERSION__,
//                   __OPENCL_C_VERSION__, __LINE__ (0xf00baa when undefined)
//   results[5]    - not written (the __FAST_RELAXED_MATH__ check is disabled)
//   results[6]    - 1 when __kernel_exec is defined, 0xf00baa otherwise
//   outFileString - NUL-terminated copy of __FILE__
//
// IMPORTANT: the host computes the expected __LINE__ value by counting the
// "\n" characters in this template, so the number and order of lines before
// the "results[4] = __LINE__" line must stay in sync with that logic.
const char *preprocessor_test = {
    "#line 2 \"%s\"\n"
    "__kernel void test( __global int *results, __global char *outFileString, __global char *outRoundingString )\n"
    "{\n"

    // Integer preprocessor macros
    "#ifdef __IMAGE_SUPPORT__\n"
    " results[0] = __IMAGE_SUPPORT__;\n"
    "#else\n"
    " results[0] = 0xf00baa;\n"
    "#endif\n"

    "#ifdef __ENDIAN_LITTLE__\n"
    " results[1] = __ENDIAN_LITTLE__;\n"
    "#else\n"
    " results[1] = 0xf00baa;\n"
    "#endif\n"

    "#ifdef __OPENCL_VERSION__\n"
    " results[2] = __OPENCL_VERSION__;\n"
    "#else\n"
    " results[2] = 0xf00baa;\n"
    "#endif\n"

    "#ifdef __OPENCL_C_VERSION__\n"
    " results[3] = __OPENCL_C_VERSION__;\n"
    "#else\n"
    " results[3] = 0xf00baa;\n"
    "#endif\n"

    "#ifdef __LINE__\n"
    " results[4] = __LINE__;\n"
    "#else\n"
    " results[4] = 0xf00baa;\n"
    "#endif\n"

#if 0 // Removed by Affie's request 2/24
    "#ifdef __FAST_RELAXED_MATH__\n"
    " results[5] = __FAST_RELAXED_MATH__;\n"
    "#else\n"
    " results[5] = 0xf00baa;\n"
    "#endif\n"
#endif

    "#ifdef __kernel_exec\n"
    " results[6] = 1;\n" // By spec, we can only really evaluate that it is defined, not what it expands to
    "#else\n"
    " results[6] = 0xf00baa;\n"
    "#endif\n"

    // String preprocessor macros. Technically, there are strings in OpenCL, but not really.
    // The copy stops after at most 511 characters so that the terminating 0
    // always lands inside the 512-byte host buffer, and the bound is tested
    // before f[ i ] is read.
    "#ifdef __FILE__\n"
    " int i;\n"
    " constant char *f = \"\" __FILE__;\n"
    " for( i = 0; i < 511 && f[ i ] != 0; i++ )\n"
    " outFileString[ i ] = f[ i ];\n"
    " outFileString[ i ] = 0;\n"
    "#else\n"
    " outFileString[ 0 ] = 0;\n"
    "#endif\n"

    "}\n"
};
|
||||
|
||||
// Extracts the "<major>.<minor>" pair from a version string such as
// "OpenCL 2.0 <vendor text>".  The buffer is modified in place (the '.' and
// the first following space are overwritten with terminators).
// Returns 0 on success, -1 when no '.' follows the first digit.
static int preprocessor_parse_version( char *versionString, int *major, int *minor )
{
    char *numberStart, *dot, *end;

    // isdigit() requires a value representable as unsigned char.
    for( numberStart = versionString; ( *numberStart != 0 ) && !isdigit( (unsigned char)*numberStart ); numberStart++ )
        ;
    for( dot = numberStart; ( *dot != 0 ) && ( *dot != '.' ); dot++ )
        ;
    for( end = dot; ( *end != 0 ) && ( *end != ' ' ); end++ )
        ;

    if( dot == end )
        return -1;

    *dot = 0;
    *end = 0;
    *major = atoi( numberStart );
    *minor = atoi( dot + 1 );
    return 0;
}

// Builds and runs the preprocessor test kernel, then validates the predefined
// macros it reported: __IMAGE_SUPPORT__, __ENDIAN_LITTLE__, __OPENCL_VERSION__,
// __OPENCL_C_VERSION__, __LINE__, __kernel_exec and __FILE__.
//
// Returns 0 when all checks pass, a non-zero value otherwise.
// `num_elements` is unused.
//
// The __FAST_RELAXED_MATH__ checks (results[5]) were disabled in the original
// source ("Removed by Affie's request 2/24") and are not performed.
int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 3 ];

    int error;
    size_t threads[] = {1,1,1};

    cl_int results[ 7 ];
    cl_char fileString[ 512 ] = "", roundingString[ 128 ] = "";
    char programSource[4096];
    char curFileName[512];
    // Worst case every character is a backslash that has to be doubled.
    char escapedFileName[ 2 * sizeof( curFileName ) ];
    char *programPtr = programSource;

    // The file name is truncated to fit; the truncated name is what the
    // kernel is expected to report back.
    snprintf( curFileName, sizeof( curFileName ), "%s", __FILE__ );

    // Build the name as it must appear inside the "#line" string literal.
    // On Windows each "\" is replaced with "\\".
    size_t escapedLength = 0;
    for( size_t src = 0; curFileName[ src ] != '\0'; src++ )
    {
#ifdef _WIN32
        if( curFileName[ src ] == '\\' )
            escapedFileName[ escapedLength++ ] = '\\';
#endif
        escapedFileName[ escapedLength++ ] = curFileName[ src ];
    }
    escapedFileName[ escapedLength ] = '\0';

    int sourceLength = snprintf( programSource, sizeof( programSource ), preprocessor_test, escapedFileName );
    if( ( sourceLength < 0 ) || ( (size_t)sourceLength >= sizeof( programSource ) ) )
    {
        log_error( "ERROR: Unable to build the preprocessor test program source (source too long)\n" );
        return -1;
    }

    // Create the kernel
    if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(results), NULL, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(fileString), NULL, &error);
    test_error( error, "Creating test array failed" );
    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(roundingString), NULL, &error);
    test_error( error, "Creating test array failed" );

    // Set up and run
    for( int i = 0; i < 3; i++ )
    {
        error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] );
        test_error( error, "Unable to set indexed kernel arguments" );
    }

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(results), results, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );
    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(fileString), fileString, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );
    error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof(roundingString), roundingString, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    // Never trust device output to be terminated before treating it as a string.
    fileString[ sizeof( fileString ) - 1 ] = 0;

    /////// Check the integer results

    // We need to check these values against what we know is supported on the device
    if( checkForImageSupport( deviceID ) == 0 )
    {
        // If images are supported, the constant should have been defined to the value 1
        if( results[ 0 ] == 0xf00baa )
        {
            log_error( "ERROR: __IMAGE_SUPPORT__ undefined even though images are supported\n" );
            return -1;
        }
        else if( results[ 0 ] != 1 )
        {
            log_error( "ERROR: __IMAGE_SUPPORT__ defined, but to the wrong value (defined as %d, spec states it should be 1)\n", (int)results[ 0 ] );
            return -1;
        }
    }
    else
    {
        // If images aren't supported, the constant should be undefined
        if( results[ 0 ] != 0xf00baa )
        {
            log_error( "ERROR: __IMAGE_SUPPORT__ defined to value %d even though images aren't supported\n", (int)results[ 0 ] );
            return -1;
        }
    }

    // __ENDIAN_LITTLE__ is similar to __IMAGE_SUPPORT__: 1 if it's true, undefined if it isn't
    cl_bool deviceIsLittleEndian;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_ENDIAN_LITTLE, sizeof( deviceIsLittleEndian ), &deviceIsLittleEndian, NULL );
    test_error( error, "Unable to get endian property of device to validate against" );

    if( deviceIsLittleEndian )
    {
        if( results[ 1 ] == 0xf00baa )
        {
            log_error( "ERROR: __ENDIAN_LITTLE__ undefined even though the device is little endian\n" );
            return -1;
        }
        else if( results[ 1 ] != 1 )
        {
            log_error( "ERROR: __ENDIAN_LITTLE__ defined, but to the wrong value (defined as %d, spec states it should be 1)\n", (int)results[ 1 ] );
            return -1;
        }
    }
    else
    {
        if( results[ 1 ] != 0xf00baa )
        {
            log_error( "ERROR: __ENDIAN_LITTLE__ defined to value %d even though the device is not little endian (should be undefined per spec)\n", (int)results[ 1 ] );
            return -1;
        }
    }

    // __OPENCL_VERSION__
    if( results[ 2 ] == 0xf00baa )
    {
        log_error( "ERROR: Kernel preprocessor __OPENCL_VERSION__ undefined!\n" );
        return -1;
    }

    // The OpenCL version reported by the macro reports the feature level supported by the compiler. Since
    // this doesn't directly match any property we can query, we just check to see if it's a sane value
    char versionBuffer[ 128 ];
    error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( versionBuffer ), versionBuffer, NULL );
    test_error( error, "Unable to get device's version to validate against" );

    // We need to parse to get the version number to compare against
    int major = 0;
    int minor = 0;
    if( preprocessor_parse_version( versionBuffer, &major, &minor ) != 0 )
    {
        log_error( "ERROR: Unable to verify OpenCL version string (platform string is incorrect format)\n" );
        return -1;
    }
    int realVersion = ( major * 100 ) + ( minor * 10 );
    if( ( results[ 2 ] < 100 ) || ( results[ 2 ] > realVersion ) )
    {
        log_error( "ERROR: Kernel preprocessor __OPENCL_VERSION__ does not make sense w.r.t. device's version string! "
                   "(preprocessor states %d, real version is %d (%d.%d))\n", results[ 2 ], realVersion, major, minor );
        return -1;
    }

    // __OPENCL_C_VERSION__
    if( results[ 3 ] == 0xf00baa )
    {
        log_error( "ERROR: Kernel preprocessor __OPENCL_C_VERSION__ undefined!\n" );
        return -1;
    }

    // The OpenCL C version reported by the macro reports the OpenCL C supported by the compiler for this OpenCL device.
    char cVersionBuffer[ 128 ];
    error = clGetDeviceInfo( deviceID, CL_DEVICE_OPENCL_C_VERSION, sizeof( cVersionBuffer ), cVersionBuffer, NULL );
    test_error( error, "Unable to get device's OpenCL C version to validate against" );

    // We need to parse to get the version number to compare against
    if( preprocessor_parse_version( cVersionBuffer, &major, &minor ) != 0 )
    {
        log_error( "ERROR: Unable to verify OpenCL C version string (platform string is incorrect format)\n" );
        return -1;
    }
    realVersion = ( major * 100 ) + ( minor * 10 );
    if( ( results[ 3 ] < 100 ) || ( results[ 3 ] > realVersion ) )
    {
        // Report results[ 3 ], the value that was actually checked.
        log_error( "ERROR: Kernel preprocessor __OPENCL_C_VERSION__ does not make sense w.r.t. device's version string! "
                   "(preprocessor states %d, real version is %d (%d.%d))\n", results[ 3 ], realVersion, major, minor );
        return -1;
    }

    // __LINE__
    if( results[ 4 ] == 0xf00baa )
    {
        log_error( "ERROR: Kernel preprocessor __LINE__ undefined!\n" );
        return -1;
    }

    // This is fun--we get to search for where __LINE__ actually is so we know what line it should define to!
    // Note: it shows up twice, once for the #ifdef, and the other for the actual result output
    const char *linePtr = strstr( preprocessor_test, "__LINE__" );
    if( linePtr == NULL )
    {
        log_error( "ERROR: Nonsensical NULL pointer encountered!\n" );
        return -2;
    }
    linePtr = strstr( linePtr + strlen( "__LINE__" ), "__LINE__" );
    if( linePtr == NULL )
    {
        log_error( "ERROR: Nonsensical NULL pointer encountered!\n" );
        return -2;
    }

    // Expected line number = 1 + number of newlines in the template before
    // the second __LINE__ occurrence.
    int retCount = 1;
    for( const char *scan = preprocessor_test; scan < linePtr; scan++ )
    {
        if( *scan == '\n' )
            retCount++;
    }

    if( retCount != results[ 4 ] )
    {
        log_error( "ERROR: Kernel preprocessor __LINE__ does not expand to the actual line number! (expanded to %d, but was on line %d)\n",
                   results[ 4 ], retCount );
        return -1;
    }

    // __kernel_exec
    // We can ONLY check to verify that it is defined
    if( results[ 6 ] == 0xf00baa )
    {
        log_error( "ERROR: Kernel preprocessor __kernel_exec must be defined\n" );
        return -1;
    }

    //// String preprocessors

    // The "#line" directive at the top of the kernel source sets __FILE__ to
    // the host file name embedded above, so that is what must come back.
    if( fileString[ 0 ] == 0 )
    {
        log_error( "ERROR: Kernel preprocessor __FILE__ undefined!\n" );
        return -1;
    }
    else if( strncmp( (char *)fileString, curFileName, sizeof( curFileName ) ) != 0 )
    {
        // NOTE(review): logged as a warning but still fails the test, as in
        // the original code -- confirm which of the two is intended.
        log_info( "WARNING: __FILE__ defined, but to an unexpected value (%s)\n\tShould be: \"%s\"\n", (char *)fileString, curFileName );
        return -1;
    }

    return 0;
}
|
||||
|
||||
1567
test_conformance/basic/test_progvar.cpp
Normal file
1567
test_conformance/basic/test_progvar.cpp
Normal file
File diff suppressed because it is too large
Load Diff
358
test_conformance/basic/test_queue_priority.c
Normal file
358
test_conformance/basic/test_queue_priority.c
Normal file
@@ -0,0 +1,358 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include "../../test_common/harness/rounding_mode.h"
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * OpenCL C sources for the three element-wise float kernels used by this
 * file.  Every kernel has the same shape and differs only in its name and
 * in the binary operator applied to srcA[tid] and srcB[tid], so the text is
 * produced from one template via string-literal concatenation.
 */
#define FP_BINOP_KERNEL_SOURCE(kernel_name, op) \
    "__kernel void " kernel_name "(__global float *srcA, __global float *srcB, __global float *dst)\n" \
    "{\n" \
    " int tid = get_global_id(0);\n" \
    "\n" \
    " dst[tid] = srcA[tid] " op " srcB[tid];\n" \
    "}\n"

/* dst[tid] = srcA[tid] + srcB[tid] */
static const char *fpadd_kernel_code = FP_BINOP_KERNEL_SOURCE("test_fpadd", "+");

/* dst[tid] = srcA[tid] - srcB[tid] */
static const char *fpsub_kernel_code = FP_BINOP_KERNEL_SOURCE("test_fpsub", "-");

/* dst[tid] = srcA[tid] * srcB[tid] */
static const char *fpmul_kernel_code = FP_BINOP_KERNEL_SOURCE("test_fpmul", "*");

#undef FP_BINOP_KERNEL_SOURCE


/* Error tolerance constant; not referenced by the verification helpers in this file. */
static const float MAX_ERR = 1e-5f;
|
||||
|
||||
/*
 * Checks the output of the test_fpadd kernel against a host-side reference.
 *
 * inptrA, inptrB  host copies of the two kernel inputs (n floats each)
 * outptr          kernel output read back from the device (n floats)
 * n               number of elements to compare
 * fileNum         unused; kept so the signature matches the other verifiers
 *
 * Returns 0 when every element matches bit-for-bit, -1 on the first mismatch
 * or when the reference buffer cannot be allocated.
 */
static int
verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
{
    int i;
    int status = 0;
    float *reference_ptr = (float *)malloc(n * sizeof(float));

    (void)fileNum;

    /* malloc(0) may legitimately return NULL, so only treat it as an error when n > 0 */
    if (reference_ptr == NULL && n > 0)
    {
        log_error("FP_ADD float test failed: unable to allocate reference buffer\n");
        return -1;
    }

    /* The reference is stored to a float array first so each sum is rounded to
       single precision before it is compared with the device result. */
    for (i=0; i<n; i++)
    {
        reference_ptr[i] = inptrA[i] + inptrB[i];
    }

    for (i=0; i<n; i++)
    {
        if (reference_ptr[i] != outptr[i])
        {
            log_error("FP_ADD float test failed\n");
            status = -1;
            break;
        }
    }

    /* Released on both the pass and the fail path (the failure path used to leak it). */
    free(reference_ptr);

    if (status == 0)
        log_info("FP_ADD float test passed\n");

    return status;
}
|
||||
|
||||
/*
 * Checks the output of the test_fpsub kernel against a host-side reference.
 *
 * inptrA, inptrB  host copies of the two kernel inputs (n floats each)
 * outptr          kernel output read back from the device (n floats)
 * n               number of elements to compare
 * fileNum         unused; kept so the signature matches the other verifiers
 *
 * Returns 0 when every element matches bit-for-bit, -1 on the first mismatch
 * or when the reference buffer cannot be allocated.
 */
static int
verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
{
    int i;
    int status = 0;
    float *reference_ptr = (float *)malloc(n * sizeof(float));

    (void)fileNum;

    /* malloc(0) may legitimately return NULL, so only treat it as an error when n > 0 */
    if (reference_ptr == NULL && n > 0)
    {
        log_error("FP_SUB float test failed: unable to allocate reference buffer\n");
        return -1;
    }

    /* The reference is stored to a float array first so each difference is
       rounded to single precision before it is compared with the device result. */
    for (i=0; i<n; i++)
    {
        reference_ptr[i] = inptrA[i] - inptrB[i];
    }

    for (i=0; i<n; i++)
    {
        if (reference_ptr[i] != outptr[i])
        {
            log_error("FP_SUB float test failed\n");
            status = -1;
            break;
        }
    }

    /* Released on both the pass and the fail path (the failure path used to leak it). */
    free(reference_ptr);

    if (status == 0)
        log_info("FP_SUB float test passed\n");

    return status;
}
|
||||
|
||||
/*
 * Checks the output of the test_fpmul kernel against a host-side reference.
 *
 * inptrA, inptrB  host copies of the two kernel inputs (n floats each)
 * outptr          kernel output read back from the device (n floats)
 * n               number of elements to compare
 * fileNum         unused; kept so the signature matches the other verifiers
 *
 * Returns 0 when every element matches bit-for-bit, -1 on the first mismatch
 * or when the reference buffer cannot be allocated.
 */
static int
verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
{
    int i;
    int status = 0;
    float *reference_ptr = (float *)malloc(n * sizeof(float));

    (void)fileNum;

    /* malloc(0) may legitimately return NULL, so only treat it as an error when n > 0 */
    if (reference_ptr == NULL && n > 0)
    {
        log_error("FP_MUL float test failed: unable to allocate reference buffer\n");
        return -1;
    }

    /* The reference is stored to a float array first so each product is rounded
       to single precision before it is compared with the device result. */
    for (i=0; i<n; i++)
    {
        reference_ptr[i] = inptrA[i] * inptrB[i];
    }

    for (i=0; i<n; i++)
    {
        if (reference_ptr[i] != outptr[i])
        {
            log_error("FP_MUL float test failed\n");
            status = -1;
            break;
        }
    }

    /* Released on both the pass and the fail path (the failure path used to leak it). */
    free(reference_ptr);

    if (status == 0)
        log_info("FP_MUL float test passed\n");

    return status;
}
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
|
||||
/*
 * Creates a command queue using the cl_APPLE_command_queue_priority and/or
 * cl_APPLE_command_queue_select_compute_units properties (whichever the device
 * reports), then runs the add / sub / mul float kernels and verifies each
 * result on the host.
 *
 * Returns 0 when the test passes or is skipped because neither extension is
 * available, and non-zero on the first failure.
 */
int test_queue_priority(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    int err;
    int command_queue_priority = 0;
    int command_queue_select_compute_units = 0;

    // Layout: { CL_QUEUE_PROPERTIES, 0, [priority key, value], [compute-unit key, value], 0 }
    cl_queue_properties queue_properties[] = { CL_QUEUE_PROPERTIES, 0, 0, 0, 0, 0, 0 };
    int idx = 2;

    // Check to see if queue priority is supported
    if (((command_queue_priority = is_extension_available(device, "cl_APPLE_command_queue_priority"))) == 0)
    {
        log_info("cl_APPLE_command_queue_priority extension is not supported - skipping test\n");
    }

    // Check to see if selecting the number of compute units is supported
    if (((command_queue_select_compute_units = is_extension_available(device, "cl_APPLE_command_queue_select_compute_units"))) == 0)
    {
        log_info("cl_APPLE_command_queue_select_compute_units extension is not supported - skipping test\n");
    }

    // If neither extension is supported, skip the test
    if (!command_queue_priority && !command_queue_select_compute_units)
        return 0;

    // Setup the queue properties
#ifdef cl_APPLE_command_queue_priority
    if (command_queue_priority) {
        queue_properties[idx++] = CL_QUEUE_PRIORITY_APPLE;
        queue_properties[idx++] = CL_QUEUE_PRIORITY_BACKGROUND_APPLE;
    }
#endif

#ifdef cl_APPLE_command_queue_select_compute_units
    // Check the number of compute units on the device
    cl_uint num_compute_units = 0;
    err = clGetDeviceInfo( device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof( num_compute_units ), &num_compute_units, NULL );
    if (err) {
        log_error("clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed: %d", err);
        test_finish();
        return -1;
    }

    if (command_queue_select_compute_units) {
        // Request half of the device's compute units for the new queue
        queue_properties[idx++] = CL_QUEUE_NUM_COMPUTE_UNITS_APPLE;
        queue_properties[idx++] = num_compute_units/2;
    }
#endif
    // Terminate the property list
    queue_properties[idx++] = 0;

    // Create the command queue
    // NOTE(review): background_queue is created and released, but every command
    // below is enqueued on `queue`. The marker-event wait list suggests the
    // kernels were meant to run on background_queue - confirm intent.
    cl_command_queue background_queue = clCreateCommandQueueWithProperties(context, device, queue_properties, &err);
    if (err) {
        log_error("clCreateCommandQueueWithPropertiesAPPLE failed: %d", err);
        test_finish();
        return -1;
    }

    // Test the command queue
    cl_mem streams[4];
    cl_program program[3];
    cl_kernel kernel[3];
    cl_event marker_event;

    float *input_ptr[3], *output_ptr, *p;
    size_t threads[1];
    int i;
    MTdata d = init_genrand( gRandomSeed );
    size_t length = sizeof(cl_float) * num_elements;
    int isRTZ = 0;
    RoundingMode oldMode = kDefaultRoundingMode;

    // check for floating point capabilities
    cl_device_fp_config single_config = 0;
    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
    if (err) {
        log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
        test_finish();
        return -1;
    }
    //If we only support rtz mode
    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
    {
        //Check to make sure we are an embedded device
        char profile[32];
        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
        if( err )
        {
            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
            test_finish();
            return -1;
        }
        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
        {
            log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
            test_finish();
            return -1;
        }

        // Remember the host rounding mode so it can be restored after each verification
        isRTZ = 1;
        oldMode = get_round();
    }

    input_ptr[0] = (cl_float*)malloc(length);
    input_ptr[1] = (cl_float*)malloc(length);
    input_ptr[2] = (cl_float*)malloc(length);
    output_ptr = (cl_float*)malloc(length);

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
    test_error( err, "clCreateBuffer failed.");
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
    test_error( err, "clCreateBuffer failed.");
    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
    test_error( err, "clCreateBuffer failed.");
    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
    test_error( err, "clCreateBuffer failed.");

    // Fill the three host input arrays with random floats in [-2^31, 2^31]
    p = input_ptr[0];
    for (i=0; i<num_elements; i++)
        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
    p = input_ptr[1];
    for (i=0; i<num_elements; i++)
        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
    p = input_ptr[2];
    for (i=0; i<num_elements; i++)
        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
    test_error( err, "clEnqueueWriteBuffer failed.");

    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
    test_error( err, "clEnqueueWriteBuffer failed.");

    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
    test_error( err, "clEnqueueWriteBuffer failed.");

    // The marker event is used as the wait list of every kernel launch below
    err = clEnqueueMarkerWithWaitList(queue, 0, NULL, &marker_event);
    test_error( err, "clEnqueueMarkerWithWaitList failed.");
    clFlush(queue);

    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd_kernel_code, "test_fpadd");
    test_error( err, "create_single_kernel_helper failed");

    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub_kernel_code, "test_fpsub");
    test_error( err, "create_single_kernel_helper failed");

    err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul_kernel_code, "test_fpmul");
    test_error( err, "create_single_kernel_helper failed");

    // All three kernels read streams[0] and streams[1] and write streams[3]
    err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
    test_error( err, "clSetKernelArgs failed.");

    err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
    test_error( err, "clSetKernelArgs failed.");

    err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
    test_error( err, "clSetKernelArgs failed.");

    threads[0] = (unsigned int)num_elements;
    for (i=0; i<3; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 1, &marker_event, NULL);
        test_error( err, "clEnqueueNDRangeKernel failed.");

        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        test_error( err, "clEnqueueReadBuffer failed.");

        // Compute the host reference with the same rounding mode as an RTZ-only device
        if( isRTZ )
            set_round( kRoundTowardZero, kfloat );

        switch (i)
        {
            case 0:
                err = verify_fpadd(input_ptr[0], input_ptr[1], output_ptr, num_elements, i);
                break;
            case 1:
                err = verify_fpsub(input_ptr[0], input_ptr[1], output_ptr, num_elements, i);
                break;
            case 2:
                err = verify_fpmul(input_ptr[0], input_ptr[1], output_ptr, num_elements, i);
                break;
        }

        if( isRTZ )
            set_round( oldMode, kfloat );

        // Stop at the first verification failure; otherwise the next iteration
        // would overwrite err and an add/sub failure would be reported as a pass.
        if( err )
            break;
    }

    // cleanup
    clReleaseCommandQueue(background_queue);
    clReleaseEvent(marker_event);
    clReleaseMemObject(streams[0]);
    clReleaseMemObject(streams[1]);
    clReleaseMemObject(streams[2]);
    clReleaseMemObject(streams[3]);
    for (i=0; i<3; i++)
    {
        clReleaseKernel(kernel[i]);
        clReleaseProgram(program[i]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(input_ptr[2]);
    free(output_ptr);
    free_mtdata( d );

    return err;
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
297
test_conformance/basic/test_readimage.c
Normal file
297
test_conformance/basic/test_readimage.c
Normal file
@@ -0,0 +1,297 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * OpenCL C sources for the two 2D read_imagef kernels.  Both read one pixel
 * per work-item, scale it by 255 and store it as a uchar4; they differ only
 * in the kernel name and in the component swizzle applied before the store,
 * so both strings are built from one template via literal concatenation.
 */
#define READIMAGE_8888_KERNEL_SOURCE(kernel_name, swizzle) \
    "\n" \
    "__kernel void " kernel_name "(read_only image2d_t srcimg, __global uchar4 *dst, sampler_t sampler)\n" \
    "{\n" \
    " int tid_x = get_global_id(0);\n" \
    " int tid_y = get_global_id(1);\n" \
    " int indx = tid_y * get_image_width(srcimg) + tid_x;\n" \
    " float4 color;\n" \
    "\n" \
    " color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y)) * 255.0f;\n" \
    " dst[indx] = convert_uchar4_rte(color" swizzle ");\n" \
    "\n" \
    "}\n"

/* Stores color.zyxw, i.e. with the x and z components swapped. */
static const char *bgra8888_kernel_code = READIMAGE_8888_KERNEL_SOURCE("test_bgra8888", ".zyxw");

/* Stores the color components in the order they were read. */
static const char *rgba8888_kernel_code = READIMAGE_8888_KERNEL_SOURCE("test_rgba8888", "");

#undef READIMAGE_8888_KERNEL_SOURCE
|
||||
|
||||
|
||||
static unsigned char *
|
||||
generate_8888_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32( d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Compares the bytes produced by the test_bgra8888 kernel with the bytes
 * that were uploaded into the source image.
 *
 * image   host data written to the image (w * h * 4 bytes)
 * outptr  kernel output read back from the device (w * h * 4 bytes)
 * w, h    image dimensions in pixels
 *
 * Returns 0 when all bytes match, -1 on the first mismatch.
 */
static int
verify_bgra8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    int i;

    /* Each pixel is four bytes, so the whole payload is w*h*4 bytes - the same
       span verify_rgba8888_image checks.  Comparing only w*h bytes would leave
       three quarters of the output unverified. */
    for (i=0; i<w*h*4; i++)
    {
        if (outptr[i] != image[i])
        {
            log_error("READ_IMAGE_BGRA_UNORM_INT8 test failed\n");
            return -1;
        }
    }

    log_info("READ_IMAGE_BGRA_UNORM_INT8 test passed\n");
    return 0;
}
|
||||
|
||||
/*
 * Byte-wise comparison of the test_rgba8888 kernel output against the data
 * uploaded to the source image (w * h pixels, four bytes each).
 * Returns 0 on a full match and -1 at the first differing byte.
 */
static int
verify_rgba8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    int pos = 0;

    while (pos < w*h*4)
    {
        if (outptr[pos] != image[pos])
        {
            log_error("READ_IMAGE_RGBA_UNORM_INT8 test failed\n");
            return -1;
        }
        pos++;
    }

    log_info("READ_IMAGE_RGBA_UNORM_INT8 test passed\n");
    return 0;
}
|
||||
|
||||
|
||||
/*
 * Uploads random 512x512 CL_UNORM_INT8 images (BGRA when available, and RGBA),
 * reads them back through read_imagef kernels into a buffer and compares the
 * result with the uploaded bytes.
 *
 * On embedded devices (gIsEmbedded) the BGRA pass only runs when the context
 * lists CL_BGRA among its supported 2D image formats; otherwise BGRA is
 * always tested.
 *
 * Returns 0 on success, non-zero on the first failure.
 */
int test_readimage(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[3];
    cl_program program[2];
    cl_kernel kernel[2];
    cl_image_format img_format;
    cl_image_format *supported_formats = NULL;
    unsigned char *input_ptr[2], *output_ptr;
    size_t threads[2];
    int img_width = 512;
    int img_height = 512;
    int i, err;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {img_width, img_height, 1};
    size_t length = img_width * img_height * 4 * sizeof(unsigned char);
    MTdata d = NULL;
    int supportsBGRA = 0;
    cl_uint numFormats = 0;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    /* The generator is created once, after the image-support check above;
       creating it twice would leak the first instance. */
    d = init_genrand( gRandomSeed );
    input_ptr[0] = generate_8888_image(img_width, img_height, d);
    input_ptr[1] = generate_8888_image(img_width, img_height, d);
    free_mtdata(d); d = NULL;

    output_ptr = (unsigned char*)malloc(length);

    if(gIsEmbedded)
    {
        cl_uint f;

        /* Get the supported image formats to see if BGRA is supported */
        err = clGetSupportedImageFormats (context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &numFormats);
        test_error(err, "clGetSupportedImageFormats failed");

        supported_formats = (cl_image_format *) malloc(sizeof(cl_image_format) * numFormats);
        if (supported_formats == NULL && numFormats > 0)
        {
            log_error("Unable to allocate the supported image format list\n");
            return -1;
        }

        err = clGetSupportedImageFormats (context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, numFormats, supported_formats, NULL);
        if (err != CL_SUCCESS)
        {
            free(supported_formats);
            log_error("clGetSupportedImageFormats failed\n");
            return -1;
        }

        for(f = 0; f < numFormats; f++)
        {
            if(supported_formats[f].image_channel_order == CL_BGRA)
            {
                supportsBGRA = 1;
                break;
            }
        }

        /* The list is only needed for the lookup above */
        free(supported_formats);
        supported_formats = NULL;
    }
    else
    {
        supportsBGRA = 1;
    }

    /* streams[0]: BGRA image (optional), streams[1]: RGBA image, streams[2]: output buffer */
    if(supportsBGRA)
    {
        img_format.image_channel_order = CL_BGRA;
        img_format.image_channel_data_type = CL_UNORM_INT8;
        streams[0] = clCreateImage2D(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
        if (!streams[0])
        {
            log_error("clCreateImage2D failed\n");
            return -1;
        }
    }

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT8;
    streams[1] = clCreateImage2D(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateImage2D failed\n");
        return -1;
    }
    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[2])
    {
        log_error("clCreateBuffer failed\n");
        return -1;
    }

    if(supportsBGRA)
    {
        err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr[0], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteImage failed\n");
            return -1;
        }
    }

    err = clEnqueueWriteImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, input_ptr[1], 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteImage failed\n");
        return -1;
    }

    if(supportsBGRA)
    {
        err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &bgra8888_kernel_code, "test_bgra8888" );
        if (err)
            return -1;
    }

    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &rgba8888_kernel_code, "test_rgba8888" );
    if (err)
        return -1;

    /* Unnormalized coordinates, clamp-to-edge addressing, nearest filtering */
    cl_sampler_properties properties[] = {
        CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
        CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
        CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
        0 };
    cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err);
    test_error(err, "clCreateSamplerWithProperties failed");

    if(supportsBGRA)
    {
        err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[0], 1, sizeof streams[2], &streams[2]);
        err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler);
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArg failed\n");
            return -1;
        }
    }

    err = clSetKernelArg(kernel[1], 0, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel[1], 1, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel[1], 2, sizeof sampler, &sampler);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArg failed\n");
        return -1;
    }

    /* One work-item per pixel */
    threads[0] = (unsigned int)img_width;
    threads[1] = (unsigned int)img_height;

    for (i=0; i<2; i++)
    {
        /* Index 0 is the BGRA pass, which is skipped when the format is unavailable */
        if(i == 0 && !supportsBGRA)
            continue;

        err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("%s clEnqueueNDRangeKernel failed\n", __FUNCTION__);
            return -1;
        }
        err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueReadBuffer failed\n");
            return -1;
        }

        switch (i)
        {
            case 0:
                err = verify_bgra8888_image(input_ptr[i], output_ptr, img_width, img_height);
                break;
            case 1:
                err = verify_rgba8888_image(input_ptr[i], output_ptr, img_width, img_height);
                break;
        }

        if (err)
            break;
    }

    // cleanup
    clReleaseSampler(sampler);

    if(supportsBGRA)
        clReleaseMemObject(streams[0]);

    clReleaseMemObject(streams[1]);
    clReleaseMemObject(streams[2]);
    for (i=0; i<2; i++)
    {
        if(i == 0 && !supportsBGRA)
            continue;

        clReleaseKernel(kernel[i]);
        clReleaseProgram(program[i]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(output_ptr);

    return err;
}
|
||||
237
test_conformance/basic/test_readimage3d.c
Normal file
237
test_conformance/basic/test_readimage3d.c
Normal file
@@ -0,0 +1,237 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * OpenCL C sources for the two 3D read_imagef kernels.  Both share the same
 * signature, index computation and image read; only the stores into dst
 * differ, so the common head is expressed once as a macro and the
 * kernel-specific tail is appended by string-literal concatenation.
 */
#define READIMAGE3D_KERNEL_HEAD(kernel_name) \
    "\n" \
    "__kernel void " kernel_name "(read_only image3d_t srcimg, __global float4 *dst, sampler_t sampler)\n" \
    "{\n" \
    " int tid_x = get_global_id(0);\n" \
    " int tid_y = get_global_id(1);\n" \
    " int tid_z = get_global_id(2);\n" \
    " int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n" \
    " float4 color;\n" \
    "\n" \
    " color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n"

/* Writes the color with its x and z components swapped. */
static const char *bgra8888_kernel_code =
    READIMAGE3D_KERNEL_HEAD("test_bgra8888")
    " dst[indx].x = color.z;\n"
    " dst[indx].y = color.y;\n"
    " dst[indx].z = color.x;\n"
    " dst[indx].w = color.w;\n"
    "\n"
    "}\n";


/* Writes the color components in the order they were read. */
static const char *rgba8888_kernel_code =
    READIMAGE3D_KERNEL_HEAD("test_rgba8888")
    " //indx *= 4;\n"
    " dst[indx].x = color.x;\n"
    " dst[indx].y = color.y;\n"
    " dst[indx].z = color.z;\n"
    " dst[indx].w = color.w;\n"
    "\n"
    "}\n";

#undef READIMAGE3D_KERNEL_HEAD
|
||||
|
||||
|
||||
static unsigned char *
|
||||
generate_3d_image8(int w, int h, int d, MTdata data)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(w * h * d * 4);
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*d*4; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(data);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Compares w*h*d*4 float samples read back from the device with the
 * double-precision reference.  A sample is accepted when it equals the
 * reference rounded to float, or when its error as reported by Ulp_Error()
 * has magnitude below 1.5.
 *
 * Returns 0 when every sample is accepted, -1 at the first rejected one.
 */
static int
verify_3d_image8(double *image, float *outptr, int w, int h, int d)
{
    int sample;

    for (sample = 0; sample < w*h*d*4; sample++)
    {
        float ulps;

        /* Exact match after rounding the reference to float: nothing more to check */
        if (outptr[sample] == (float)image[sample])
            continue;

        ulps = Ulp_Error( outptr[sample], image[sample]);

        /* Written as a negated '<' so that a NaN error value is rejected too */
        if (fabsf(ulps) < 1.5f)
            continue;

        log_error( "ERROR: Data sample %d does not validate! Expected (%a), got (%a), ulp %f\n",
                   (int)sample, image[sample], outptr[ sample ], ulps );
        return -1;
    }

    return 0;
}
|
||||
|
||||
/*
 * Builds the double-precision reference for a w x h x d, four-channel 8-bit
 * image by dividing every input byte by 255.
 * The caller owns the returned buffer and must free() it.
 */
static double *
prepare_reference(unsigned char * input_ptr, int w, int h, int d)
{
    const int count = w * h * d * 4;
    double *reference = (double*)malloc(count * sizeof(double));
    int k = 0;

    while (k < count)
    {
        reference[k] = input_ptr[k] / 255.0;
        k++;
    }

    return reference;
}
|
||||
|
||||
|
||||
/*
 * Uploads random 64x64x64 CL_UNORM_INT8 volumes in BGRA and RGBA channel
 * order, reads every voxel back through read_imagef kernels into a float4
 * buffer and checks the result against a host reference (input byte / 255)
 * using verify_3d_image8().
 *
 * Returns 0 on success, non-zero on the first failure.
 */
int test_readimage3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[3];
    cl_program program[2];
    cl_kernel kernel[2];
    cl_image_format img_format;
    unsigned char *input_ptr[2];
    float *output_ptr;
    double *ref_ptr[2];
    size_t threads[3];
    int img_width = 64;
    int img_height = 64;
    int img_depth = 64;
    int i, err;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {img_width, img_height, img_depth};
    size_t length = img_width * img_height * img_depth * 4 * sizeof(float);


    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )

    MTdata d = init_genrand( gRandomSeed );
    input_ptr[0] = generate_3d_image8(img_width, img_height, img_depth, d);
    input_ptr[1] = generate_3d_image8(img_width, img_height, img_depth, d);
    ref_ptr[0] = prepare_reference(input_ptr[0], img_width, img_height, img_depth);
    ref_ptr[1] = prepare_reference(input_ptr[1], img_width, img_height, img_depth);
    free_mtdata(d); d = NULL;
    output_ptr = (float*)malloc(length);

    /* streams[0]: BGRA image, streams[1]: RGBA image, streams[2]: float4 output buffer */
    img_format.image_channel_order = CL_BGRA;
    img_format.image_channel_data_type = CL_UNORM_INT8;
    streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
    test_error(err, "create_image_3d failed");

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT8;
    streams[1] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
    test_error(err, "create_image_3d failed");

    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
    test_error(err, "clCreateBuffer failed");

    err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr[0], 0, NULL, NULL);
    test_error(err, "clEnqueueWriteImage failed");

    err = clEnqueueWriteImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, input_ptr[1], 0, NULL, NULL);
    test_error(err, "clEnqueueWriteImage failed");

    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &bgra8888_kernel_code, "test_bgra8888" );
    if (err)
        return -1;

    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &rgba8888_kernel_code, "test_rgba8888" );
    if (err)
        return -1;

    /* Unnormalized coordinates, clamp-to-edge addressing, nearest filtering */
    cl_sampler_properties properties[] = {
        CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
        CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
        CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
        0 };
    cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err);
    test_error(err, "clCreateSamplerWithProperties failed");

    err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel[0], 1, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler);
    test_error(err, "clSetKernelArg failed");

    err = clSetKernelArg(kernel[1], 0, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel[1], 1, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel[1], 2, sizeof sampler, &sampler);
    test_error(err, "clSetKernelArg failed");

    /* One work-item per voxel */
    threads[0] = (unsigned int)img_width;
    threads[1] = (unsigned int)img_height;
    threads[2] = (unsigned int)img_depth;

    for (i=0; i<2; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 3, NULL, threads, NULL, 0, NULL, NULL);
        test_error(err, "clEnqueueNDRangeKernel failed");

        err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        test_error(err, "clEnqueueReadBuffer failed");

        switch (i)
        {
            case 0:
                err = verify_3d_image8(ref_ptr[i], output_ptr, img_width, img_height, img_depth);
                /* verify_3d_image8 returns 0 on success, so "passed" is logged only then */
                if ( err == 0 )
                    log_info("READ_IMAGE3D_BGRA_UNORM_INT8 test passed\n");
                break;
            case 1:
                err = verify_3d_image8(ref_ptr[i], output_ptr, img_width, img_height, img_depth);
                if ( err == 0 )
                    log_info("READ_IMAGE3D_RGBA_UNORM_INT8 test passed\n");
                break;
        }

        if (err)
            break;
    }

    // cleanup
    clReleaseSampler(sampler);
    clReleaseMemObject(streams[0]);
    clReleaseMemObject(streams[1]);
    clReleaseMemObject(streams[2]);
    for (i=0; i<2; i++)
    {
        clReleaseKernel(kernel[i]);
        clReleaseProgram(program[i]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(output_ptr);
    free(ref_ptr[0]);
    free(ref_ptr[1]);

    return err;
}
|
||||
|
||||
|
||||
153
test_conformance/basic/test_readimage3d_fp32.c
Normal file
153
test_conformance/basic/test_readimage3d_fp32.c
Normal file
@@ -0,0 +1,153 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
|
||||
/*
 * OpenCL C source of the test_rgbaFFFF kernel (3D variant).
 *
 * Each work-item reads the texel at (tid_x, tid_y, tid_z) from srcimg with
 * read_imagef and stores its four channels, unconverted, into four
 * consecutive floats of dst starting at 4 * its linear texel index.
 */
static const char *rgbaFFFF_kernel_code =
    "__kernel void test_rgbaFFFF(read_only image3d_t srcimg, __global float *dst, sampler_t sampler)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int tid_z = get_global_id(2);\n"
    " int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n"
    " float4 color;\n"
    "\n"
    " color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n"
    " indx *= 4;\n"
    " dst[indx+0] = color.x;\n"
    " dst[indx+1] = color.y;\n"
    " dst[indx+2] = color.z;\n"
    " dst[indx+3] = color.w;\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
static float *
|
||||
generate_float_image(int w, int h, int d, MTdata data)
|
||||
{
|
||||
float *ptr = (float*)malloc(w * h * d * 4 * sizeof(float));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*d*4; i++)
|
||||
ptr[i] = get_random_float(-0x40000000, 0x40000000, data);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Checks the kernel output against the reference image, one channel at a
 * time (w * h * d texels, 4 floats each), using exact float comparison.
 *
 * Logs the outcome and returns 0 when every value matches, -1 at the first
 * mismatch.
 */
static int
verify_float_image(float *image, float *outptr, int w, int h, int d)
{
    const int total = w * h * d * 4;
    int idx = 0;

    while (idx < total)
    {
        if (image[idx] != outptr[idx])
        {
            log_error("READ_IMAGE3D_RGBA_FLOAT test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("READ_IMAGE3D_RGBA_FLOAT test passed\n");
    return 0;
}
|
||||
|
||||
|
||||
int test_readimage3d_fp32(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
cl_image_format img_format;
|
||||
float *input_ptr, *output_ptr;
|
||||
size_t threads[3];
|
||||
int img_width = 64;
|
||||
int img_height = 64;
|
||||
int img_depth = 64;
|
||||
int err;
|
||||
size_t origin[3] = {0, 0, 0};
|
||||
size_t region[3] = {img_width, img_height, img_depth};
|
||||
size_t length = img_width * img_height * img_depth * 4 * sizeof(float);
|
||||
|
||||
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
|
||||
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
input_ptr = generate_float_image(img_width, img_height, img_depth, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
output_ptr = (float*)malloc(length);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_FLOAT;
|
||||
streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
|
||||
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteImage failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, &rgbaFFFF_kernel_code, "test_rgbaFFFF" );
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
cl_sampler_properties properties[] = {
|
||||
CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
|
||||
CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
|
||||
CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
|
||||
0 };
|
||||
cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err);
|
||||
test_error(err, "clCreateSamplerWithProperties failed");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
threads[0] = (unsigned int)img_width;
|
||||
threads[1] = (unsigned int)img_height;
|
||||
threads[2] = (unsigned int)img_depth;
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
|
||||
err = verify_float_image(input_ptr, output_ptr, img_width, img_height, img_depth);
|
||||
|
||||
// cleanup
|
||||
clReleaseSampler(sampler);
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
152
test_conformance/basic/test_readimage3d_int16.c
Normal file
152
test_conformance/basic/test_readimage3d_int16.c
Normal file
@@ -0,0 +1,152 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * OpenCL C source of the test_rgba16 kernel (3D variant).
 *
 * Each work-item reads the texel at (tid_x, tid_y, tid_z) with read_imagef,
 * scales every channel by 65535.0f, converts it with convert_ushort_rte and
 * stores the resulting ushort4 at its linear texel index in dst.
 */
static const char *rgba16_kernel_code =
    "__kernel void test_rgba16(read_only image3d_t srcimg, __global ushort4 *dst, sampler_t sampler)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int tid_z = get_global_id(2);\n"
    " int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n"
    " float4 color;\n"
    "\n"
    " color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n"
    " ushort4 dst_write;\n"
    " dst_write.x = convert_ushort_rte(color.x * 65535.0f);\n"
    " dst_write.y = convert_ushort_rte(color.y * 65535.0f);\n"
    " dst_write.z = convert_ushort_rte(color.z * 65535.0f);\n"
    " dst_write.w = convert_ushort_rte(color.w * 65535.0f);\n"
    " dst[indx] = dst_write;\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
static unsigned short *
|
||||
generate_16bit_image(int w, int h, int d, MTdata data)
|
||||
{
|
||||
unsigned short *ptr = (cl_ushort*)malloc(w * h * d * 4 * sizeof(cl_ushort));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*d*4; i++)
|
||||
ptr[i] = (cl_ushort)genrand_int32(data);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static int
|
||||
verify_16bit_image(cl_ushort *image, cl_ushort *outptr, int w, int h, int d)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*d*4; i++)
|
||||
{
|
||||
if (outptr[i] != image[i])
|
||||
{
|
||||
log_error("READ_IMAGE3D_RGBA_UNORM_INT16 test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("READ_IMAGE3D_RGBA_UNORM_INT16 test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_readimage3d_int16(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
cl_image_format img_format;
|
||||
cl_ushort *input_ptr, *output_ptr;
|
||||
size_t threads[3];
|
||||
int img_width = 64;
|
||||
int img_height = 64;
|
||||
int img_depth = 64;
|
||||
int err;
|
||||
size_t origin[3] = {0, 0, 0};
|
||||
size_t region[3] = {img_width, img_height, img_depth};
|
||||
size_t length = img_width * img_height * img_depth * 4 * sizeof(cl_ushort);
|
||||
|
||||
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
|
||||
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
input_ptr = generate_16bit_image(img_width, img_height, img_depth, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
output_ptr = (cl_ushort*)malloc(length);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT16;
|
||||
streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
|
||||
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteImage failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, &rgba16_kernel_code, "test_rgba16" );
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
cl_sampler_properties properties[] = {
|
||||
CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
|
||||
CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
|
||||
CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
|
||||
0 };
|
||||
cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err);
|
||||
test_error(err, "clCreateSamplerWithProperties failed");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
threads[0] = (unsigned int)img_width;
|
||||
threads[1] = (unsigned int)img_height;
|
||||
threads[2] = (unsigned int)img_depth;
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
|
||||
err = verify_16bit_image(input_ptr, output_ptr, img_width, img_height, img_depth);
|
||||
|
||||
// cleanup
|
||||
clReleaseSampler(sampler);
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
173
test_conformance/basic/test_readimage_fp32.c
Normal file
173
test_conformance/basic/test_readimage_fp32.c
Normal file
@@ -0,0 +1,173 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
|
||||
/*
 * OpenCL C source of the test_rgbaFFFF kernel (2D variant).
 *
 * Each work-item reads the texel at (tid_x, tid_y) from srcimg with
 * read_imagef and stores its four channels, unconverted, into four
 * consecutive floats of dst starting at 4 * its linear texel index.
 */
static const char *rgbaFFFF_kernel_code =
    "__kernel void test_rgbaFFFF(read_only image2d_t srcimg, __global float *dst, sampler_t smp)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
    " float4 color;\n"
    "\n"
    " color = read_imagef(srcimg, smp, (int2)(tid_x, tid_y));\n"
    " indx *= 4;\n"
    " dst[indx+0] = color.x;\n"
    " dst[indx+1] = color.y;\n"
    " dst[indx+2] = color.z;\n"
    " dst[indx+3] = color.w;\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
static float *
|
||||
generate_float_image(int w, int h, MTdata d)
|
||||
{
|
||||
float *ptr = (float*)malloc(w * h * 4 * sizeof(float));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Checks the kernel output against the reference image, one channel at a
 * time (w * h texels, 4 floats each), using exact float comparison.
 *
 * Logs the outcome and returns 0 when every value matches, -1 at the first
 * mismatch.
 */
static int
verify_float_image(float *image, float *outptr, int w, int h)
{
    const int total = w * h * 4;
    int idx = 0;

    while (idx < total)
    {
        if (image[idx] != outptr[idx])
        {
            log_error("READ_IMAGE_RGBA_FLOAT test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("READ_IMAGE_RGBA_FLOAT test passed\n");
    return 0;
}
|
||||
|
||||
int test_readimage_fp32(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
cl_image_format img_format;
|
||||
float *input_ptr, *output_ptr;
|
||||
size_t threads[2];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
int err;
|
||||
size_t origin[3] = {0, 0, 0};
|
||||
size_t region[3] = {img_width, img_height, 1};
|
||||
size_t length = img_width * img_height * 4 * sizeof(float);
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
input_ptr = generate_float_image(img_width, img_height, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
output_ptr = (float*)malloc(length);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_FLOAT;
|
||||
streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteImage failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, &rgbaFFFF_kernel_code, "test_rgbaFFFF" );
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
cl_sampler_properties properties[] = {
|
||||
CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
|
||||
CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
|
||||
CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
|
||||
0 };
|
||||
cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err);
|
||||
test_error(err, "clCreateSamplerWithProperties failed");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (unsigned int)img_width;
|
||||
threads[1] = (unsigned int)img_height;
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("%s clEnqueueNDRangeKernel failed\n", __FUNCTION__);
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verify_float_image(input_ptr, output_ptr, img_width, img_height);
|
||||
|
||||
// cleanup
|
||||
clReleaseSampler(sampler);
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
172
test_conformance/basic/test_readimage_int16.c
Normal file
172
test_conformance/basic/test_readimage_int16.c
Normal file
@@ -0,0 +1,172 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * OpenCL C source of the test_rgba16 kernel (2D variant).
 *
 * Each work-item reads the texel at (tid_x, tid_y) with read_imagef, scales
 * every channel by 65535.0f, converts it with convert_ushort_rte and stores
 * the resulting ushort4 at its linear texel index in dst.
 */
static const char *rgba16_kernel_code =
    "__kernel void test_rgba16(read_only image2d_t srcimg, __global ushort4 *dst, sampler_t smp)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
    " float4 color;\n"
    "\n"
    " color = read_imagef(srcimg, smp, (int2)(tid_x, tid_y));\n"
    " ushort4 dst_write;\n"
    " dst_write.x = convert_ushort_rte(color.x * 65535.0f);\n"
    " dst_write.y = convert_ushort_rte(color.y * 65535.0f);\n"
    " dst_write.z = convert_ushort_rte(color.z * 65535.0f);\n"
    " dst_write.w = convert_ushort_rte(color.w * 65535.0f);\n"
    " dst[indx] = dst_write;\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
static unsigned short *
|
||||
generate_16bit_image(int w, int h, MTdata d)
|
||||
{
|
||||
cl_ushort *ptr = (cl_ushort*)malloc(w * h * 4 * sizeof(cl_ushort));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (cl_ushort)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static int
|
||||
verify_16bit_image(cl_ushort *image, cl_ushort *outptr, int w, int h)
|
||||
{
|
||||
int i;
|
||||
for (i=0; i<w*h*4; i++)
|
||||
{
|
||||
if (outptr[i] != image[i])
|
||||
{
|
||||
log_error("READ_IMAGE_RGBA_UNORM_INT16 test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("READ_IMAGE_RGBA_UNORM_INT16 test passed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_readimage_int16(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
cl_image_format img_format;
|
||||
cl_ushort *input_ptr, *output_ptr;
|
||||
size_t threads[2];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
int err;
|
||||
size_t origin[3] = {0, 0, 0};
|
||||
size_t region[3] = {img_width, img_height, 1};
|
||||
size_t length = img_width * img_height * 4 * sizeof(cl_ushort);
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
input_ptr = generate_16bit_image(img_width, img_height, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
output_ptr = (cl_ushort*)malloc(length);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT16;
|
||||
streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteImage failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, &rgba16_kernel_code, "test_rgba16" );
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
cl_sampler_properties properties[] = {
|
||||
CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
|
||||
CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
|
||||
CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
|
||||
0 };
|
||||
cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err);
|
||||
test_error(err, "clCreateSamplerWithProperties failed");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (unsigned int)img_width;
|
||||
threads[1] = (unsigned int)img_height;
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("%s clEnqueueNDRangeKernel failed\n", __FUNCTION__);
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verify_16bit_image(input_ptr, output_ptr, img_width, img_height);
|
||||
|
||||
// cleanup
|
||||
clReleaseSampler(sampler);
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
167
test_conformance/basic/test_rw_image_access_qualifier.c
Normal file
167
test_conformance/basic/test_rw_image_access_qualifier.c
Normal file
@@ -0,0 +1,167 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/clImageHelper.h"
|
||||
|
||||
/*
 * OpenCL C source of the test_rw_images kernel.
 *
 * Each work-item reads one texel of a read_write 2D image with read_imageui,
 * adds 3 to every channel, issues an image memory fence and writes the value
 * back to the same coordinates.
 */
static const char* rw_kernel_code =
    "kernel void test_rw_images(read_write image2d_t src_image) {\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    "\n"
    " int2 coords = (int2)(tid_x, tid_y);\n"
    "\n"
    " uint4 src_val = read_imageui(src_image, coords);\n"
    " src_val += 3;\n"
    "\n"
    " // required to ensure that following read from image at\n"
    " // location coord returns the latest color value.\n"
    " atomic_work_item_fence(CLK_IMAGE_MEM_FENCE,\n"
    " memory_order_acq_rel,\n"
    " memory_scope_work_item);\n"
    "\n"
    " write_imageui(src_image, coords, src_val);\n"
    "}\n";
|
||||
|
||||
|
||||
/*
 * Exercises the read_write image access qualifier: a 4x4 CL_RGBA /
 * CL_UNSIGNED_INT32 image is filled with random values in [0, 255), the
 * test_rw_images kernel adds 3 to every channel in place, and the image is
 * read back and compared against input + 3.
 *
 * Returns 0 on success and non-zero on failure (the CL error code where one
 * is available, -1 for allocation or NULL-object failures, 1 for a data
 * mismatch).  Host arrays and CL objects are released on every path.
 */
int test_rw_image_access_qualifier(cl_device_id device_id, cl_context context, cl_command_queue commands, int num_elements)
{
    /* All locals are declared up front so that no jump to `cleanup` crosses
     * an initialisation. */
    const unsigned int size_x = 4;
    const unsigned int size_y = 4;
    const unsigned int size = size_x * size_y * 4;   /* 4 channels per texel */
    const size_t origin[3] = {0, 0, 0};
    const size_t region[3] = {size_x, size_y, 1};
    const int dim_count = 2;
    size_t global_dim[2];
    size_t local_dim[2];
    unsigned int i;
    cl_int err;
    int result = -1;

    cl_program program = NULL;
    cl_kernel kernel = NULL;

    cl_mem_flags flags;
    cl_image_format format;
    cl_mem src_image = NULL;

    unsigned int *input = NULL;
    unsigned int *output = NULL;

    /* Create test input */
    input = (unsigned int *)malloc(size*sizeof(unsigned int));
    output = (unsigned int *)malloc(size*sizeof(unsigned int));

    /* Either allocation failing is fatal: both arrays are written below. */
    if (!input || !output) {
        log_error("Error: memory allocation failed\n");
        goto cleanup;
    }

    /* Fill input array with random values */
    for (i = 0; i < size; i++) {
        input[i] = (unsigned int)(rand()/((double)RAND_MAX + 1)*255);
    }

    /* Zero out output array */
    for (i = 0; i < size; i++) {
        output[i] = 0;
    }

    /* Build the program executable */
    err = create_single_kernel_helper_with_build_options(context,&program,&kernel,1,&rw_kernel_code,"test_rw_images", "-cl-std=CL2.0");
    if (err != CL_SUCCESS || !program) {
        log_error("Error: clCreateProgramWithSource failed\n");
        /* Never report success when the program is missing. */
        result = (err != CL_SUCCESS) ? err : -1;
        goto cleanup;
    }

    /* Create arrays for input and output data */
    format.image_channel_order = CL_RGBA;
    format.image_channel_data_type = CL_UNSIGNED_INT32;

    /* Create input image */
    flags = (cl_mem_flags) (CL_MEM_READ_WRITE
                            | CL_MEM_COPY_HOST_PTR);
    src_image = create_image_2d(context, flags, &format,
                                size_x, size_y, 0,
                                (void *)input, &err);
    if (err != CL_SUCCESS || !src_image) {
        log_error("Error: clCreateImage2D failed\n");
        /* Never report success when the image is missing. */
        result = (err != CL_SUCCESS) ? err : -1;
        goto cleanup;
    }

    /* Set kernel arguments */
    err = clSetKernelArg(kernel, 0, sizeof(src_image), &src_image);
    if (err != CL_SUCCESS) {
        log_error("Error: clSetKernelArg failed\n");
        result = err;
        goto cleanup;
    }

    /* Set kernel execution parameters */
    global_dim[0] = size_x;
    global_dim[1] = size_y;

    local_dim[0] = 1;
    local_dim[1] = 1;

    /* Execute kernel exactly once: the check below expects input + 3. */
    err = clEnqueueNDRangeKernel(commands, kernel, dim_count,
                                 NULL, global_dim, local_dim,
                                 0, NULL, NULL);

    /* Read back the results from the device to verify the output */
    err |= clEnqueueReadImage(commands, src_image, CL_TRUE, origin, region, 0, 0,
                              output, 0, NULL, NULL);
    if (err != CL_SUCCESS) {
        log_error("Error: clEnqueueReadBuffer failed\n");
        result = err;
        goto cleanup;
    }

    /* Verify the correctness of kernel result */
    result = 0;
    for (i = 0; i < size; i++) {
        if (output[i] != (input[i] + 3)) {
            log_error("Error: mismatch at index %u\n", i);
            result = 1;
            break;
        }
    }

cleanup:
    /* Release program, kernel and memory objects that were created */
    if (src_image != NULL)
        clReleaseMemObject(src_image);
    if (program != NULL)
        clReleaseProgram(program);
    if (kernel != NULL)
        clReleaseKernel(kernel);

    /* Deallocate arrays */
    free(input);
    free(output);

    return result;
}
|
||||
153
test_conformance/basic/test_simple_image_pitch.c
Normal file
153
test_conformance/basic/test_simple_image_pitch.c
Normal file
@@ -0,0 +1,153 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
int test_simple_read_image_pitch(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements)
|
||||
{
|
||||
cl_int err = CL_SUCCESS;
|
||||
|
||||
size_t imageW = 143;
|
||||
size_t imageH = 151;
|
||||
size_t bufferW = 151*4;
|
||||
size_t bufferH = 151;
|
||||
|
||||
size_t pixel_bytes = 4;
|
||||
size_t image_bytes = imageW * imageH * pixel_bytes;
|
||||
|
||||
size_t buffer_bytes = bufferW * bufferH;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device );
|
||||
|
||||
char* host_image = (char*)malloc(image_bytes);
|
||||
memset(host_image,0x1,image_bytes);
|
||||
|
||||
cl_image_format fmt = { 0 };
|
||||
fmt.image_channel_order = CL_RGBA;
|
||||
fmt.image_channel_data_type = CL_UNORM_INT8;
|
||||
|
||||
cl_image_desc desc = { 0 };
|
||||
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||
desc.image_width = imageW;
|
||||
desc.image_height = imageH;
|
||||
|
||||
cl_mem image = clCreateImage(cl_context_, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE, &fmt, &desc, host_image, &err);
|
||||
test_error(err,"clCreateImage");
|
||||
|
||||
char* host_buffer = (char*)malloc(buffer_bytes);
|
||||
memset(host_buffer,0xa,buffer_bytes);
|
||||
|
||||
// Test reading from the image
|
||||
size_t origin[] = { 0, 0, 0 };
|
||||
size_t region[] = { imageW, imageH, 1 };
|
||||
|
||||
err = clEnqueueReadImage(q, image, CL_TRUE, origin, region, bufferW, 0, host_buffer, 0, NULL, NULL);
|
||||
test_error(err,"clEnqueueReadImage");
|
||||
|
||||
size_t errors = 0;
|
||||
for (size_t j=0;j<bufferH;++j) {
|
||||
for (size_t i=0;i<bufferW;++i) {
|
||||
char val = host_buffer[j*bufferW+i];
|
||||
if ((i<imageW*pixel_bytes) && (val != 0x1)) {
|
||||
log_error("Bad value %x in image at (byte: %lu, row: %lu)\n",val,i,j);
|
||||
++errors;
|
||||
}
|
||||
else if ((i>=imageW*pixel_bytes) && (val != 0xa)) {
|
||||
log_error("Bad value %x outside image at (byte: %lu, row: %lu)\n",val,i,j);
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test_error(clReleaseMemObject(image),"clReleaseMemObject");
|
||||
free(host_image);
|
||||
free(host_buffer);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Checks that clEnqueueWriteImage honours a host row pitch that is wider
 * than one row of the image.
 *
 * A 143x151 CL_RGBA / CL_UNORM_INT8 image is created from a zero-filled host
 * allocation and then overwritten from a host buffer filled with 0xa, passing
 * bufferW as the row pitch. The image is mapped for reading and every byte
 * that lies inside the image width of each row must read back as 0xa.
 *
 * Returns CL_SUCCESS when all bytes match and -1 when a byte mismatches or a
 * host allocation fails. OpenCL failures are handled by the harness
 * test_error macro.
 * NOTE(review): test_error appears to return from the function on failure,
 * in which case the host allocations are not released on those paths.
 */
int test_simple_write_image_pitch(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements)
{
  cl_int err = CL_SUCCESS;

  size_t imageW = 143;
  size_t imageH = 151;
  size_t bufferW = 151*4;
  size_t bufferH = 151;

  size_t pixel_bytes = 4;
  size_t image_bytes = imageW * imageH * pixel_bytes;

  size_t buffer_bytes = bufferW * bufferH;

  PASSIVE_REQUIRE_IMAGE_SUPPORT( device );

  char* host_image = (char*)malloc(image_bytes);
  char* host_buffer = (char*)malloc(buffer_bytes);
  if (host_image == NULL || host_buffer == NULL) {
    log_error("Failed to allocate host memory for the image pitch test\n");
    free(host_image);
    free(host_buffer);
    return -1;
  }
  memset(host_image,0x0,image_bytes);
  memset(host_buffer,0xa,buffer_bytes);

  cl_image_format fmt = { 0 };
  fmt.image_channel_order = CL_RGBA;
  fmt.image_channel_data_type = CL_UNORM_INT8;

  cl_image_desc desc = { 0 };
  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  desc.image_width = imageW;
  desc.image_height = imageH;

  cl_mem image = clCreateImage(cl_context_, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE, &fmt, &desc, host_image, &err);
  test_error(err,"clCreateImage");

  // Test writing to the image with a row pitch larger than the image row
  size_t origin[] = { 0, 0, 0 };
  size_t region[] = { imageW, imageH, 1 };

  err = clEnqueueWriteImage(q, image, CL_TRUE, origin, region, bufferW, 0, host_buffer, 0, NULL, NULL);
  test_error(err,"clEnqueueWriteImage");

  size_t mapped_pitch = 0;
  char* mapped_image = (char*)clEnqueueMapImage(q, image, CL_TRUE, CL_MAP_READ, origin, region, &mapped_pitch, NULL, 0, NULL, NULL, &err);
  test_error(err,"clEnqueueMapImage");

  // Only the bytes inside the image width are defined; anything between the
  // end of a row and mapped_pitch is padding and is not checked.
  size_t errors = 0;
  for (size_t j=0;j<imageH;++j) {
    for (size_t i=0;i<mapped_pitch;++i) {
      char val = mapped_image[j*mapped_pitch+i];
      if ((i<imageW*pixel_bytes) && (val != 0xa)) {
        log_error("Bad value %x in image at (byte: %lu, row: %lu)\n",val,(unsigned long)i,(unsigned long)j);
        ++errors;
      }
    }
  }

  err = clEnqueueUnmapMemObject(q, image, (void *)mapped_image, 0, 0, 0);
  test_error(err,"clEnqueueUnmapMemObject");

  test_error(clReleaseMemObject(image),"clReleaseMemObject");
  free(host_image);
  free(host_buffer);

  // A content mismatch must fail the test, not just be logged.
  if (errors != 0) {
    log_error("%lu byte(s) of the written image did not match the source buffer\n",(unsigned long)errors);
    return -1;
  }

  return CL_SUCCESS;
}
|
||||
397
test_conformance/basic/test_sizeof.c
Normal file
397
test_conformance/basic/test_sizeof.c
Normal file
@@ -0,0 +1,397 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include "procs.h"
|
||||
|
||||
|
||||
|
||||
/*
 * Builds and runs a single-work-item kernel that stores sizeof(<type>) into a
 * device buffer, then reads the 32-bit result back.
 *
 *   context, queue - OpenCL context and command queue used for the probe.
 *   type           - OpenCL C type name (or expression) placed inside sizeof().
 *                    Names beginning with "double" or "half" get the matching
 *                    cl_khr_fp64 / cl_khr_fp16 pragma prepended.
 *   size           - in: initial contents copied into the device buffer;
 *                    out: the size reported by the device. Written only when
 *                    the whole sequence succeeds.
 *
 * Returns CL_SUCCESS or the first failing OpenCL error code.
 */
cl_int get_type_size( cl_context context, cl_command_queue queue, const char *type, cl_ulong *size )
{
    const char *sizeof_kernel_code[4] =
    {
        "", /* optional pragma string */
        "__kernel __attribute__((reqd_work_group_size(1,1,1))) void test_sizeof(__global uint *dst) \n"
        "{\n"
        " dst[0] = (uint) sizeof( ", type, " );\n"
        "}\n"
    };

    cl_program p;
    cl_kernel k;
    cl_mem m;
    cl_uint temp = 0;

    if (!strncmp(type, "double", 6))
    {
        sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
    }
    else if (!strncmp(type, "half", 4))
    {
        sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
    }

    cl_int err = create_single_kernel_helper_with_build_options(context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", "-cl-std=CL2.0");
    if( err )
        return err;

    m = clCreateBuffer( context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof( cl_ulong ), size, &err );
    if( NULL == m )
    {
        clReleaseProgram( p );
        clReleaseKernel( k );
        log_error("\nclCreateBuffer FAILED\n");
        return err;
    }

    err = clSetKernelArg( k, 0, sizeof( cl_mem ), &m );
    if( err )
    {
        clReleaseProgram( p );
        clReleaseKernel( k );
        clReleaseMemObject( m );
        log_error("\nclSetKernelArg FAILED\n");
        return err;
    }

    err = clEnqueueTask( queue, k, 0, NULL, NULL );
    clReleaseProgram( p );
    clReleaseKernel( k );
    if( err )
    {
        clReleaseMemObject( m );
        log_error( "\nclEnqueueTask FAILED\n" );
        return err;
    }

    /* The kernel stores a uint, so only sizeof( cl_uint ) bytes are read back. */
    err = clEnqueueReadBuffer( queue, m, CL_TRUE, 0, sizeof( cl_uint ), &temp, 0, NULL, NULL );
    clReleaseMemObject( m );
    if( err )
    {
        /* temp holds no device data here; leave *size untouched. */
        log_error( "\nclEnqueueReadBuffer FAILED\n" );
        return err;
    }

    *size = (cl_ulong) temp;

    return CL_SUCCESS;
}
|
||||
|
||||
typedef struct size_table
|
||||
{
|
||||
const char *name;
|
||||
cl_ulong size;
|
||||
cl_ulong cl_size;
|
||||
}size_table;
|
||||
|
||||
const size_table scalar_table[] =
|
||||
{
|
||||
// Fixed size entries from table 6.1
|
||||
{ "char", 1, sizeof( cl_char ) },
|
||||
{ "uchar", 1, sizeof( cl_uchar) },
|
||||
{ "unsigned char", 1, sizeof( cl_uchar) },
|
||||
{ "short", 2, sizeof( cl_short) },
|
||||
{ "ushort", 2, sizeof( cl_ushort) },
|
||||
{ "unsigned short", 2, sizeof( cl_ushort) },
|
||||
{ "int", 4, sizeof( cl_int ) },
|
||||
{ "uint", 4, sizeof( cl_uint) },
|
||||
{ "unsigned int", 4, sizeof( cl_uint) },
|
||||
{ "float", 4, sizeof( cl_float) },
|
||||
{ "long", 8, sizeof( cl_long ) },
|
||||
{ "ulong", 8, sizeof( cl_ulong) },
|
||||
{ "unsigned long", 8, sizeof( cl_ulong) }
|
||||
};
|
||||
|
||||
const size_table vector_table[] =
|
||||
{
|
||||
// Fixed size entries from table 6.1
|
||||
{ "char", 1, sizeof( cl_char ) },
|
||||
{ "uchar", 1, sizeof( cl_uchar) },
|
||||
{ "short", 2, sizeof( cl_short) },
|
||||
{ "ushort", 2, sizeof( cl_ushort) },
|
||||
{ "int", 4, sizeof( cl_int ) },
|
||||
{ "uint", 4, sizeof( cl_uint) },
|
||||
{ "float", 4, sizeof( cl_float) },
|
||||
{ "long", 8, sizeof( cl_long ) },
|
||||
{ "ulong", 8, sizeof( cl_ulong) }
|
||||
};
|
||||
|
||||
/* Types (and one expression) whose size must equal the device pointer size. */
const char *ptr_table[] =
{
    "void*",
    "size_t",
    "sizeof(int)",    // check return type of sizeof
    "ptrdiff_t"
};
|
||||
|
||||
/* Types that are only required to have a power-of-two size. */
const char *other_types[] =
{
    "event_t",
    "image2d_t",
    "image3d_t",
    "sampler_t"
};
|
||||
|
||||
/* Returns non-zero when x has exactly one bit set; 0 is not a power of two. */
static int IsPowerOfTwo( cl_ulong x ){ return x != 0 && 0 == (x & (x-1)); }
|
||||
|
||||
int test_sizeof(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
size_t i, j;
|
||||
cl_ulong test;
|
||||
cl_uint ptr_size = CL_UINT_MAX;
|
||||
cl_int err = CL_SUCCESS;
|
||||
|
||||
// Check address space size
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(ptr_size), &ptr_size, NULL);
|
||||
if( err || ptr_size > 64)
|
||||
{
|
||||
log_error( "FAILED: Unable to get CL_DEVICE_ADDRESS_BITS for device %p\n", device );
|
||||
return -1;
|
||||
}
|
||||
log_info( "\tCL_DEVICE_ADDRESS_BITS = %u\n", ptr_size );
|
||||
ptr_size /= 8;
|
||||
|
||||
// Test standard scalar sizes
|
||||
for( i = 0; i < sizeof( scalar_table ) / sizeof( scalar_table[0] ); i++ )
|
||||
{
|
||||
if( ! gHasLong &&
|
||||
(0 == strcmp(scalar_table[i].name, "long") ||
|
||||
0 == strcmp(scalar_table[i].name, "ulong") ||
|
||||
0 == strcmp(scalar_table[i].name, "unsigned long")))
|
||||
{
|
||||
log_info("\nLongs are not supported by this device. Skipping test.\t");
|
||||
continue;
|
||||
}
|
||||
|
||||
test = CL_ULONG_MAX;
|
||||
err = get_type_size( context, queue, scalar_table[i].name, &test );
|
||||
if( err )
|
||||
return err;
|
||||
if( test != scalar_table[i].size )
|
||||
{
|
||||
log_error( "\nFAILED: Type %s has size %lld, but expected size %lld!\n", scalar_table[i].name, test, scalar_table[i].size );
|
||||
return -1;
|
||||
}
|
||||
if( test != scalar_table[i].cl_size )
|
||||
{
|
||||
log_error( "\nFAILED: Type %s has size %lld, but cl_ size is %lld!\n", scalar_table[i].name, test, scalar_table[i].cl_size );
|
||||
return -2;
|
||||
}
|
||||
log_info( "%16s", scalar_table[i].name );
|
||||
}
|
||||
log_info( "\n" );
|
||||
|
||||
// Test standard vector sizes
|
||||
for( j = 2; j <= 16; j *= 2 )
|
||||
{
|
||||
// For each vector size, iterate through types
|
||||
for( i = 0; i < sizeof( vector_table ) / sizeof( vector_table[0] ); i++ )
|
||||
{
|
||||
if( !gHasLong &&
|
||||
(0 == strcmp(vector_table[i].name, "long") ||
|
||||
0 == strcmp(vector_table[i].name, "ulong")))
|
||||
{
|
||||
log_info("\nLongs are not supported by this device. Skipping test.\t");
|
||||
continue;
|
||||
}
|
||||
|
||||
char name[32];
|
||||
sprintf( name, "%s%ld", vector_table[i].name, j );
|
||||
|
||||
test = CL_ULONG_MAX;
|
||||
err = get_type_size( context, queue, name, &test );
|
||||
if( err )
|
||||
return err;
|
||||
if( test != j * vector_table[i].size )
|
||||
{
|
||||
log_error( "\nFAILED: Type %s has size %lld, but expected size %lld!\n", name, test, j * vector_table[i].size );
|
||||
return -1;
|
||||
}
|
||||
if( test != j * vector_table[i].cl_size )
|
||||
{
|
||||
log_error( "\nFAILED: Type %s has size %lld, but cl_ size is %lld!\n", name, test, j * vector_table[i].cl_size );
|
||||
return -2;
|
||||
}
|
||||
log_info( "%16s", name );
|
||||
}
|
||||
log_info( "\n" );
|
||||
}
|
||||
|
||||
//Check that pointer sizes are correct
|
||||
for( i = 0; i < sizeof( ptr_table ) / sizeof( ptr_table[0] ); i++ )
|
||||
{
|
||||
test = CL_ULONG_MAX;
|
||||
err = get_type_size( context, queue, ptr_table[i], &test );
|
||||
if( err )
|
||||
return err;
|
||||
if( test != ptr_size )
|
||||
{
|
||||
log_error( "\nFAILED: Type %s has size %lld, but expected size %u!\n", ptr_table[i], test, ptr_size );
|
||||
return -1;
|
||||
}
|
||||
log_info( "%16s", ptr_table[i] );
|
||||
}
|
||||
|
||||
// Check that intptr_t is large enough
|
||||
test = CL_ULONG_MAX;
|
||||
err = get_type_size( context, queue, "intptr_t", &test );
|
||||
if( err )
|
||||
return err;
|
||||
if( test < ptr_size )
|
||||
{
|
||||
log_error( "\nFAILED: intptr_t has size %lld, but must be at least %u!\n", test, ptr_size );
|
||||
return -1;
|
||||
}
|
||||
if( ! IsPowerOfTwo( test ) )
|
||||
{
|
||||
log_error( "\nFAILED: sizeof(intptr_t) is %lld, but must be a power of two!\n", test );
|
||||
return -2;
|
||||
}
|
||||
log_info( "%16s", "intptr_t" );
|
||||
|
||||
// Check that uintptr_t is large enough
|
||||
test = CL_ULONG_MAX;
|
||||
err = get_type_size( context, queue, "uintptr_t", &test );
|
||||
if( err )
|
||||
return err;
|
||||
if( test < ptr_size )
|
||||
{
|
||||
log_error( "\nFAILED: uintptr_t has size %lld, but must be at least %u!\n", test, ptr_size );
|
||||
return -1;
|
||||
}
|
||||
if( ! IsPowerOfTwo( test ) )
|
||||
{
|
||||
log_error( "\nFAILED: sizeof(uintptr_t) is %lld, but must be a power of two!\n", test );
|
||||
return -2;
|
||||
}
|
||||
log_info( "%16s\n", "uintptr_t" );
|
||||
|
||||
//Check that other types are powers of two
|
||||
for( i = 0; i < sizeof( other_types ) / sizeof( other_types[0] ); i++ )
|
||||
{
|
||||
if( 0 == strcmp(other_types[i], "image2d_t") &&
|
||||
checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED)
|
||||
{
|
||||
log_info("\nimages are not supported by this device. Skipping test.\t");
|
||||
continue;
|
||||
}
|
||||
|
||||
if( gIsEmbedded &&
|
||||
0 == strcmp(other_types[i], "image3d_t") &&
|
||||
checkFor3DImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED)
|
||||
{
|
||||
log_info("\n3D images are not supported by this device. Skipping test.\t");
|
||||
continue;
|
||||
}
|
||||
|
||||
if( 0 == strcmp(other_types[i], "sampler_t") &&
|
||||
checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED)
|
||||
{
|
||||
log_info("\nimages are not supported by this device. Skipping test.\t");
|
||||
continue;
|
||||
}
|
||||
|
||||
test = CL_ULONG_MAX;
|
||||
err = get_type_size( context, queue, other_types[i], &test );
|
||||
if( err )
|
||||
return err;
|
||||
if( ! IsPowerOfTwo( test ) )
|
||||
{
|
||||
log_error( "\nFAILED: Type %s has size %lld, which is not a power of two (section 6.1.5)!\n", other_types[i], test );
|
||||
return -1;
|
||||
}
|
||||
log_info( "%16s", other_types[i] );
|
||||
}
|
||||
log_info( "\n" );
|
||||
|
||||
|
||||
//Check double
|
||||
if( is_extension_available( device, "cl_khr_fp64" ) )
|
||||
{
|
||||
log_info( "\tcl_khr_fp64:" );
|
||||
test = CL_ULONG_MAX;
|
||||
err = get_type_size( context, queue, "double", &test );
|
||||
if( err )
|
||||
return err;
|
||||
if( test != 8 )
|
||||
{
|
||||
log_error( "\nFAILED: double has size %lld, but must be 8!\n", test );
|
||||
return -1;
|
||||
}
|
||||
log_info( "%16s", "double" );
|
||||
|
||||
// Test standard vector sizes
|
||||
for( j = 2; j <= 16; j *= 2 )
|
||||
{
|
||||
char name[32];
|
||||
sprintf( name, "double%ld", j );
|
||||
|
||||
test = CL_ULONG_MAX;
|
||||
err = get_type_size( context, queue, name, &test );
|
||||
if( err )
|
||||
return err;
|
||||
if( test != 8*j )
|
||||
{
|
||||
log_error( "\nFAILED: %s has size %lld, but must be %ld!\n", name, test, 8 * j);
|
||||
return -1;
|
||||
}
|
||||
log_info( "%16s", name );
|
||||
}
|
||||
log_info( "\n" );
|
||||
}
|
||||
|
||||
//Check half
|
||||
if( is_extension_available( device, "cl_khr_fp16" ) )
|
||||
{
|
||||
log_info( "\tcl_khr_fp16:" );
|
||||
test = CL_ULONG_MAX;
|
||||
err = get_type_size( context, queue, "half", &test );
|
||||
if( err )
|
||||
return err;
|
||||
if( test != 2 )
|
||||
{
|
||||
log_error( "\nFAILED: half has size %lld, but must be 2!\n", test );
|
||||
return -1;
|
||||
}
|
||||
log_info( "%16s", "half" );
|
||||
|
||||
// Test standard vector sizes
|
||||
for( j = 2; j <= 16; j *= 2 )
|
||||
{
|
||||
char name[32];
|
||||
sprintf( name, "half%ld", j );
|
||||
|
||||
test = CL_ULONG_MAX;
|
||||
err = get_type_size( context, queue, name, &test );
|
||||
if( err )
|
||||
return err;
|
||||
if( test != 2*j )
|
||||
{
|
||||
log_error( "\nFAILED: %s has size %lld, but must be %ld!\n", name, test, 2 * j);
|
||||
return -1;
|
||||
}
|
||||
log_info( "%16s", name );
|
||||
}
|
||||
log_info( "\n" );
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
107
test_conformance/basic/test_vec_type_hint.c
Normal file
107
test_conformance/basic/test_vec_type_hint.c
Normal file
@@ -0,0 +1,107 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
|
||||
// printf-style kernel template. The three %s slots are filled, in order, with
// an optional extension pragma, the element type name and the vector-size
// suffix used inside vec_type_hint().
static const char *sample_kernel =
    "%s\n" // optional pragma string
    "__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global int *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " dst[tid] = src[tid];\n"
    "\n"
    "}\n";
|
||||
|
||||
// Builds and runs a trivial copy kernel once for every combination of element
// type and vector width, each time decorating the kernel with
// __attribute__((vec_type_hint(<type><width>))). Any build, argument, enqueue
// or finish failure aborts the test with that error.
//
// double is skipped when cl_khr_fp64 is not reported and long/ulong are
// skipped when gHasLong is false. Returns 0 on success.
int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    size_t vec_type_index, vec_size_index;

    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
    const char *size_names[] = {"", "2", "4", "8", "16"};
    const size_t num_types = sizeof(vecType) / sizeof(vecType[0]);
    const size_t num_sizes = sizeof(size_names) / sizeof(size_names[0]);

    // The source lives on the stack so that no early return can leak it.
    char program_source[4096];
    const char *program_ptr = program_source;

    for (vec_type_index=0; vec_type_index<num_types; vec_type_index++) {
        if (vecType[vec_type_index] == kDouble) {
            if (!is_extension_available(deviceID, "cl_khr_fp64")) {
                log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
                continue;
            }
            log_info("Testing doubles.\n");
        }

        if (vecType[vec_type_index] == kLong || vecType[vec_type_index] == kULong)
        {
            if (!gHasLong)
            {
                log_info("Extension cl_khr_int64 not supported; skipping long tests.\n");
                continue;
            }
        }

        for (vec_size_index=0; vec_size_index<num_sizes; vec_size_index++) {
            clProgramWrapper program;
            clKernelWrapper kernel;
            clMemWrapper in, out;
            size_t global[] = {1,1,1};

            log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]);

            program_source[0] = '\0';
            sprintf(program_source, sample_kernel,
                    (vecType[vec_type_index] == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
                    get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]);

            error = create_single_kernel_helper( context, &program, &kernel, 1, &program_ptr, "sample_test" );
            if( error != 0 )
                return error;

            in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*10, NULL, &error);
            test_error(error, "clCreateBuffer failed");
            out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*10, NULL, &error);
            test_error(error, "clCreateBuffer failed");

            error = clSetKernelArg(kernel, 0, sizeof(in), &in);
            test_error(error, "clSetKernelArg failed");
            error = clSetKernelArg(kernel, 1, sizeof(out), &out);
            test_error(error, "clSetKernelArg failed");

            error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, NULL);
            test_error(error, "clEnqueueNDRangeKernel failed");

            error = clFinish(queue);
            test_error(error, "clFinish failed");
        }
    }

    return 0;
}
|
||||
406
test_conformance/basic/test_vector_creation.cpp
Normal file
406
test_conformance/basic/test_vector_creation.cpp
Normal file
@@ -0,0 +1,406 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
|
||||
|
||||
|
||||
// Diagnostic verbosity of create_kernel: 0 is quiet, 1 logs each generated
// line and a summary, values above 1 also log the search state.
#define DEBUG 0

// Maximum number of pieces a vector can be assembled from.
#define DEPTH 16

// Limit the maximum code size for any given kernel.
#define MAX_CODE_SIZE (1024*32)

// Piece widths indexed by position value, with the matching vload/type
// suffixes below. Entries past the first six are placeholders.
// NOTE(review): the placeholders look like padding for out-of-range position
// values - confirm before shrinking either array.
const int sizes[] = {1, 2, 3, 4, 8, 16, -1, -1, -1, -1};

const char *size_names[] = {"", "2", "3", "4", "8", "16" , "!!a", "!!b", "!!c", "!!d"};
|
||||
|
||||
// Creates a kernel by enumerating all possible ways of building the vector out of vloads
|
||||
// skip_to_results will skip results up to a given number. If the amount of code generated
|
||||
// is greater than MAX_CODE_SIZE, this function will return the number of results used,
|
||||
// which can then be used as the skip_to_result value to continue where it left off.
|
||||
int create_kernel(ExplicitType type, int output_size, char *program, int *number_of_results, int skip_to_result) {
|
||||
|
||||
int number_of_sizes;
|
||||
|
||||
switch (output_size) {
|
||||
case 1:
|
||||
number_of_sizes = 1;
|
||||
break;
|
||||
case 2:
|
||||
number_of_sizes = 2;
|
||||
break;
|
||||
case 3:
|
||||
number_of_sizes = 3;
|
||||
break;
|
||||
case 4:
|
||||
number_of_sizes = 4;
|
||||
break;
|
||||
case 8:
|
||||
number_of_sizes = 5;
|
||||
break;
|
||||
case 16:
|
||||
number_of_sizes = 6;
|
||||
break;
|
||||
default:
|
||||
log_error("Invalid size: %d\n", output_size);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int total_results = 0;
|
||||
int current_result = 0;
|
||||
int total_vloads = 0;
|
||||
int total_program_length = 0;
|
||||
int aborted_due_to_size = 0;
|
||||
|
||||
if (skip_to_result < 0)
|
||||
skip_to_result = 0;
|
||||
|
||||
// The line of code for the vector creation
|
||||
char line[1024];
|
||||
// Keep track of what size vector we are using in each position so we can iterate through all fo them
|
||||
int pos[DEPTH];
|
||||
int max_size = output_size;
|
||||
if (DEBUG > 1) log_info("max_size: %d\n", max_size);
|
||||
|
||||
program[0] = '\0';
|
||||
sprintf(program, "%s\n__kernel void test_vector_creation(__global %s *src, __global %s%s *result) {\n",
|
||||
type == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
|
||||
get_explicit_type_name(type), get_explicit_type_name(type), ( number_of_sizes == 3 ) ? "" : size_names[number_of_sizes-1]);
|
||||
total_program_length += (int)strlen(program);
|
||||
|
||||
char storePrefix[ 128 ], storeSuffix[ 128 ];
|
||||
|
||||
// Start out trying sizes 1,1,1,1,1...
|
||||
for (int i=0; i<DEPTH; i++)
|
||||
pos[i] = 0;
|
||||
|
||||
int done = 0;
|
||||
while (!done) {
|
||||
if (DEBUG > 1) {
|
||||
log_info("pos size[] = [");
|
||||
for (int k=0; k<DEPTH; k++)
|
||||
log_info(" %d ", pos[k]);
|
||||
log_info("]\n");
|
||||
}
|
||||
|
||||
// Go through the selected vector sizes and see if the first n of them fit the
|
||||
// required size exactly.
|
||||
int size_so_far = 0;
|
||||
int vloads;
|
||||
for ( vloads=0; vloads<DEPTH; vloads++) {
|
||||
if (size_so_far + sizes[pos[vloads]] <= max_size) {
|
||||
size_so_far += sizes[pos[vloads]];
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (DEBUG > 1) log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far);
|
||||
|
||||
// If they did not fit the required size exactly it is too long, so there is no point in checking any other combinations
|
||||
// of the sizes to the right. Prune them from the search.
|
||||
if (size_so_far != max_size) {
|
||||
// Zero all the sizes to the right
|
||||
for (int k=vloads+1; k<DEPTH; k++) {
|
||||
pos[k] = 0;
|
||||
}
|
||||
// Increment this current size and propagate the values up if needed
|
||||
for (int d=vloads; d>=0; d--) {
|
||||
pos[d]++;
|
||||
if (pos[d] >= number_of_sizes) {
|
||||
pos[d] = 0;
|
||||
if (d == 0) {
|
||||
// If we rolled over then we are done
|
||||
done = 1;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Go on to the next size since this one (and all others "under" it) didn't fit
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// Generate the actual load line if we are building this part
|
||||
line[0]= '\0';
|
||||
if (skip_to_result == 0 || total_results >= skip_to_result) {
|
||||
if( number_of_sizes == 3 )
|
||||
{
|
||||
sprintf( storePrefix, "vstore3( " );
|
||||
sprintf( storeSuffix, ", %d, result )", current_result );
|
||||
}
|
||||
else
|
||||
{
|
||||
sprintf( storePrefix, "result[%d] = ", current_result );
|
||||
storeSuffix[ 0 ] = 0;
|
||||
}
|
||||
|
||||
sprintf(line, "\t%s(%s%d)(", storePrefix, get_explicit_type_name(type), output_size);
|
||||
current_result++;
|
||||
|
||||
int offset = 0;
|
||||
for (int i=0; i<vloads; i++) {
|
||||
if (pos[i] == 0)
|
||||
sprintf(line + strlen(line), "src[%d]", offset);
|
||||
else
|
||||
sprintf(line + strlen(line), "vload%s(0,src+%d)", size_names[pos[i]], offset);
|
||||
offset += sizes[pos[i]];
|
||||
if (i<(vloads-1))
|
||||
sprintf(line + strlen(line), ",");
|
||||
}
|
||||
sprintf(line + strlen(line), ")%s;\n", storeSuffix);
|
||||
|
||||
strcat(program, line);
|
||||
total_vloads += vloads;
|
||||
}
|
||||
total_results++;
|
||||
total_program_length += (int)strlen(line);
|
||||
if (total_program_length > MAX_CODE_SIZE) {
|
||||
aborted_due_to_size = 1;
|
||||
done = 1;
|
||||
}
|
||||
|
||||
|
||||
if (DEBUG) log_info("line is: %s", line);
|
||||
|
||||
// If we did not use all of them, then we ignore any changes further to the right.
|
||||
// We do this by causing those loops to skip on the next iteration.
|
||||
if (vloads < DEPTH) {
|
||||
if (DEBUG > 1) log_info("done with this depth\n");
|
||||
for (int k=vloads; k<DEPTH; k++)
|
||||
pos[k] = number_of_sizes;
|
||||
}
|
||||
|
||||
// Increment the far right size by 1, rolling over as needed
|
||||
for (int d=DEPTH-1; d>=0; d--) {
|
||||
pos[d]++;
|
||||
if (pos[d] >= number_of_sizes) {
|
||||
pos[d] = 0;
|
||||
if (d == 0) {
|
||||
// If we rolled over at the far-left then we are done
|
||||
done = 1;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (done)
|
||||
break;
|
||||
|
||||
// Continue until we are done.
|
||||
}
|
||||
strcat(program, "}\n\n"); //log_info("%s\n", program);
|
||||
total_program_length += 3;
|
||||
if (DEBUG) log_info("\t\t(Program for vector type %s%s contains %d vector creations, of total program length %gkB, with a total of %d vloads.)\n",
|
||||
get_explicit_type_name(type), size_names[number_of_sizes-1], total_results, total_program_length/1024.0, total_vloads);
|
||||
*number_of_results = current_result;
|
||||
if (aborted_due_to_size)
|
||||
return total_results;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Verifies vector construction from every combination of scalars and vloads.
//
// For each element type and for vector widths 2, 3, 4 and 8, create_kernel
// generates one or more kernels whose lines each build the same vector from
// src[0..width-1] in a different way. Every result must equal the first
// `width` elements of the input data (the values 0..15 converted to the
// element type). long/ulong are skipped when gHasLong is false and double is
// skipped when cl_khr_fp64 is not reported.
//
// Returns the number of failures (0 on success), or -1 if the host buffers
// could not be allocated.
int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16};

    char *program_source;
    int error;
    int total_errors = 0;

    cl_int input_data_int[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    cl_double input_data_double[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    void *input_data_converted;
    void *output_data;

    int number_of_results = 0;

    input_data_converted = malloc(sizeof(cl_double)*16);
    program_source = (char*)malloc(sizeof(char)*1024*1024*4);
    if (input_data_converted == NULL || program_source == NULL) {
        log_error("Failed to allocate memory for input data or program source.\n");
        free(input_data_converted);
        free(program_source);
        return -1;
    }

    // Iterate over all the types
    for (int type_index=0; type_index<10; type_index++) {
        if(!gHasLong && ((vecType[type_index] == kLong) || (vecType[type_index] == kULong)))
        {
            log_info("Long/ULong data type not supported on this device\n");
            continue;
        }

        clMemWrapper input;

        if (vecType[type_index] == kDouble) {
            if (!is_extension_available(deviceID, "cl_khr_fp64")) {
                log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
                continue;
            }
            log_info("Testing doubles.\n");
        }

        const size_t type_size = get_explicit_type_size(vecType[type_index]);

        // Convert the data to the right format for the test.
        memset(input_data_converted, 0xff, sizeof(cl_double)*16);
        if (vecType[type_index] != kDouble) {
            for (int j=0; j<16; j++) {
                convert_explicit_value(&input_data_int[j], ((char*)input_data_converted)+type_size*j,
                                       kInt, 0, kRoundToEven, vecType[type_index]);
            }
        } else {
            memcpy(input_data_converted, &input_data_double, sizeof(cl_double)*16);
        }

        input = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, type_size*16,
                               (vecType[type_index] != kDouble) ? input_data_converted : input_data_double, &error);
        if (error) {
            print_error(error, "clCreateBuffer failed");
            total_errors++;
            continue;
        }

        // Iterate over all the vector sizes.
        for (int size_index=1; size_index< 5; size_index++) {
            size_t global[] = {1,1,1};
            int number_generated = -1;
            int previous_number_generated = 0;
            const size_t vec_bytes = type_size*vecSizes[size_index];

            log_info("Testing %s%s...\n", get_explicit_type_name(vecType[type_index]), size_names[size_index]);
            while (number_generated != 0) {
                clMemWrapper output;
                clKernelWrapper kernel;
                clProgramWrapper program;

                number_generated = create_kernel(vecType[type_index], vecSizes[size_index], program_source, &number_of_results, number_generated);
                if (number_generated < 0) {
                    // create_kernel rejected the request and wrote no program.
                    log_error("create_kernel failed.\n");
                    total_errors++;
                    break;
                }
                if (number_generated != 0) {
                    if (previous_number_generated == 0)
                        log_info("Code size greater than %gkB; splitting test into multiple kernels.\n", MAX_CODE_SIZE/1024.0);
                    log_info("\tExecuting vector permutations %d to %d...\n", previous_number_generated, number_generated-1);
                }

                error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&program_source, "test_vector_creation");
                if (error) {
                    log_error("create_single_kernel_helper failed.\n");
                    total_errors++;
                    break;
                }

                const size_t output_bytes = number_of_results*vec_bytes;

                output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, output_bytes, NULL, &error);
                if (error) {
                    print_error(error, "clCreateBuffer failed");
                    total_errors++;
                    break;
                }

                error = clSetKernelArg(kernel, 0, sizeof(input), &input);
                error |= clSetKernelArg(kernel, 1, sizeof(output), &output);
                if (error) {
                    print_error(error, "clSetKernelArg failed");
                    total_errors++;
                    break;
                }

                error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
                if (error) {
                    print_error(error, "clEnqueueNDRangeKernel failed");
                    total_errors++;
                    break;
                }

                error = clFinish(queue);
                if (error) {
                    print_error(error, "clFinish failed");
                    total_errors++;
                    break;
                }

                output_data = malloc(output_bytes);
                if (output_data == NULL) {
                    log_error("Failed to allocate memory for output data.\n");
                    total_errors++;
                    break;
                }
                memset(output_data, 0xff, output_bytes);
                error = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, output_bytes,
                                            output_data, 0, NULL, NULL);
                if (error) {
                    print_error(error, "clEnqueueReadBuffer failed");
                    total_errors++;
                    free(output_data);
                    break;
                }

                // Check the results
                char *res = (char *)output_data;
                char *expected = (char *)input_data_converted;
                for (int i=0; i<number_of_results; i++) {
                    // If they do not match, then print out why
                    if (memcmp(input_data_converted, res + i*vec_bytes, vec_bytes)) {
                        log_error("Data failed to validate for result %d\n", i);

                        // Find the line in the program that failed. 3-element
                        // results are emitted as "vstore3( ..., <i>, result )",
                        // so the result index sits at the end of the line and
                        // we back up to the start of that line.
                        char search[32];
                        char found_line[1024];
                        found_line[0]='\0';
                        search[0]='\0';
                        if (vecSizes[size_index] == 3)
                            sprintf(search, ", %d, result )", i);
                        else
                            sprintf(search, "result[%d] = (", i);
                        char *start_loc = strstr(program_source, search);
                        if (start_loc == NULL)
                            log_error("Failed to find program source for failure for %s in \n%s", search, program_source);
                        else {
                            if (vecSizes[size_index] == 3) {
                                while (start_loc > program_source && start_loc[-1] != '\n')
                                    start_loc--;
                            }
                            char *end_loc = strstr(start_loc, "\n");
                            size_t line_length = (end_loc != NULL) ? (size_t)(end_loc-start_loc) : strlen(start_loc);
                            if (line_length >= sizeof(found_line))
                                line_length = sizeof(found_line)-1;
                            memcpy(found_line, start_loc, line_length);
                            found_line[line_length]='\0';
                            log_error("Failed vector line: %s\n", found_line);
                        }

                        for (int j=0; j<(int)vecSizes[size_index]; j++) {
                            char expected_value[64];
                            char returned_value[64];
                            expected_value[0]='\0';
                            returned_value[0]='\0';
                            print_type_to_string(vecType[type_index], (void*)(res+type_size*(i*vecSizes[size_index]+j)), returned_value);
                            print_type_to_string(vecType[type_index], (void*)(expected+type_size*j), expected_value);
                            log_error("index [%d, component %d]: got: %s expected: %s\n", i, j,
                                      returned_value, expected_value);
                        }

                        total_errors++;
                    }
                }
                free(output_data);
                previous_number_generated = number_generated;
            } // number_generated != 0

        } // vector sizes
    } // vector types

    free(input_data_converted);
    free(program_source);

    return total_errors;
}
|
||||
|
||||
|
||||
986
test_conformance/basic/test_vloadstore.c
Normal file
986
test_conformance/basic/test_vloadstore.c
Normal file
@@ -0,0 +1,986 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
// Outputs debug information for stores
|
||||
#define DEBUG 0
|
||||
// Forces stores/loads to be done with offsets = tid
|
||||
#define LINEAR_OFFSETS 0
|
||||
#define NUM_LOADS 512
|
||||
|
||||
static const char *doubleExtensionPragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
|
||||
|
||||
#pragma mark -------------------- vload harness --------------------------
|
||||
|
||||
typedef void (*create_vload_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize );
|
||||
|
||||
int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize,
|
||||
create_vload_program_fn createFn, size_t bufferSize, MTdata d )
|
||||
{
|
||||
int error;
|
||||
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[ 4 ];
|
||||
const size_t numLoads = (DEBUG) ? 16 : NUM_LOADS;
|
||||
|
||||
if (DEBUG) bufferSize = (bufferSize < 128) ? bufferSize : 128;
|
||||
|
||||
size_t threads[ 1 ], localThreads[ 1 ];
|
||||
clProtectedArray inBuffer( bufferSize );
|
||||
char programSrc[ 10240 ];
|
||||
cl_uint offsets[ numLoads ], alignmentOffsets[ numLoads ];
|
||||
size_t numElements, typeSize, i;
|
||||
unsigned int outVectorSize;
|
||||
|
||||
|
||||
typeSize = get_explicit_type_size( type );
|
||||
numElements = bufferSize / ( typeSize * vecSize );
|
||||
bufferSize = numElements * typeSize * vecSize; // To account for rounding
|
||||
|
||||
if (DEBUG) log_info("Testing: numLoads: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numLoads, (int)typeSize, vecSize, (int)numElements, (int)bufferSize);
|
||||
|
||||
// Create some random input data and random offsets to load from
|
||||
generate_random_data( type, numElements * vecSize, d, (void *)inBuffer );
|
||||
for( i = 0; i < numLoads; i++ )
|
||||
{
|
||||
offsets[ i ] = (cl_uint)random_in_range( 0, (int)numElements - 1, d );
|
||||
if( offsets[ i ] < numElements - 2 )
|
||||
alignmentOffsets[ i ] = (cl_uint)random_in_range( 0, (int)vecSize - 1, d );
|
||||
else
|
||||
alignmentOffsets[ i ] = 0;
|
||||
if (LINEAR_OFFSETS) offsets[i] = (cl_uint)i;
|
||||
}
|
||||
if (LINEAR_OFFSETS) log_info("Offsets set to thread IDs to simplify output.\n");
|
||||
|
||||
// 32-bit fixup
|
||||
outVectorSize = vecSize;
|
||||
|
||||
// Declare output buffers now
|
||||
#if !(defined(_WIN32) && defined(_MSC_VER))
|
||||
char outBuffer[ numLoads * typeSize * outVectorSize ];
|
||||
char referenceBuffer[ numLoads * typeSize * vecSize ];
|
||||
#else
|
||||
char* outBuffer = (char*)_malloca(numLoads * typeSize * outVectorSize * sizeof(cl_char));
|
||||
char* referenceBuffer = (char*)_malloca(numLoads * typeSize * vecSize * sizeof(cl_char));
|
||||
#endif
|
||||
|
||||
// Create the program
|
||||
|
||||
|
||||
createFn( programSrc, numElements, type, vecSize, outVectorSize);
|
||||
|
||||
// Create our kernel
|
||||
const char *ptr = programSrc;
|
||||
|
||||
error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
|
||||
test_error( error, "Unable to create testing kernel" );
|
||||
if (DEBUG) log_info("Kernel: \n%s\n", programSrc);
|
||||
|
||||
// Get the number of args to differentiate the kernels with local storage. (They have 5)
|
||||
cl_uint numArgs;
|
||||
error = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL);
|
||||
test_error( error, "clGetKernelInfo failed");
|
||||
|
||||
// Set up parameters
|
||||
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, bufferSize, (void *)inBuffer, &error );
|
||||
test_error( error, "Unable to create kernel stream" );
|
||||
streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(offsets[0]), offsets, &error );
|
||||
test_error( error, "Unable to create kernel stream" );
|
||||
streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(alignmentOffsets[0]), alignmentOffsets, &error );
|
||||
test_error( error, "Unable to create kernel stream" );
|
||||
streams[ 3 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*typeSize*outVectorSize, (void *)outBuffer, &error );
|
||||
test_error( error, "Unable to create kernel stream" );
|
||||
|
||||
// Set parameters and run
|
||||
if (numArgs == 5) {
|
||||
// We need to set the size of the local storage
|
||||
error = clSetKernelArg(kernel, 0, bufferSize, NULL);
|
||||
test_error( error, "clSetKernelArg for buffer failed");
|
||||
for( i = 0; i < 4; i++ )
|
||||
{
|
||||
error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
}
|
||||
} else {
|
||||
// No local storage
|
||||
for( i = 0; i < 4; i++ )
|
||||
{
|
||||
error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
}
|
||||
}
|
||||
|
||||
threads[ 0 ] = numLoads;
|
||||
error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
|
||||
test_error( error, "Unable to get local thread size" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Unable to exec kernel" );
|
||||
|
||||
// Get the results
|
||||
error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, numLoads * typeSize * outVectorSize * sizeof(cl_char), (void *)outBuffer, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read results" );
|
||||
|
||||
|
||||
// Create the reference results
|
||||
memset( referenceBuffer, 0, numLoads * typeSize * vecSize * sizeof(cl_char));
|
||||
for( i = 0; i < numLoads; i++ )
|
||||
{
|
||||
memcpy( referenceBuffer + i * typeSize * vecSize, ( (char *)(void *)inBuffer ) + ( ( offsets[ i ] * vecSize ) + alignmentOffsets[ i ] ) * typeSize,
|
||||
typeSize * vecSize );
|
||||
}
|
||||
|
||||
// Validate the results now
|
||||
char *expected = referenceBuffer;
|
||||
char *actual = outBuffer;
|
||||
char *in = (char *)(void *)inBuffer;
|
||||
|
||||
if (DEBUG) {
|
||||
log_info("Memory contents:\n");
|
||||
for (i=0; i<numElements; i++) {
|
||||
char inString[1024];
|
||||
char expectedString[ 1024 ], actualString[ 1024 ];
|
||||
if (i < numLoads) {
|
||||
log_info("buffer %3d: input: %s expected: %s got: %s (load offset %3d, alignment offset %3d)", (int)i, GetDataVectorString( &(in[i*typeSize*vecSize]), typeSize, vecSize, inString ),
|
||||
GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
|
||||
GetDataVectorString( &(actual[i*typeSize*outVectorSize]), typeSize, vecSize, actualString ),
|
||||
offsets[i], alignmentOffsets[i]);
|
||||
if (memcmp(&(expected[i*typeSize*vecSize]), &(actual[i*typeSize*outVectorSize]), typeSize * vecSize) != 0)
|
||||
log_error(" << ERROR\n");
|
||||
else
|
||||
log_info("\n");
|
||||
} else {
|
||||
log_info("buffer %3d: input: %s expected: %s got: %s\n", (int)i, GetDataVectorString( &(in[i*typeSize*vecSize]), typeSize, vecSize, inString ),
|
||||
GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
|
||||
GetDataVectorString( &(actual[i*typeSize*outVectorSize]), typeSize, vecSize, actualString ));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for( i = 0; i < numLoads; i++ )
|
||||
{
|
||||
if( memcmp( expected, actual, typeSize * vecSize ) != 0 )
|
||||
{
|
||||
char expectedString[ 1024 ], actualString[ 1024 ];
|
||||
log_error( "ERROR: Data sample %d for vload of %s%d did not validate (expected {%s}, got {%s}, loaded from offset %d)\n",
|
||||
(int)i, get_explicit_type_name( type ), vecSize, GetDataVectorString( expected, typeSize, vecSize, expectedString ),
|
||||
GetDataVectorString( actual, typeSize, vecSize, actualString ), (int)offsets[ i ] );
|
||||
return 1;
|
||||
}
|
||||
expected += typeSize * vecSize;
|
||||
actual += typeSize * outVectorSize;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Runs test_vload for every supported element type and vector width.
 *
 * double is skipped when the device lacks cl_khr_fp64, and long/ulong are
 * skipped when gHasLong is false. After the first failing width of a type the
 * remaining widths of that type are skipped.
 *
 *   createFn   - kernel-source generator forwarded to test_vload
 *   bufferSize - source buffer size in bytes forwarded to test_vload
 *
 * Returns the sum of the non-zero results returned by test_vload (0 when
 * everything passed).
 */
int test_vloadset(cl_device_id device, cl_context context, cl_command_queue queue, create_vload_program_fn createFn, size_t bufferSize )
{
    // Both tables are sentinel-terminated (kNumExplicitTypes / 0).
    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int vecSizes[] = { 2, 3, 4, 8, 16, 0 };
    const char *size_names[] = { "2", "3", "4", "8", "16"};
    unsigned int typeIdx, sizeIdx;
    int error = 0;
    MTdata mtData = init_genrand( gRandomSeed );

    log_info("Testing with buffer size of %d.\n", (int)bufferSize);

    for( typeIdx = 0; vecType[ typeIdx ] != kNumExplicitTypes; typeIdx++ )
    {
        if( vecType[ typeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
            continue;

        if(( vecType[ typeIdx ] == kLong || vecType[ typeIdx ] == kULong ) && !gHasLong )
            continue;

        for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ )
        {
            log_info("Testing %s%s...\n", get_explicit_type_name(vecType[typeIdx]), size_names[sizeIdx]);

            int error_this_type = test_vload( device, context, queue, vecType[ typeIdx ], vecSizes[ sizeIdx ], createFn, bufferSize, mtData );
            if (error_this_type) {
                error += error_this_type;
                // Terminate the line so the next log entry does not run into this one.
                log_error("Failure; skipping further sizes for this type.\n");
                break;
            }
        }
    }

    free_mtdata(mtData);

    return error;
}
|
||||
|
||||
#pragma mark -------------------- vload test cases --------------------------
|
||||
|
||||
void create_global_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
|
||||
{
|
||||
const char *pattern =
|
||||
"%s%s"
|
||||
"__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
" %s%d tmp = vload%d( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n"
|
||||
" results[ tid ] = tmp;\n"
|
||||
"}\n";
|
||||
|
||||
const char *patternV3 =
|
||||
"%s%s"
|
||||
"__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
" %s3 tmp = vload3( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n"
|
||||
" results[ 3*tid ] = tmp.s0;\n"
|
||||
" results[ 3*tid+1 ] = tmp.s1;\n"
|
||||
" results[ 3*tid+2 ] = tmp.s2;\n"
|
||||
"}\n";
|
||||
|
||||
const char *typeName = get_explicit_type_name(type);
|
||||
if(inVectorSize == 3) {
|
||||
sprintf( destBuffer, patternV3,
|
||||
type == kDouble ? doubleExtensionPragma : "",
|
||||
"",
|
||||
typeName, typeName, typeName, typeName );
|
||||
} else {
|
||||
sprintf( destBuffer, pattern, type == kDouble ? doubleExtensionPragma : "",
|
||||
"",
|
||||
typeName, typeName, (int)outVectorSize, typeName, (int)inVectorSize,
|
||||
(int)inVectorSize, typeName );
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Test entry point: vload from __global memory, using a 10240-byte source
 * buffer. n_elems is not used.
 */
int test_vload_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const size_t globalBufferBytes = 10240;

    return test_vloadset( device, context, queue, create_global_load_code, globalBufferBytes );
}
|
||||
|
||||
|
||||
void create_local_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
|
||||
{
|
||||
const char *pattern =
|
||||
"%s%s"
|
||||
//" __local %s%d sSharedStorage[ %d ];\n"
|
||||
"__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
" int lid = get_local_id( 0 );\n"
|
||||
"\n"
|
||||
" if( lid == 0 )\n"
|
||||
" {\n"
|
||||
" for( int i = 0; i < %d; i++ )\n"
|
||||
" sSharedStorage[ i ] = src[ i ];\n"
|
||||
" }\n"
|
||||
// Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all
|
||||
// threads are caught up (including the first one with the copy) before any proceed, i.e. the shared storage should be
|
||||
// updated on all threads at that point
|
||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
||||
"\n"
|
||||
" %s%d tmp = vload%d( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n"
|
||||
" results[ tid ] = tmp;\n"
|
||||
"}\n";
|
||||
|
||||
const char *patternV3 =
|
||||
"%s%s"
|
||||
//" __local %s%d sSharedStorage[ %d ];\n"
|
||||
"__kernel void test_fn(__local %s *sSharedStorage, __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
" int lid = get_local_id( 0 );\n"
|
||||
"\n"
|
||||
" if( lid == 0 )\n"
|
||||
" {\n"
|
||||
" for( int i = 0; i < %d; i++ ) {\n"
|
||||
" sSharedStorage[ 3*i ] = src[ 3*i ];\n"
|
||||
" sSharedStorage[ 3*i +1] = src[ 3*i +1];\n"
|
||||
" sSharedStorage[ 3*i +2] = src[ 3*i +2];\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
// Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all
|
||||
// threads are caught up (including the first one with the copy) before any proceed, i.e. the shared storage should be
|
||||
// updated on all threads at that point
|
||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
||||
"\n"
|
||||
" %s3 tmp = vload3( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n"
|
||||
" results[ 3*tid ] = tmp.s0;\n"
|
||||
" results[ 3*tid +1] = tmp.s1;\n"
|
||||
" results[ 3*tid +2] = tmp.s2;\n"
|
||||
"}\n";
|
||||
|
||||
const char *typeName = get_explicit_type_name(type);
|
||||
if(inVectorSize == 3) {
|
||||
sprintf( destBuffer, patternV3,
|
||||
type == kDouble ? doubleExtensionPragma : "",
|
||||
"",
|
||||
typeName, /*(int)inBufferSize,*/
|
||||
typeName, typeName,
|
||||
(int)inBufferSize,
|
||||
typeName, typeName );
|
||||
} else {
|
||||
sprintf( destBuffer, pattern,
|
||||
type == kDouble ? doubleExtensionPragma : "",
|
||||
"",
|
||||
typeName, (int)inVectorSize, /*(int)inBufferSize,*/
|
||||
typeName, (int)inVectorSize, typeName, (int)outVectorSize,
|
||||
(int)inBufferSize,
|
||||
typeName, (int)inVectorSize, (int)inVectorSize, typeName );
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Test entry point: vload from __local memory.
 *
 * The source buffer size is derived from CL_DEVICE_LOCAL_MEM_SIZE: the value
 * is capped at 10240 bytes, then reduced by 2048 bytes when it is above 4096
 * and halved otherwise. n_elems is not used.
 */
int test_vload_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    cl_ulong localSize;
    int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( localSize ), &localSize, NULL );
    test_error( error, "Unable to get max size of local memory buffer" );

    const cl_ulong cappedBytes = ( localSize > 10240 ) ? 10240 : localSize;
    const cl_ulong testBytes = ( cappedBytes > 4096 ) ? ( cappedBytes - 2048 ) : ( cappedBytes / 2 );

    return test_vloadset( device, context, queue, create_local_load_code, (size_t)testBytes );
}
|
||||
|
||||
|
||||
void create_constant_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
|
||||
{
|
||||
const char *pattern =
|
||||
"%s%s"
|
||||
"__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
" %s%d tmp = vload%d( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n"
|
||||
" results[ tid ] = tmp;\n"
|
||||
"}\n";
|
||||
|
||||
const char *patternV3 =
|
||||
"%s%s"
|
||||
"__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
" %s3 tmp = vload3( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n"
|
||||
" results[ 3*tid ] = tmp.s0;\n"
|
||||
" results[ 3*tid+1 ] = tmp.s1;\n"
|
||||
" results[ 3*tid+2 ] = tmp.s2;\n"
|
||||
"}\n";
|
||||
|
||||
const char *typeName = get_explicit_type_name(type);
|
||||
if(inVectorSize == 3) {
|
||||
sprintf( destBuffer, patternV3,
|
||||
type == kDouble ? doubleExtensionPragma : "",
|
||||
"",
|
||||
typeName, typeName, typeName,
|
||||
typeName );
|
||||
} else {
|
||||
sprintf( destBuffer, pattern,
|
||||
type == kDouble ? doubleExtensionPragma : "",
|
||||
"",
|
||||
typeName, typeName, (int)outVectorSize, typeName, (int)inVectorSize,
|
||||
(int)inVectorSize, typeName );
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Test entry point: vload from __constant memory.
 *
 * The source buffer size is derived from CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:
 * the value is capped at 10240 bytes, then reduced by 2048 bytes when it is
 * above 4096 and halved otherwise. n_elems is not used.
 */
int test_vload_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    cl_ulong maxSize;
    int error = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, NULL );
    test_error( error, "Unable to get max size of constant memory buffer" );

    const cl_ulong cappedBytes = ( maxSize > 10240 ) ? 10240 : maxSize;
    const cl_ulong testBytes = ( cappedBytes > 4096 ) ? ( cappedBytes - 2048 ) : ( cappedBytes / 2 );

    return test_vloadset( device, context, queue, create_constant_load_code, (size_t)testBytes );
}
|
||||
|
||||
|
||||
void create_private_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
|
||||
{
|
||||
const char *pattern =
|
||||
"%s%s"
|
||||
// Private memory is unique per thread, unlike local storage which is unique per local work group. Which means
|
||||
// for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test
|
||||
"#define PRIV_TYPE %s%d\n"
|
||||
"#define PRIV_SIZE %d\n"
|
||||
"__kernel void test_fn( __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
|
||||
"{\n"
|
||||
" __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
"\n"
|
||||
" for( int i = 0; i < %d; i++ )\n"
|
||||
" sPrivateStorage[ i ] = src[ i ];\n"
|
||||
// Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for
|
||||
// anybody else to sync up
|
||||
"\n"
|
||||
" %s%d tmp = vload%d( offsets[ tid ], ( (__private %s *) sPrivateStorage ) + alignmentOffsets[ tid ] );\n"
|
||||
" results[ tid ] = tmp;\n"
|
||||
"}\n";
|
||||
|
||||
const char *patternV3 =
|
||||
"%s%s"
|
||||
// Private memory is unique per thread, unlike local storage which is unique per local work group. Which means
|
||||
// for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test
|
||||
"#define PRIV_TYPE %s\n"
|
||||
"#define PRIV_SIZE %d\n"
|
||||
"__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
|
||||
"{\n"
|
||||
" __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
"\n"
|
||||
" for( int i = 0; i < PRIV_SIZE; i++ )\n"
|
||||
" {\n"
|
||||
" sPrivateStorage[ i ] = src[ i ];\n"
|
||||
" }\n"
|
||||
// Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for
|
||||
// anybody else to sync up
|
||||
"\n"
|
||||
" %s3 tmp = vload3( offsets[ tid ], ( sPrivateStorage ) + alignmentOffsets[ tid ] );\n"
|
||||
" results[ 3*tid ] = tmp.s0;\n"
|
||||
" results[ 3*tid+1 ] = tmp.s1;\n"
|
||||
" results[ 3*tid+2 ] = tmp.s2;\n"
|
||||
"}\n";
|
||||
|
||||
const char *typeName = get_explicit_type_name(type);
|
||||
if(inVectorSize ==3) {
|
||||
sprintf( destBuffer, patternV3,
|
||||
type == kDouble ? doubleExtensionPragma : "",
|
||||
"",
|
||||
typeName, 3*((int)inBufferSize),
|
||||
typeName, typeName,
|
||||
typeName );
|
||||
// log_info("Src is \"\n%s\n\"\n", destBuffer);
|
||||
} else {
|
||||
sprintf( destBuffer, pattern,
|
||||
type == kDouble ? doubleExtensionPragma : "",
|
||||
"",
|
||||
typeName, (int)inVectorSize, (int)inBufferSize,
|
||||
typeName, (int)inVectorSize, typeName, (int)outVectorSize,
|
||||
(int)inBufferSize,
|
||||
typeName, (int)inVectorSize, (int)inVectorSize, typeName );
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Test entry point: vload from __private memory. n_elems is not used.
 */
int test_vload_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    // The amount of private storage actually available is unknown, so use a
    // modest size: room for two 16-element long vectors,
    // i.e. 2 * 8 bytes * 16 = 256 bytes.
    const size_t privateBufferBytes = 256;

    return test_vloadset( device, context, queue, create_private_load_code, privateBufferBytes );
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma mark -------------------- vstore harness --------------------------
|
||||
|
||||
typedef void (*create_vstore_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize );
|
||||
|
||||
int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize,
|
||||
create_vstore_program_fn createFn, size_t bufferSize, MTdata d )
|
||||
{
|
||||
int error;
|
||||
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[ 3 ];
|
||||
|
||||
size_t threads[ 1 ], localThreads[ 1 ];
|
||||
|
||||
size_t numElements, typeSize, numStores = (DEBUG) ? 16 : NUM_LOADS;
|
||||
|
||||
if (DEBUG)
|
||||
bufferSize = (bufferSize < 128) ? bufferSize : 128;
|
||||
|
||||
typeSize = get_explicit_type_size( type );
|
||||
numElements = bufferSize / ( typeSize * vecSize );
|
||||
bufferSize = numElements * typeSize * vecSize; // To account for rounding
|
||||
if( numStores > numElements * 2 / 3 )
|
||||
{
|
||||
// Note: unlike load, we have to restrict the # of stores here, since all offsets must be unique for our test
|
||||
// (Plus, we leave some room for extra values to make sure didn't get written)
|
||||
numStores = numElements * 2 / 3;
|
||||
if( numStores < 1 )
|
||||
numStores = 1;
|
||||
}
|
||||
if (DEBUG)
|
||||
log_info("Testing: numStores: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numStores, (int)typeSize, vecSize, (int)numElements, (int)bufferSize);
|
||||
#if !(defined(_WIN32) && defined(_MSC_VER))
|
||||
cl_uint offsets[ numStores ];
|
||||
#else
|
||||
cl_uint* offsets = (cl_uint*)_malloca(numStores * sizeof(cl_uint));
|
||||
#endif
|
||||
char programSrc[ 10240 ];
|
||||
size_t i;
|
||||
|
||||
#if !(defined(_WIN32) && defined(_MSC_VER))
|
||||
char inBuffer[ numStores * typeSize * vecSize ];
|
||||
#else
|
||||
char* inBuffer = (char*)_malloca( numStores * typeSize * vecSize * sizeof(cl_char));
|
||||
#endif
|
||||
clProtectedArray outBuffer( numElements * typeSize * vecSize );
|
||||
#if !(defined(_WIN32) && defined(_MSC_VER))
|
||||
char referenceBuffer[ numElements * typeSize * vecSize ];
|
||||
#else
|
||||
char* referenceBuffer = (char*)_malloca(numElements * typeSize * vecSize * sizeof(cl_char));
|
||||
#endif
|
||||
|
||||
// Create some random input data and random offsets to load from
|
||||
generate_random_data( type, numStores * vecSize, d, (void *)inBuffer );
|
||||
|
||||
// Note: make sure no two offsets are the same, otherwise the output would depend on
|
||||
// the order that threads ran in, and that would be next to impossible to verify
|
||||
#if !(defined(_WIN32) && defined(_MSC_VER))
|
||||
char flags[ numElements ];
|
||||
#else
|
||||
char* flags = (char*)_malloca( numElements * sizeof(char));
|
||||
#endif
|
||||
|
||||
memset( flags, 0, numElements * sizeof(char) );
|
||||
for( i = 0; i < numStores; i++ )
|
||||
{
|
||||
do
|
||||
{
|
||||
offsets[ i ] = (cl_uint)random_in_range( 0, (int)numElements - 2, d ); // Note: keep it one vec below the end for offset testing
|
||||
} while( flags[ offsets[ i ] ] != 0 );
|
||||
flags[ offsets[ i ] ] = -1;
|
||||
if (LINEAR_OFFSETS)
|
||||
offsets[i] = (int)i;
|
||||
}
|
||||
if (LINEAR_OFFSETS)
|
||||
log_info("Offsets set to thread IDs to simplify output.\n");
|
||||
|
||||
createFn( programSrc, numElements, type, vecSize );
|
||||
|
||||
// Create our kernel
|
||||
const char *ptr = programSrc;
|
||||
error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
|
||||
test_error( error, "Unable to create testing kernel" );
|
||||
if (DEBUG) log_info("Kernel: \n%s\n", programSrc);
|
||||
|
||||
// Get the number of args to differentiate the kernels with local storage. (They have 5)
|
||||
cl_uint numArgs;
|
||||
error = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL);
|
||||
test_error( error, "clGetKernelInfo failed");
|
||||
|
||||
// Set up parameters
|
||||
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * typeSize * vecSize * sizeof(cl_char), (void *)inBuffer, &error );
|
||||
test_error( error, "Unable to create kernel stream" );
|
||||
streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * sizeof(cl_uint), offsets, &error );
|
||||
test_error( error, "Unable to create kernel stream" );
|
||||
streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numElements * typeSize * vecSize, (void *)outBuffer, &error );
|
||||
test_error( error, "Unable to create kernel stream" );
|
||||
|
||||
// Set parameters and run
|
||||
if (numArgs == 5)
|
||||
{
|
||||
// We need to set the size of the local storage
|
||||
error = clSetKernelArg(kernel, 0, bufferSize, NULL);
|
||||
test_error( error, "clSetKernelArg for buffer failed");
|
||||
for( i = 0; i < 3; i++ )
|
||||
{
|
||||
error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// No local storage
|
||||
for( i = 0; i < 3; i++ )
|
||||
{
|
||||
error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] );
|
||||
if (error)
|
||||
log_info("%s\n", programSrc);
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
}
|
||||
}
|
||||
|
||||
threads[ 0 ] = numStores;
|
||||
error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
|
||||
test_error( error, "Unable to get local thread size" );
|
||||
|
||||
// Run in a loop, changing the address offset from 0 to ( vecSize - 1 ) each time, since
|
||||
// otherwise stores might overlap each other, and it'd be a nightmare to test!
|
||||
for( cl_uint addressOffset = 0; addressOffset < vecSize; addressOffset++ )
|
||||
{
|
||||
if (DEBUG)
|
||||
log_info("\tstore addressOffset is %d, executing with threads %d\n", addressOffset, (int)threads[0]);
|
||||
|
||||
// Clear the results first
|
||||
memset( outBuffer, 0, numElements * typeSize * vecSize );
|
||||
error = clEnqueueWriteBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL );
|
||||
test_error( error, "Unable to erase result stream" );
|
||||
|
||||
// Set up the new offset and run
|
||||
if (numArgs == 5)
|
||||
error = clSetKernelArg( kernel, 3+1, sizeof( cl_uint ), &addressOffset );
|
||||
else
|
||||
error = clSetKernelArg( kernel, 3, sizeof( cl_uint ), &addressOffset );
|
||||
test_error( error, "Unable to set address offset argument" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Unable to exec kernel" );
|
||||
|
||||
// Get the results
|
||||
error = clEnqueueReadBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read results" );
|
||||
|
||||
|
||||
// Create the reference results
|
||||
memset( referenceBuffer, 0, numElements * typeSize * vecSize * sizeof(cl_char) );
|
||||
for( i = 0; i < numStores; i++ )
|
||||
{
|
||||
memcpy( referenceBuffer + ( ( offsets[ i ] * vecSize ) + addressOffset ) * typeSize, inBuffer + i * typeSize * vecSize, typeSize * vecSize );
|
||||
}
|
||||
|
||||
// Validate the results now
|
||||
char *expected = referenceBuffer;
|
||||
char *actual = (char *)(void *)outBuffer;
|
||||
|
||||
if (DEBUG)
|
||||
{
|
||||
log_info("Memory contents:\n");
|
||||
for (i=0; i<numElements; i++)
|
||||
{
|
||||
char inString[1024];
|
||||
char expectedString[ 1024 ], actualString[ 1024 ];
|
||||
if (i < numStores)
|
||||
{
|
||||
log_info("buffer %3d: input: %s expected: %s got: %s (store offset %3d)", (int)i, GetDataVectorString( &(inBuffer[i*typeSize*vecSize]), typeSize, vecSize, inString ),
|
||||
GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
|
||||
GetDataVectorString( &(actual[i*typeSize*vecSize]), typeSize, vecSize, actualString ),
|
||||
offsets[i]);
|
||||
if (memcmp(&(expected[i*typeSize*vecSize]), &(actual[i*typeSize*vecSize]), typeSize * vecSize) != 0)
|
||||
log_error(" << ERROR\n");
|
||||
else
|
||||
log_info("\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info("buffer %3d: input: %s expected: %s got: %s\n", (int)i, GetDataVectorString( &(inBuffer[i*typeSize*vecSize]), typeSize, vecSize, inString ),
|
||||
GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
|
||||
GetDataVectorString( &(actual[i*typeSize*vecSize]), typeSize, vecSize, actualString ));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for( i = 0; i < numElements; i++ )
|
||||
{
|
||||
if( memcmp( expected, actual, typeSize * vecSize ) != 0 )
|
||||
{
|
||||
char expectedString[ 1024 ], actualString[ 1024 ];
|
||||
log_error( "ERROR: Data sample %d for vstore of %s%d did not validate (expected {%s}, got {%s}",
|
||||
(int)i, get_explicit_type_name( type ), vecSize, GetDataVectorString( expected, typeSize, vecSize, expectedString ),
|
||||
GetDataVectorString( actual, typeSize, vecSize, actualString ) );
|
||||
size_t j;
|
||||
for( j = 0; j < numStores; j++ )
|
||||
{
|
||||
if( offsets[ j ] == (cl_uint)i )
|
||||
{
|
||||
log_error( ", stored from store #%d (of %d, offset = %d) with address offset of %d", (int)j, (int)numStores, offsets[j], (int)addressOffset );
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( j == numStores )
|
||||
log_error( ", supposed to be canary value" );
|
||||
log_error( ")\n" );
|
||||
return 1;
|
||||
}
|
||||
expected += typeSize * vecSize;
|
||||
actual += typeSize * vecSize;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Runs test_vstore() for every combination of element type and vector width.
 *
 * Types are skipped when the device cannot support them: double without the
 * cl_khr_fp64 extension, and long/ulong when gHasLong is false. As soon as
 * one width fails for a type, the remaining widths of that type are skipped
 * and testing moves on to the next type.
 *
 * createFn   - generator for the kernel source, forwarded to test_vstore()
 * bufferSize - buffer size forwarded to test_vstore()
 *
 * Returns the sum of the non-zero results reported by test_vstore()
 * (0 when everything passed).
 */
int test_vstoreset(cl_device_id device, cl_context context, cl_command_queue queue, create_vstore_program_fn createFn, size_t bufferSize )
{
    /* Element types to exercise; kNumExplicitTypes terminates the list. */
    ExplicitType typeList[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    /* Vector widths (0 terminates) and their printable names, kept in step. */
    unsigned int widthList[] = { 2, 3, 4, 8, 16, 0 };
    const char *widthNames[] = { "2", "3", "4", "8", "16"};
    unsigned int t, w;
    int failures = 0;
    MTdata rng = init_genrand( gRandomSeed );

    log_info("Testing with buffer size of %d.\n", (int)bufferSize);

    for( t = 0; typeList[ t ] != kNumExplicitTypes; t++ )
    {
        const ExplicitType current = typeList[ t ];

        /* The extension query is only issued for the double type. */
        if( current == kDouble )
        {
            if( !is_extension_available( device, "cl_khr_fp64" ) )
                continue;
        }

        if( !gHasLong && ( current == kLong || current == kULong ) )
            continue;

        w = 0;
        while( widthList[ w ] != 0 )
        {
            int result;

            log_info("Testing %s%s...\n", get_explicit_type_name(current), widthNames[w]);

            result = test_vstore( device, context, queue, current, widthList[ w ], createFn, bufferSize, rng );
            if( result != 0 )
            {
                log_error("Failure; skipping further sizes for this type.\n");
                failures += result;
                break;
            }
            w++;
        }
    }

    free_mtdata(rng);
    return failures;
}
|
||||
|
||||
|
||||
#pragma mark -------------------- vstore test cases --------------------------
|
||||
|
||||
/*
 * Writes into destBuffer the OpenCL C source of a kernel that vstores
 * vectors into a __global destination buffer.
 *
 * destBuffer   - receives the NUL-terminated kernel source (written with
 *                sprintf, so the caller must supply a large enough buffer)
 * inBufferSize - not used by this generator
 * type         - element type; kDouble additionally prepends
 *                doubleExtensionPragma
 * inVectorSize - vector width; width 3 uses a dedicated template
 */
void create_global_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
{
    /* Template for all widths except 3.
     * Arguments: pragma, type, width, type, width. */
    static const char genericTemplate[] =
        "%s"
        "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
        "{\n"
        " int tid = get_global_id( 0 );\n"
        " vstore%d( srcValues[ tid ], offsets[ tid ], destBuffer + alignmentOffset );\n"
        "}\n";

    /* Template for width 3.
     * Arguments: pragma, type, type, type. */
    static const char threeWideTemplate[] =
        "%s"
        "__kernel void test_fn( __global %s3 *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
        "{\n"
        " int tid = get_global_id( 0 );\n"
        " if((tid&3) == 0) { // if \"tid\" is a multiple of 4 \n"
        " vstore3( srcValues[ 3*(tid>>2) ], offsets[ tid ], destBuffer + alignmentOffset );\n"
        " } else {\n"
        " vstore3( vload3(tid, (__global %s *)srcValues), offsets[ tid ], destBuffer + alignmentOffset );\n"
        " }\n"
        "}\n";

    const char *elementName = get_explicit_type_name(type);
    const char *pragmaText = ( type == kDouble ) ? doubleExtensionPragma : "";

    if( inVectorSize == 3 )
    {
        sprintf( destBuffer, threeWideTemplate,
                 pragmaText,
                 elementName, elementName, elementName );
        return;
    }

    {
        const int width = (int)inVectorSize;

        sprintf( destBuffer, genericTemplate,
                 pragmaText,
                 elementName, width, elementName, width );
    }
}
|
||||
|
||||
/*
 * Entry point for the vstore-to-__global test: runs the full type/width
 * matrix with the global-memory kernel generator and a fixed buffer size.
 * n_elems is not used.
 */
int test_vstore_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const size_t fixedBufferSize = 10240;

    return test_vstoreset( device, context, queue, create_global_store_code, fixedBufferSize );
}
|
||||
|
||||
|
||||
/*
 * Writes into destBuffer the OpenCL C source of a kernel that vstores
 * vectors into __local storage and then copies the stored elements out to a
 * __global destination buffer.
 *
 * destBuffer   - receives the NUL-terminated kernel source (written with
 *                sprintf, so the caller must supply a large enough buffer)
 * inBufferSize - not used by this generator
 * type         - element type; kDouble additionally prepends
 *                doubleExtensionPragma
 * inVectorSize - vector width; width 3 uses a dedicated template
 */
void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
{
    /* Template for all widths except 3. */
    const char *pattern =
        "%s"
        "\n"
        "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n"
        "{\n"
        " int tid = get_global_id( 0 );\n"
        // The kernel first zeroes the local slots it is about to use, because
        // any location that is not written would otherwise hold garbage.
        " sSharedStorage[ offsets[tid] ] = (%s%d)(%s)0;\n"
        " sSharedStorage[ offsets[tid] +1 ] = sSharedStorage[ offsets[tid] ];\n"
        " barrier( CLK_LOCAL_MEM_FENCE );\n"
        "\n"
        " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__local %s *)sSharedStorage ) + alignmentOffset );\n"
        "\n"
        // Every work-item must have finished its vstore into local storage
        // before anything is copied to the global output buffer.
        " barrier( CLK_LOCAL_MEM_FENCE );\n"
        "\n"
        // Only this work-item's own portion of local storage is copied out;
        // otherwise work-items would overwrite each other's results.
        " int i;\n"
        " __local %s *sp = (__local %s*) (sSharedStorage + offsets[tid]) + alignmentOffset;\n"
        " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n"
        " for( i = 0; (size_t)i < sizeof( sSharedStorage[0]) / sizeof( *sp ); i++ ) \n"
        " dp[i] = sp[i];\n"
        "}\n";

    /* Template for width 3: all pointers are scalar-typed and the kernel
     * indexes them in units of 3 elements. */
    const char *patternV3 =
        "%s"
        "\n"
        "__kernel void test_fn(__local %s *sSharedStorage, __global %s *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
        "{\n"
        " int tid = get_global_id( 0 );\n"
        // Zero the six scalar slots starting at this work-item's offset.
        " sSharedStorage[ 3*offsets[tid] ] = (%s)0;\n"
        " sSharedStorage[ 3*offsets[tid] +1 ] = \n"
        " sSharedStorage[ 3*offsets[tid] ];\n"
        " sSharedStorage[ 3*offsets[tid] +2 ] = \n"
        " sSharedStorage[ 3*offsets[tid]];\n"
        " sSharedStorage[ 3*offsets[tid] +3 ] = \n"
        " sSharedStorage[ 3*offsets[tid]];\n"
        " sSharedStorage[ 3*offsets[tid] +4 ] = \n"
        " sSharedStorage[ 3*offsets[tid] ];\n"
        " sSharedStorage[ 3*offsets[tid] +5 ] = \n"
        " sSharedStorage[ 3*offsets[tid]];\n"
        " barrier( CLK_LOCAL_MEM_FENCE );\n"
        "\n"
        " vstore3( vload3(tid,srcValues), offsets[ tid ], sSharedStorage + alignmentOffset );\n"
        "\n"
        // Every work-item must have finished its vstore into local storage
        // before anything is copied to the global output buffer.
        " barrier( CLK_LOCAL_MEM_FENCE );\n"
        "\n"
        // Only this work-item's own portion of local storage is copied out;
        // otherwise work-items would overwrite each other's results.
        " int i;\n"
        " __local %s *sp = (sSharedStorage + 3*offsets[tid]) + alignmentOffset;\n"
        " __global %s *dp = (destBuffer + 3*offsets[tid]) + alignmentOffset;\n"
        " for( i = 0; i < 3; i++ ) \n"
        " dp[i] = sp[i];\n"
        "}\n";

    const char *typeName = get_explicit_type_name(type);
    if(inVectorSize == 3) {
        /* patternV3 consumes the pragma plus exactly six type names:
         * three in the signature, one for the zero cast, and one each for
         * the sp and dp declarations. */
        sprintf( destBuffer, patternV3,
                 type == kDouble ? doubleExtensionPragma : "",
                 typeName, typeName, typeName,
                 typeName,
                 typeName, typeName );
    } else {
        sprintf( destBuffer, pattern,
                 type == kDouble ? doubleExtensionPragma : "",
                 /* signature: sSharedStorage, srcValues, destBuffer */
                 typeName, (int)inVectorSize,
                 typeName, (int)inVectorSize,
                 typeName, (int)inVectorSize,
                 /* zero initialisation cast */
                 typeName, (int)inVectorSize, typeName,
                 /* vstoreN call and its pointer cast */
                 (int)inVectorSize, typeName,
                 /* sp and dp declarations */
                 typeName, typeName,
                 typeName, typeName );
    }
}
|
||||
|
||||
/*
 * Entry point for the vstore-to-__local test. Chooses a local buffer size
 * from the device's CL_DEVICE_LOCAL_MEM_SIZE and runs the full type/width
 * matrix with the local-memory kernel generator. n_elems is not used.
 *
 * Size selection: the reported size is capped at 10240; a capped size above
 * 4096 is reduced by 2048, anything else is halved.
 */
int test_vstore_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    cl_ulong reported;
    cl_ulong chosen;
    int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( reported ), &reported, NULL );
    test_error( error, "Unable to get max size of local memory buffer" );

    chosen = ( reported > 10240 ) ? 10240 : reported;
    chosen = ( chosen > 4096 ) ? ( chosen - 2048 ) : ( chosen / 2 );

    return test_vstoreset( device, context, queue, create_local_store_code, (size_t)chosen );
}
|
||||
|
||||
|
||||
/*
 * Writes into destBuffer the OpenCL C source of a kernel that vstores
 * vectors into a __private array and then copies the stored elements out to
 * a __global destination buffer.
 *
 * destBuffer   - receives the NUL-terminated kernel source (written with
 *                sprintf, so the caller must supply a large enough buffer)
 * inBufferSize - element count of the kernel's sPrivateStorage array
 * type         - element type; kDouble additionally prepends
 *                doubleExtensionPragma
 * inVectorSize - vector width; width 3 uses a dedicated template
 */
void create_private_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
{
    /* Private memory is per work-item (local memory is per work-group), so
     * each work-item declares its own full-size private array. */
    static const char genericTemplate[] =
        "%s"
        "\n"
        "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n"
        "{\n"
        " __private %s%d sPrivateStorage[ %d ];\n"
        " int tid = get_global_id( 0 );\n"
        /* Clear this work-item's slot so it does not hold garbage. */
        " sPrivateStorage[tid] = (%s%d)(%s)0;\n"
        "\n"
        " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n"
        "\n"
        /* Copy out only the elements this work-item stored, so work-items
         * do not overwrite each other's results. */
        " uint i;\n"
        " __private %s *sp = (__private %s*) (sPrivateStorage + offsets[tid]) + alignmentOffset;\n"
        " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n"
        " for( i = 0; i < sizeof( sPrivateStorage[0]) / sizeof( *sp ); i++ ) \n"
        " dp[i] = sp[i];\n"
        "}\n";

    /* Width-3 variant: the source pointer is scalar-typed and read with
     * vload3; the array-size conversion (%d) is still required. */
    static const char threeWideTemplate[] =
        "%s"
        "\n"
        "__kernel void test_fn( __global %s *srcValues, __global uint *offsets, __global %s3 *destBuffer, uint alignmentOffset )\n"
        "{\n"
        " __private %s3 sPrivateStorage[ %d ];\n"
        " int tid = get_global_id( 0 );\n"
        /* Clear this work-item's slot so it does not hold garbage. */
        " sPrivateStorage[tid] = (%s3)(%s)0;\n"
        "\n"
        " vstore3( vload3(tid,srcValues), offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n"
        "\n"
        /* Copy out only the elements this work-item stored, so work-items
         * do not overwrite each other's results. */
        " uint i;\n"
        " __private %s *sp = ((__private %s*) sPrivateStorage) + 3*offsets[tid] + alignmentOffset;\n"
        " __global %s *dp = ((__global %s*) destBuffer) + 3*offsets[tid] + alignmentOffset;\n"
        " for( i = 0; i < 3; i++ ) \n"
        " dp[i] = sp[i];\n"
        "}\n";

    const char *elementName = get_explicit_type_name(type);
    const char *pragmaText = ( type == kDouble ) ? doubleExtensionPragma : "";
    const int storageCount = (int)inBufferSize;

    if( inVectorSize == 3 )
    {
        sprintf( destBuffer, threeWideTemplate,
                 pragmaText,
                 /* signature: srcValues, destBuffer */
                 elementName, elementName,
                 /* private array declaration */
                 elementName, storageCount,
                 /* zero initialisation cast */
                 elementName, elementName,
                 /* vstore3 pointer cast, then sp and dp declarations */
                 elementName,
                 elementName, elementName,
                 elementName, elementName );
    }
    else
    {
        const int width = (int)inVectorSize;

        sprintf( destBuffer, genericTemplate,
                 pragmaText,
                 /* signature: srcValues, destBuffer */
                 elementName, width, elementName, width,
                 /* private array declaration */
                 elementName, width, storageCount,
                 /* zero initialisation cast */
                 elementName, width, elementName,
                 /* vstoreN call and its pointer cast */
                 width, elementName,
                 /* sp and dp declarations */
                 elementName, elementName,
                 elementName, elementName );
    }
}
|
||||
|
||||
/*
 * Entry point for the vstore-to-__private test. n_elems is not used.
 *
 * The amount of private storage actually available is unknown, so a modest
 * fixed size is used: enough for at least two 16-element long vectors,
 * i.e. 2 * 8 bytes * 16 = 256 bytes.
 */
int test_vstore_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    const size_t privateBufferSize = 256;

    return test_vstoreset( device, context, queue, create_private_store_code, privateBufferSize );
}
|
||||
|
||||
|
||||
|
||||
153
test_conformance/basic/test_wg_barrier.c
Normal file
153
test_conformance/basic/test_wg_barrier.c
Normal file
@@ -0,0 +1,153 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * OpenCL C source for the work_group_barrier test kernel.
 *
 * Each work-item first accumulates a strided partial sum of a[0..n) into
 * tmp_sum[local id]. The partial sums are then folded together in a loop
 * that halves (rounding up) the active range on every pass, with a
 * work_group_barrier at the start of each pass. Work-item 0 finally writes
 * tmp_sum[0] to *sum.
 */
const char *wg_barrier_kernel_code =
    "__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n"
    "{\n"
    " int tid = get_local_id(0);\n"
    " int lsize = get_local_size(0);\n"
    " int i;\n"
    "\n"
    " tmp_sum[tid] = 0;\n"
    " for (i=tid; i<n; i+=lsize)\n"
    " tmp_sum[tid] += a[i];\n"
    " \n"
    " // updated to work for any workgroup size \n"
    " for (i=hadd(lsize,1); lsize>1; i = hadd(i,1))\n"
    " {\n"
    " work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n"
    " if (tid + i < lsize)\n"
    " tmp_sum[tid] += tmp_sum[tid + i];\n"
    " lsize = i; \n"
    " }\n"
    "\n"
    " //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n"
    " if (tid == 0)\n"
    " *sum = tmp_sum[0];\n"
    "}\n";
|
||||
|
||||
|
||||
/*
 * Checks the device-computed sum against a host-computed reference.
 *
 * inptr  - the n input values that were summed
 * tmpptr - not used
 * outptr - outptr[0] holds the sum produced by the kernel
 * n      - number of input values
 *
 * The reference is accumulated in unsigned arithmetic so that a sum which
 * exceeds the range of int wraps with defined behaviour instead of
 * overflowing a signed int. The comparison is done on the unsigned bit
 * patterns for the same reason.
 *
 * Returns 0 when the sums agree, -1 otherwise.
 */
static int
verify_sum(int *inptr, int *tmpptr, int *outptr, int n)
{
    unsigned int reference = 0;
    int i;

    (void)tmpptr;

    for (i = 0; i < n; i++)
    {
        reference += (unsigned int)inptr[i];
    }

    if (reference != (unsigned int)outptr[0])
    {
        log_error("work_group_barrier test failed\n");
        return -1;
    }

    log_info("work_group_barrier test passed\n");
    return 0;
}
|
||||
|
||||
|
||||
int
|
||||
test_wg_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[3];
|
||||
cl_int *input_ptr = NULL, *output_ptr = NULL, *tmp_ptr =NULL;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t global_threads[3];
|
||||
size_t local_threads[3];
|
||||
int err;
|
||||
int i;
|
||||
size_t max_local_workgroup_size[3];
|
||||
size_t max_threadgroup_size = 0;
|
||||
MTdata d;
|
||||
|
||||
err = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &wg_barrier_kernel_code, "compute_sum", "-cl-std=CL2.0" );
|
||||
test_error(err, "Failed to build kernel/program.");
|
||||
|
||||
err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
|
||||
sizeof(max_threadgroup_size), &max_threadgroup_size, NULL);
|
||||
test_error(err, "clGetKernelWorkgroupInfo failed.");
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
|
||||
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
|
||||
|
||||
// Pick the minimum of the device and the kernel
|
||||
if (max_threadgroup_size > max_local_workgroup_size[0])
|
||||
max_threadgroup_size = max_local_workgroup_size[0];
|
||||
|
||||
// work group size must divide evenly into the global size
|
||||
while( num_elements % max_threadgroup_size )
|
||||
max_threadgroup_size--;
|
||||
|
||||
input_ptr = (int*)malloc(sizeof(int) * num_elements);
|
||||
output_ptr = (int*)malloc(sizeof(int));
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed.");
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int), NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed.");
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * max_threadgroup_size, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed.");
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed.");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
|
||||
err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
|
||||
test_error(err, "clSetKernelArg failed.");
|
||||
|
||||
global_threads[0] = max_threadgroup_size;
|
||||
local_threads[0] = max_threadgroup_size;
|
||||
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueNDRangeKernel failed.");
|
||||
|
||||
err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int), (void *)output_ptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed.");
|
||||
|
||||
err = verify_sum(input_ptr, tmp_ptr, output_ptr, num_elements);
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
178
test_conformance/basic/test_work_item_functions.cpp
Normal file
178
test_conformance/basic/test_work_item_functions.cpp
Normal file
@@ -0,0 +1,178 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
typedef struct work_item_data
|
||||
{
|
||||
cl_uint workDim;
|
||||
cl_uint globalSize[ 3 ];
|
||||
cl_uint globalID[ 3 ];
|
||||
cl_uint localSize[ 3 ];
|
||||
cl_uint localID[ 3 ];
|
||||
cl_uint numGroups[ 3 ];
|
||||
cl_uint groupID[ 3 ];
|
||||
};
|
||||
|
||||
/*
 * OpenCL C source for the work-item function test kernel.
 *
 * Each work-item writes the results of the work-item query built-ins into
 * outData[ get_global_id(0) ], one array slot per dimension up to
 * get_work_dim(). The kernel-side struct layout matches the host-side
 * work_item_data struct field for field.
 */
static const char *workItemKernelCode =
    "typedef struct {\n"
    " uint workDim;\n"
    " uint globalSize[ 3 ];\n"
    " uint globalID[ 3 ];\n"
    " uint localSize[ 3 ];\n"
    " uint localID[ 3 ];\n"
    " uint numGroups[ 3 ];\n"
    " uint groupID[ 3 ];\n"
    " } work_item_data;\n"
    "\n"
    "__kernel void sample_kernel( __global work_item_data *outData )\n"
    "{\n"
    " int id = get_global_id(0);\n"
    " outData[ id ].workDim = (uint)get_work_dim();\n"
    " for( uint i = 0; i < get_work_dim(); i++ )\n"
    " {\n"
    " outData[ id ].globalSize[ i ] = (uint)get_global_size( i );\n"
    " outData[ id ].globalID[ i ] = (uint)get_global_id( i );\n"
    " outData[ id ].localSize[ i ] = (uint)get_local_size( i );\n"
    " outData[ id ].localID[ i ] = (uint)get_local_id( i );\n"
    " outData[ id ].numGroups[ i ] = (uint)get_num_groups( i );\n"
    " outData[ id ].groupID[ i ] = (uint)get_group_id( i );\n"
    " }\n"
    "}";
|
||||
|
||||
#define NUM_TESTS 1
|
||||
|
||||
/*
 * Tests the work-item query built-ins (get_work_dim, get_global_size,
 * get_global_id, get_local_size, get_local_id, get_num_groups,
 * get_group_id) for 1, 2 and 3 dimensions.
 *
 * For each dimensionality a random global size (1..20 per dimension) is
 * launched with a local size of 1, and the records written by the kernel
 * are checked: sizes and counts must match exactly, IDs must lie inside
 * their valid range.
 *
 * Returns 0 on success, -1 on a validation failure, or the OpenCL error
 * code of a failing API call. num_elements is not used.
 */
int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;

    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper outData;
    work_item_data testData[ 10240 ];   // large automatic array; also defines the device buffer size
    size_t threads[3], localThreads[3];
    MTdata d;


    error = create_single_kernel_helper( context, &program, &kernel, 1, &workItemKernelCode, "sample_kernel" );
    test_error( error, "Unable to create testing kernel" );

    outData = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( testData ), NULL, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( outData ), &outData );
    test_error( error, "Unable to set kernel arg" );

    d = init_genrand( gRandomSeed );
    for( size_t dim = 1; dim <= 3; dim++ )
    {
        for( int i = 0; i < NUM_TESTS; i++ )
        {
            size_t numItems = 1;
            for( size_t j = 0; j < dim; j++ )
            {
                // All of our thread sizes should be within the max local sizes, since they're all <= 20
                threads[ j ] = (size_t)random_in_range( 1, 20, d );
                localThreads[ j ] = threads[ j ] / (size_t)random_in_range( 1, (int)threads[ j ], d );
                while( localThreads[ j ] > 1 && ( threads[ j ] % localThreads[ j ] != 0 ) )
                    localThreads[ j ]--;

                numItems *= threads[ j ];

                // Hack for now: localThreads > 1 are iffy.
                // The value computed above is discarded, but the random draws are still made.
                localThreads[ j ] = 1;
            }
            error = clEnqueueNDRangeKernel( queue, kernel, (cl_uint)dim, NULL, threads, localThreads, 0, NULL, NULL );
            test_error( error, "Unable to run kernel" );

            error = clEnqueueReadBuffer( queue, outData, CL_TRUE, 0, sizeof( testData ), testData, 0, NULL, NULL );
            test_error( error, "Unable to read results" );

            // Validate. The kernel indexes its output by get_global_id(0), so only
            // the first threads[0] records are populated.
            for( size_t q = 0; q < threads[0]; q++ )
            {
                // We can't really validate the actual value of each one, but we can validate that they're within a sane range
                if( testData[ q ].workDim != (cl_uint)dim )
                {
                    log_error( "ERROR: get_work_dim() did not return proper value for %d dimensions (expected %d, got %d)\n", (int)dim, (int)dim, (int)testData[ q ].workDim );
                    free_mtdata(d);
                    return -1;
                }
                for( size_t j = 0; j < dim; j++ )
                {
                    if( testData[ q ].globalSize[ j ] != (cl_uint)threads[ j ] )
                    {
                        log_error( "ERROR: get_global_size(%d) did not return proper value for %d dimensions (expected %d, got %d)\n",
                                    (int)j, (int)dim, (int)threads[ j ], (int)testData[ q ].globalSize[ j ] );
                        free_mtdata(d);
                        return -1;
                    }
                    // The recorded IDs are unsigned, so only the upper bound needs checking.
                    if( testData[ q ].globalID[ j ] >= (cl_uint)threads[ j ] )
                    {
                        log_error( "ERROR: get_global_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n",
                                    (int)j, (int)dim, (int)threads[ j ], (int)testData[ q ].globalID[ j ] );
                        free_mtdata(d);
                        return -1;
                    }
                    if( testData[ q ].localSize[ j ] != (cl_uint)localThreads[ j ] )
                    {
                        log_error( "ERROR: get_local_size(%d) did not return proper value for %d dimensions (expected %d, got %d)\n",
                                    (int)j, (int)dim, (int)localThreads[ j ], (int)testData[ q ].localSize[ j ] );
                        free_mtdata(d);
                        return -1;
                    }
                    // Fixed: the original condition `x < 0 && x >= limit` could never
                    // be true for an unsigned value, so this check was dead.
                    if( testData[ q ].localID[ j ] >= (cl_uint)localThreads[ j ] )
                    {
                        log_error( "ERROR: get_local_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n",
                                    (int)j, (int)dim, (int)localThreads[ j ], (int)testData[ q ].localID[ j ] );
                        free_mtdata(d);
                        return -1;
                    }
                    size_t groupCount = ( threads[ j ] + localThreads[ j ] - 1 ) / localThreads[ j ];
                    if( testData[ q ].numGroups[ j ] != (cl_uint)groupCount )
                    {
                        log_error( "ERROR: get_num_groups(%d) did not return proper value for %d dimensions (expected %d with global dim %d and local dim %d, got %d)\n",
                                    (int)j, (int)dim, (int)groupCount, (int)threads[ j ], (int)localThreads[ j ], (int)testData[ q ].numGroups[ j ] );
                        free_mtdata(d);
                        return -1;
                    }
                    if( testData[ q ].groupID[ j ] >= (cl_uint)groupCount )
                    {
                        log_error( "ERROR: get_group_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n",
                                    (int)j, (int)dim, (int)groupCount, (int)testData[ q ].groupID[ j ] );
                        free_mtdata(d);
                        return -1;
                    }
                }
            }
        }
    }

    free_mtdata(d);
    return 0;
}
|
||||
|
||||
|
||||
354
test_conformance/basic/test_writeimage.c
Normal file
354
test_conformance/basic/test_writeimage.c
Normal file
@@ -0,0 +1,354 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * OpenCL C source for the BGRA write_imagef test kernel.
 *
 * Each work-item reads the four bytes of its pixel from src, swaps the
 * bytes at offsets 0 and 2 while building the colour, scales the colour by
 * 1/255 and writes it to dstimg with write_imagef.
 */
static const char *bgra8888_write_kernel_code =
    "\n"
    "__kernel void test_bgra8888_write(__global unsigned char *src, write_only image2d_t dstimg)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
    " float4 color;\n"
    "\n"
    " indx *= 4;\n"
    " color = (float4)((float)src[indx+2], (float)src[indx+1], (float)src[indx+0], (float)src[indx+3]);\n"
    " color /= (float4)(255.0f, 255.0f, 255.0f, 255.0f);\n"
    " write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
/*
 * OpenCL C source for the RGBA write_imagef test kernel.
 *
 * Each work-item reads the four bytes of its pixel from src in order,
 * scales the colour by 1/255 and writes it to dstimg with write_imagef.
 */
static const char *rgba8888_write_kernel_code =
    "\n"
    "__kernel void test_rgba8888_write(__global unsigned char *src, write_only image2d_t dstimg)\n"
    "{\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
    " float4 color;\n"
    "\n"
    " indx *= 4;\n"
    " color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
    " color /= (float4)(255.0f, 255.0f, 255.0f, 255.0f);\n"
    " write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
static unsigned char *
|
||||
generate_8888_image(int w, int h, MTdata d)
|
||||
{
|
||||
cl_uchar *ptr = (cl_uchar *)malloc(w * h * 4);
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (cl_uchar)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Compares the w * h * 4 bytes read back from the BGRA image (outptr)
 * against the bytes that were supplied as input (image).
 *
 * Logs the outcome and returns 0 when all bytes match, -1 at the first
 * mismatch.
 */
static int
verify_bgra8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    const int byte_count = w * h * 4;
    int pos = 0;

    while (pos < byte_count)
    {
        if (image[pos] != outptr[pos])
        {
            log_error("WRITE_IMAGE_BGRA_UNORM_INT8 test failed\n");
            return -1;
        }
        pos++;
    }

    log_info("WRITE_IMAGE_BGRA_UNORM_INT8 test passed\n");
    return 0;
}
|
||||
|
||||
/*
 * Compares the w * h * 4 bytes read back from the RGBA image (outptr)
 * against the bytes that were supplied as input (image).
 *
 * Logs the outcome and returns 0 when all bytes match, -1 at the first
 * mismatch.
 */
static int
verify_rgba8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    const int byte_count = w * h * 4;
    int pos = 0;

    while (pos < byte_count)
    {
        if (image[pos] != outptr[pos])
        {
            log_error("WRITE_IMAGE_RGBA_UNORM_INT8 test failed\n");
            return -1;
        }
        pos++;
    }

    log_info("WRITE_IMAGE_RGBA_UNORM_INT8 test passed\n");
    return 0;
}
|
||||
|
||||
|
||||
/*
 * Runs the test_bgra8888_write and test_rgba8888_write kernels against
 * 512x512 CL_UNORM_INT8 images created with CL_MEM_READ_WRITE and with
 * CL_MEM_WRITE_ONLY, reads each image back and compares it byte-for-byte
 * with the host data that was uploaded to the kernel's source buffer.
 *
 * Resource layout:
 *   streams[0] / streams[2]  BGRA images (READ_WRITE / WRITE_ONLY)
 *   streams[1] / streams[3]  RGBA images (READ_WRITE / WRITE_ONLY)
 *   streams[4] / streams[5]  source buffers holding input_ptr[0] / input_ptr[1]
 *   kernel[i] writes into streams[i]; even indices are BGRA, odd are RGBA.
 *
 * When gIsEmbedded is set, the BGRA half is only run if the context reports
 * CL_BGRA / CL_UNORM_INT8 as a supported 2D image format.
 *
 * device, context, queue: objects supplied by the test harness.
 * num_elements: unused by this test.
 *
 * Returns 0 when every executed verification passes, non-zero otherwise.
 * All host and OpenCL resources are released on every exit path after the
 * image-support check.
 */
int test_writeimage(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    /* Handles start out NULL so the shared cleanup path knows what to release. */
    cl_mem streams[6] = { NULL, NULL, NULL, NULL, NULL, NULL };
    cl_program program[2] = { NULL, NULL };
    cl_kernel kernel[4] = { NULL, NULL, NULL, NULL };

    unsigned char *input_ptr[2] = { NULL, NULL };
    unsigned char *output_ptr = NULL;
    cl_image_format img_format;
    cl_image_format *supported_formats = NULL;
    size_t threads[2];
    int img_width = 512;
    int img_height = 512;
    int i, err, any_err = 0;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {(size_t)img_width, (size_t)img_height, 1};
    size_t length = img_width * img_height * 4 * sizeof(unsigned char);
    int supportsBGRA = 0;
    cl_uint numFormats = 0;
    cl_uint fmt;
    MTdata d;

    /* NOTE(review): harness macro defined elsewhere; presumably returns early
       when the device has no image support -- nothing is allocated yet. */
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    d = init_genrand( gRandomSeed );
    input_ptr[0] = generate_8888_image(img_width, img_height, d);
    input_ptr[1] = generate_8888_image(img_width, img_height, d);
    free_mtdata(d); d = NULL;
    output_ptr = (unsigned char*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !output_ptr)
    {
        log_error("host buffer allocation failed\n");
        any_err = -1;
        goto cleanup;
    }

    if(gIsEmbedded)
    {
        /* Get the supported image formats to see if BGRA is supported */
        err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &numFormats);
        if (err != CL_SUCCESS)
        {
            log_error("clGetSupportedImageFormats failed\n");
            any_err = -1;
            goto cleanup;
        }

        if (numFormats > 0)
        {
            supported_formats = (cl_image_format *) malloc(sizeof(cl_image_format) * numFormats);
            if (!supported_formats)
            {
                log_error("malloc of supported image format list failed\n");
                any_err = -1;
                goto cleanup;
            }

            err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, numFormats, supported_formats, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clGetSupportedImageFormats failed\n");
                any_err = -1;
                goto cleanup;
            }

            for (fmt = 0; fmt < numFormats; fmt++)
            {
                /* The images below are CL_UNORM_INT8, so match the data type
                   as well as the channel order. */
                if (supported_formats[fmt].image_channel_order == CL_BGRA &&
                    supported_formats[fmt].image_channel_data_type == CL_UNORM_INT8)
                {
                    supportsBGRA = 1;
                    break;
                }
            }
        }

        free(supported_formats);
        supported_formats = NULL;
    }
    else
    {
        supportsBGRA = 1;
    }

    if(supportsBGRA)
    {
        img_format.image_channel_order = CL_BGRA;
        img_format.image_channel_data_type = CL_UNORM_INT8;
        streams[0] = clCreateImage2D(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
        if (!streams[0])
        {
            log_error("clCreateImage2D failed\n");
            any_err = -1;
            goto cleanup;
        }
    }

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT8;
    streams[1] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[1])
    {
        log_error("create_image_2d failed\n");
        any_err = -1;
        goto cleanup;
    }

    if(supportsBGRA)
    {
        img_format.image_channel_order = CL_BGRA;
        img_format.image_channel_data_type = CL_UNORM_INT8;
        streams[2] = clCreateImage2D(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
        if (!streams[2])
        {
            log_error("clCreateImage2D failed\n");
            any_err = -1;
            goto cleanup;
        }
    }

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT8;
    streams[3] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[3])
    {
        log_error("create_image_2d failed\n");
        any_err = -1;
        goto cleanup;
    }

    /* Source buffers the kernels read from. */
    for (i = 4; i < 6; i++)
    {
        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[i])
        {
            log_error("clCreateBuffer failed\n");
            any_err = -1;
            goto cleanup;
        }
    }

    for (i = 0; i < 2; i++)
    {
        err = clEnqueueWriteBuffer(queue, streams[4 + i], CL_TRUE, 0, length, input_ptr[i], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            any_err = -1;
            goto cleanup;
        }
    }

    if(supportsBGRA)
    {
        err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &bgra8888_write_kernel_code, "test_bgra8888_write" );
        if (err)
        {
            /* The state the helper leaves its out-parameters in on failure is
               not visible here, so do not release them. */
            program[0] = NULL;
            kernel[0] = NULL;
            any_err = -1;
            goto cleanup;
        }

        kernel[2] = clCreateKernel(program[0], "test_bgra8888_write", NULL);
        if (!kernel[2])
        {
            log_error("clCreateKernel failed\n");
            any_err = -1;
            goto cleanup;
        }
    }

    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &rgba8888_write_kernel_code, "test_rgba8888_write" );
    if (err)
    {
        /* Same caveat as above: leave the helper's out-parameters alone. */
        program[1] = NULL;
        kernel[1] = NULL;
        any_err = -1;
        goto cleanup;
    }
    kernel[3] = clCreateKernel(program[1], "test_rgba8888_write", NULL);
    if (!kernel[3])
    {
        log_error("clCreateKernel failed\n");
        any_err = -1;
        goto cleanup;
    }

    /* kernel[i] reads source buffer streams[4 + (i & 1)] and writes image streams[i]. */
    for (i = 0; i < 4; i++)
    {
        if(!supportsBGRA && (i == 0 || i == 2))
            continue;

        err = clSetKernelArg(kernel[i], 0, sizeof streams[4 + (i & 0x01)], &streams[4 + (i & 0x01)]);
        err |= clSetKernelArg(kernel[i], 1, sizeof streams[i], &streams[i]);
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            any_err = -1;
            goto cleanup;
        }
    }

    threads[0] = (unsigned int)img_width;
    threads[1] = (unsigned int)img_height;

    for (i=0; i<4; i++)
    {
        if(!supportsBGRA && (i == 0 || i == 2))
            continue;

        err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            any_err = -1;
            goto cleanup;
        }

        /* Blocking read, so output_ptr is valid as soon as the call returns. */
        err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clReadImage failed\n");
            any_err = -1;
            goto cleanup;
        }

        if (i == 0 || i == 2)
            err = verify_bgra8888_image(input_ptr[i&0x01], output_ptr, img_width, img_height);
        else
            err = verify_rgba8888_image(input_ptr[i&0x01], output_ptr, img_width, img_height);

        /* Keep going after a mismatch so every variant gets reported. */
        any_err |= err;
    }

cleanup:
    for (i = 0; i < 4; i++)
    {
        if (kernel[i])
            clReleaseKernel(kernel[i]);
    }
    for (i = 0; i < 2; i++)
    {
        if (program[i])
            clReleaseProgram(program[i]);
    }
    for (i = 0; i < 6; i++)
    {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    free(supported_formats);
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(output_ptr);

    return any_err;
}
|
||||
189
test_conformance/basic/test_writeimage_fp32.c
Normal file
189
test_conformance/basic/test_writeimage_fp32.c
Normal file
@@ -0,0 +1,189 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
|
||||
// OpenCL C source for the test_rgbaFFFF_write kernel. Each work-item takes its
// (x, y) position from get_global_id(0/1), reads four consecutive floats from
// src starting at (y * image width + x) * 4, and writes them unchanged as one
// float4 to dstimg at (x, y) with write_imagef().
static const char *rgbaFFFF_write_kernel_code =
|
||||
"__kernel void test_rgbaFFFF_write(__global float *src, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
|
||||
" float4 color;\n"
|
||||
"\n"
|
||||
" indx *= 4;\n"
|
||||
" color = (float4)(src[indx+0], src[indx+1], src[indx+2], src[indx+3]);\n"
|
||||
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
|
||||
"\n"
|
||||
"}\n";
|
||||
|
||||
|
||||
static float *
|
||||
generate_float_image(int w, int h, MTdata d)
|
||||
{
|
||||
float *ptr = (float*)malloc(w * h * 4 * sizeof(float));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
 * Check that the floats read back from the image (outptr) are exactly equal
 * to the reference data (image) over w * h * 4 elements.
 *
 * string is the test description used as the prefix of the logged
 * "passed" / "failed" line. Returns 0 on a full match, -1 at the first
 * mismatch.
 */
static int
verify_float_image(const char *string, float *image, float *outptr, int w, int h)
{
    const int total = w * h * 4;
    int pos = 0;

    while (pos < total)
    {
        if (outptr[pos] != image[pos])
        {
            log_error("%s failed\n", string);
            return -1;
        }
        ++pos;
    }

    log_info("%s passed\n", string);
    return 0;
}
|
||||
|
||||
/*
 * Runs the test_rgbaFFFF_write kernel against two 512x512 CL_RGBA / CL_FLOAT
 * images (streams[0] created CL_MEM_READ_WRITE, streams[1] created
 * CL_MEM_WRITE_ONLY), reads each image back and requires it to be exactly
 * equal to the random float data uploaded to the source buffer streams[2].
 *
 * device, context, queue: objects supplied by the test harness.
 * num_elements: unused by this test.
 *
 * Returns 0 when both verifications pass, non-zero otherwise. All host and
 * OpenCL resources are released on every exit path after the image-support
 * check.
 */
int test_writeimage_fp32(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    /* Handles start out NULL so the shared cleanup path knows what to release. */
    cl_mem streams[3] = { NULL, NULL, NULL };
    cl_program program = NULL;
    cl_kernel kernel[2] = { NULL, NULL };
    cl_image_format img_format;
    float *input_ptr = NULL, *output_ptr = NULL;
    size_t threads[2];
    int img_width = 512;
    int img_height = 512;
    int i, err, any_err = 0;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {(size_t)img_width, (size_t)img_height, 1};
    size_t length = img_width * img_height * 4 * sizeof(float);
    MTdata d;

    /* NOTE(review): harness macro defined elsewhere; presumably returns early
       when the device has no image support -- nothing is allocated yet. */
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    d = init_genrand( gRandomSeed );
    input_ptr = generate_float_image(img_width, img_height, d);
    free_mtdata(d); d = NULL;

    output_ptr = (float*)malloc(length);
    if (!input_ptr || !output_ptr)
    {
        log_error("host buffer allocation failed\n");
        any_err = -1;
        goto cleanup;
    }

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_FLOAT;
    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[0])
    {
        log_error("create_image_2d failed\n");
        any_err = -1;
        goto cleanup;
    }
    streams[1] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[1])
    {
        log_error("create_image_2d failed\n");
        any_err = -1;
        goto cleanup;
    }
    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[2])
    {
        log_error("clCreateBuffer failed\n");
        any_err = -1;
        goto cleanup;
    }

    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        any_err = -1;
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgbaFFFF_write_kernel_code, "test_rgbaFFFF_write" );
    if (err)
    {
        /* The state the helper leaves its out-parameters in on failure is
           not visible here, so do not release them. */
        program = NULL;
        kernel[0] = NULL;
        any_err = -1;
        goto cleanup;
    }
    kernel[1] = clCreateKernel(program, "test_rgbaFFFF_write", NULL);
    if (!kernel[1])
    {
        log_error("clCreateKernel failed\n");
        any_err = -1;
        goto cleanup;
    }

    /* kernel[i] reads the shared source buffer and writes image streams[i]. */
    for (i = 0; i < 2; i++)
    {
        err = clSetKernelArg(kernel[i], 0, sizeof streams[2], &streams[2]);
        err |= clSetKernelArg(kernel[i], 1, sizeof streams[i], &streams[i]);
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            any_err = -1;
            goto cleanup;
        }
    }

    threads[0] = (unsigned int)img_width;
    threads[1] = (unsigned int)img_height;

    for (i=0; i<2; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            any_err = -1;
            goto cleanup;
        }

        /* Blocking read, so output_ptr is valid as soon as the call returns. */
        err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clReadImage failed\n");
            any_err = -1;
            goto cleanup;
        }

        err = verify_float_image((i == 0) ? "WRITE_IMAGE_RGBA_FLOAT test with memflags = CL_MEM_READ_WRITE" :
                                 "WRITE_IMAGE_RGBA_FLOAT test with memflags = CL_MEM_WRITE_ONLY",
                                 input_ptr, output_ptr, img_width, img_height);
        /* Keep going after a mismatch so both variants get reported. */
        any_err |= err;
    }

cleanup:
    for (i = 0; i < 2; i++)
    {
        if (kernel[i])
            clReleaseKernel(kernel[i]);
    }
    if (program)
        clReleaseProgram(program);
    for (i = 0; i < 3; i++)
    {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    free(input_ptr);
    free(output_ptr);

    return any_err;
}
|
||||
|
||||
|
||||
195
test_conformance/basic/test_writeimage_int16.c
Normal file
195
test_conformance/basic/test_writeimage_int16.c
Normal file
@@ -0,0 +1,195 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// OpenCL C source for the test_rgba16_write kernel. Each work-item takes its
// (x, y) position from get_global_id(0/1), reads four consecutive unsigned
// shorts from src starting at (y * image width + x) * 4, converts them to
// float, divides by 65535.0f and writes the resulting float4 to dstimg at
// (x, y) with write_imagef().
static const char *rgba16_write_kernel_code =
|
||||
"__kernel void test_rgba16_write(__global unsigned short *src, write_only image2d_t dstimg)\n"
|
||||
"{\n"
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" int indx = tid_y * get_image_width(dstimg) + tid_x;\n"
|
||||
" float4 color;\n"
|
||||
"\n"
|
||||
" indx *= 4;\n"
|
||||
" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
|
||||
" color /= 65535.0f;\n"
|
||||
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
|
||||
"\n"
|
||||
"}\n";
|
||||
|
||||
|
||||
static unsigned short *
|
||||
generate_16bit_image(int w, int h, MTdata d)
|
||||
{
|
||||
cl_ushort *ptr = (cl_ushort*)malloc(w * h * 4 * sizeof(cl_ushort));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
ptr[i] = (cl_ushort)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// normalized 16bit ints ... get dived by 64k then muled by 64k...
|
||||
// give the poor things some tolerance
|
||||
#define MAX_ERR 1
|
||||
|
||||
static int
|
||||
verify_16bit_image(const char *string, cl_ushort *image, cl_ushort *outptr, int w, int h)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
{
|
||||
if (abs(outptr[i] - image[i]) > MAX_ERR)
|
||||
{
|
||||
log_error("%s failed\n", string);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("%s passed\n", string);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Runs the test_rgba16_write kernel against two 512x512 CL_RGBA /
 * CL_UNORM_INT16 images (streams[0] created CL_MEM_READ_WRITE, streams[1]
 * created CL_MEM_WRITE_ONLY), reads each image back and compares it with the
 * random 16-bit data uploaded to the source buffer streams[2], within the
 * tolerance applied by verify_16bit_image().
 *
 * device, context, queue: objects supplied by the test harness.
 * num_elements: unused by this test.
 *
 * Returns 0 when both verifications pass, non-zero otherwise. All host and
 * OpenCL resources are released on every exit path after the image-support
 * check.
 */
int test_writeimage_int16(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    /* Handles start out NULL so the shared cleanup path knows what to release. */
    cl_mem streams[3] = { NULL, NULL, NULL };
    cl_program program = NULL;
    cl_kernel kernel[2] = { NULL, NULL };
    cl_image_format img_format;
    cl_ushort *input_ptr = NULL, *output_ptr = NULL;
    size_t threads[2];
    int img_width = 512;
    int img_height = 512;
    int i, err, any_err = 0;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {(size_t)img_width, (size_t)img_height, 1};
    size_t length = img_width * img_height * 4 * sizeof(cl_ushort);
    MTdata d;

    /* NOTE(review): harness macro defined elsewhere; presumably returns early
       when the device has no image support -- nothing is allocated yet. */
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    d = init_genrand( gRandomSeed );
    input_ptr = generate_16bit_image(img_width, img_height, d);
    free_mtdata(d); d = NULL;

    output_ptr = (cl_ushort*)malloc(length);
    if (!input_ptr || !output_ptr)
    {
        log_error("host buffer allocation failed\n");
        any_err = -1;
        goto cleanup;
    }

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT16;
    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[0])
    {
        log_error("create_image_2d failed\n");
        any_err = -1;
        goto cleanup;
    }

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT16;
    streams[1] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[1])
    {
        log_error("create_image_2d failed\n");
        any_err = -1;
        goto cleanup;
    }
    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[2])
    {
        log_error("clCreateBuffer failed\n");
        any_err = -1;
        goto cleanup;
    }

    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        any_err = -1;
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgba16_write_kernel_code, "test_rgba16_write" );
    if (err)
    {
        /* The state the helper leaves its out-parameters in on failure is
           not visible here, so do not release them. */
        program = NULL;
        kernel[0] = NULL;
        any_err = -1;
        goto cleanup;
    }
    kernel[1] = clCreateKernel(program, "test_rgba16_write", NULL);
    if (!kernel[1])
    {
        log_error("clCreateKernel failed\n");
        any_err = -1;
        goto cleanup;
    }

    /* kernel[i] reads the shared source buffer and writes image streams[i]. */
    for (i = 0; i < 2; i++)
    {
        err = clSetKernelArg(kernel[i], 0, sizeof streams[2], &streams[2]);
        err |= clSetKernelArg(kernel[i], 1, sizeof streams[i], &streams[i]);
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            any_err = -1;
            goto cleanup;
        }
    }

    threads[0] = (unsigned int)img_width;
    threads[1] = (unsigned int)img_height;

    for (i=0; i<2; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            any_err = -1;
            goto cleanup;
        }

        /* Blocking read, so output_ptr is valid as soon as the call returns. */
        err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clReadImage failed\n");
            any_err = -1;
            goto cleanup;
        }

        err = verify_16bit_image((i == 0) ? "WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_READ_WRITE" :
                                 "WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_WRITE_ONLY",
                                 input_ptr, output_ptr, img_width, img_height);
        /* Keep going after a mismatch so both variants get reported. */
        any_err |= err;
    }

cleanup:
    for (i = 0; i < 2; i++)
    {
        if (kernel[i])
            clReleaseKernel(kernel[i]);
    }
    if (program)
        clReleaseProgram(program);
    for (i = 0; i < 3; i++)
    {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    free(input_ptr);
    free(output_ptr);

    return any_err;
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user