From 02fc5809e37025e732faff768a4d8dde83628429 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Thu, 24 Sep 2020 12:41:04 -0700 Subject: [PATCH] simple vector swizzle test (#960) * initial version, tests vec2 and vec4 * added all types and vector sizes * fix formatting * add checks for long and ulong support * test floats unconditionally, not tied to long support (oops) * fix for vec3 kernel arguments * remove generic address space dependency --- test_conformance/basic/CMakeLists.txt | 1 + test_conformance/basic/main.cpp | 215 +++--- test_conformance/basic/procs.h | 8 +- .../basic/test_vector_swizzle.cpp | 681 ++++++++++++++++++ 4 files changed, 796 insertions(+), 109 deletions(-) create mode 100644 test_conformance/basic/test_vector_swizzle.cpp diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index d73b84a2..27178246 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -39,6 +39,7 @@ set(${MODULE_NAME}_SOURCES test_async_copy.cpp test_sizeof.cpp test_vector_creation.cpp + test_vector_swizzle.cpp test_vec_type_hint.cpp test_numeric_constants.cpp test_constant_source.cpp diff --git a/test_conformance/basic/main.cpp b/test_conformance/basic/main.cpp index 11ed2c38..911f5e7b 100644 --- a/test_conformance/basic/main.cpp +++ b/test_conformance/basic/main.cpp @@ -26,130 +26,131 @@ #include "procs.h" test_definition test_list[] = { - ADD_TEST( hostptr ), - ADD_TEST( fpmath_float ), - ADD_TEST( fpmath_float2 ), - ADD_TEST( fpmath_float4 ), - ADD_TEST( intmath_int ), - ADD_TEST( intmath_int2 ), - ADD_TEST( intmath_int4 ), - ADD_TEST( intmath_long ), - ADD_TEST( intmath_long2 ), - ADD_TEST( intmath_long4 ), - ADD_TEST( hiloeo ), - ADD_TEST( if ), - ADD_TEST( sizeof ), - ADD_TEST( loop ), - ADD_TEST( pointer_cast ), - ADD_TEST( local_arg_def ), - ADD_TEST( local_kernel_def ), - ADD_TEST( local_kernel_scope ), - ADD_TEST( constant ), - ADD_TEST( constant_source ), - ADD_TEST( readimage ), - ADD_TEST( readimage_int16 ), - ADD_TEST( readimage_fp32 ), - ADD_TEST( writeimage ), - ADD_TEST( writeimage_int16 ), - ADD_TEST( writeimage_fp32 ), - ADD_TEST( mri_one ), + ADD_TEST(hostptr), + ADD_TEST(fpmath_float), + ADD_TEST(fpmath_float2), + ADD_TEST(fpmath_float4), + ADD_TEST(intmath_int), + ADD_TEST(intmath_int2), + ADD_TEST(intmath_int4), + ADD_TEST(intmath_long), + ADD_TEST(intmath_long2), + ADD_TEST(intmath_long4), + ADD_TEST(hiloeo), + ADD_TEST(if), + ADD_TEST(sizeof), + ADD_TEST(loop), + ADD_TEST(pointer_cast), + ADD_TEST(local_arg_def), + ADD_TEST(local_kernel_def), + ADD_TEST(local_kernel_scope), + ADD_TEST(constant), + ADD_TEST(constant_source), + ADD_TEST(readimage), + ADD_TEST(readimage_int16), + ADD_TEST(readimage_fp32), + ADD_TEST(writeimage), + ADD_TEST(writeimage_int16), + ADD_TEST(writeimage_fp32), + ADD_TEST(mri_one), - ADD_TEST( mri_multiple ), - ADD_TEST( image_r8 ), - ADD_TEST( barrier ), - ADD_TEST_VERSION( wg_barrier, Version(2, 0) ), - ADD_TEST( int2float ), - ADD_TEST( float2int ), - ADD_TEST( imagereadwrite ), - ADD_TEST( imagereadwrite3d ), - ADD_TEST( readimage3d ), - ADD_TEST( readimage3d_int16 ), - ADD_TEST( readimage3d_fp32 ), - ADD_TEST( bufferreadwriterect ), - ADD_TEST( arrayreadwrite ), - ADD_TEST( arraycopy ), - ADD_TEST( imagearraycopy ), - ADD_TEST( imagearraycopy3d ), - ADD_TEST( imagecopy ), - ADD_TEST( imagecopy3d ), - ADD_TEST( imagerandomcopy ), - ADD_TEST( arrayimagecopy ), - ADD_TEST( arrayimagecopy3d ), - ADD_TEST( imagenpot ), + ADD_TEST(mri_multiple), + ADD_TEST(image_r8), + ADD_TEST(barrier), + ADD_TEST_VERSION(wg_barrier, Version(2, 0)), + ADD_TEST(int2float), + ADD_TEST(float2int), + ADD_TEST(imagereadwrite), + ADD_TEST(imagereadwrite3d), + ADD_TEST(readimage3d), + ADD_TEST(readimage3d_int16), + ADD_TEST(readimage3d_fp32), + ADD_TEST(bufferreadwriterect), + ADD_TEST(arrayreadwrite), + ADD_TEST(arraycopy), + ADD_TEST(imagearraycopy), + ADD_TEST(imagearraycopy3d), + ADD_TEST(imagecopy), + ADD_TEST(imagecopy3d), + ADD_TEST(imagerandomcopy), + ADD_TEST(arrayimagecopy), + ADD_TEST(arrayimagecopy3d), + ADD_TEST(imagenpot), - ADD_TEST( vload_global ), - ADD_TEST( vload_local ), - ADD_TEST( vload_constant ), - ADD_TEST( vload_private ), - ADD_TEST( vstore_global ), - ADD_TEST( vstore_local ), - ADD_TEST( vstore_private ), + ADD_TEST(vload_global), + ADD_TEST(vload_local), + ADD_TEST(vload_constant), + ADD_TEST(vload_private), + ADD_TEST(vstore_global), + ADD_TEST(vstore_local), + ADD_TEST(vstore_private), - ADD_TEST( createkernelsinprogram ), - ADD_TEST( imagedim_pow2 ), - ADD_TEST( imagedim_non_pow2 ), - ADD_TEST( image_param ), - ADD_TEST( image_multipass_integer_coord ), - ADD_TEST( image_multipass_float_coord ), - ADD_TEST( explicit_s2v_char ), - ADD_TEST( explicit_s2v_uchar ), - ADD_TEST( explicit_s2v_short ), - ADD_TEST( explicit_s2v_ushort ), - ADD_TEST( explicit_s2v_int ), - ADD_TEST( explicit_s2v_uint ), - ADD_TEST( explicit_s2v_long ), - ADD_TEST( explicit_s2v_ulong ), - ADD_TEST( explicit_s2v_float ), - ADD_TEST( explicit_s2v_double ), + ADD_TEST(createkernelsinprogram), + ADD_TEST(imagedim_pow2), + ADD_TEST(imagedim_non_pow2), + ADD_TEST(image_param), + ADD_TEST(image_multipass_integer_coord), + ADD_TEST(image_multipass_float_coord), + ADD_TEST(explicit_s2v_char), + ADD_TEST(explicit_s2v_uchar), + ADD_TEST(explicit_s2v_short), + ADD_TEST(explicit_s2v_ushort), + ADD_TEST(explicit_s2v_int), + ADD_TEST(explicit_s2v_uint), + ADD_TEST(explicit_s2v_long), + ADD_TEST(explicit_s2v_ulong), + ADD_TEST(explicit_s2v_float), + ADD_TEST(explicit_s2v_double), - ADD_TEST( enqueue_map_buffer ), - ADD_TEST( enqueue_map_image ), + ADD_TEST(enqueue_map_buffer), + ADD_TEST(enqueue_map_image), - ADD_TEST( work_item_functions ), + ADD_TEST(work_item_functions), - ADD_TEST( astype ), + ADD_TEST(astype), - ADD_TEST( async_copy_global_to_local ), - ADD_TEST( async_copy_local_to_global ), - ADD_TEST( async_strided_copy_global_to_local ), - ADD_TEST( async_strided_copy_local_to_global ), - ADD_TEST( prefetch ), + ADD_TEST(async_copy_global_to_local), + ADD_TEST(async_copy_local_to_global), + ADD_TEST(async_strided_copy_global_to_local), + ADD_TEST(async_strided_copy_local_to_global), + ADD_TEST(prefetch), - ADD_TEST( kernel_call_kernel_function ), - ADD_TEST( host_numeric_constants ), - ADD_TEST( kernel_numeric_constants ), - ADD_TEST( kernel_limit_constants ), - ADD_TEST( kernel_preprocessor_macros ), + ADD_TEST(kernel_call_kernel_function), + ADD_TEST(host_numeric_constants), + ADD_TEST(kernel_numeric_constants), + ADD_TEST(kernel_limit_constants), + ADD_TEST(kernel_preprocessor_macros), - ADD_TEST( parameter_types ), - ADD_TEST( vector_creation ), - ADD_TEST( vec_type_hint ), - ADD_TEST( kernel_memory_alignment_local ), - ADD_TEST( kernel_memory_alignment_global ), - ADD_TEST( kernel_memory_alignment_constant ), - ADD_TEST( kernel_memory_alignment_private ), + ADD_TEST(parameter_types), + ADD_TEST(vector_creation), + ADD_TEST(vector_swizzle), + ADD_TEST(vec_type_hint), + ADD_TEST(kernel_memory_alignment_local), + ADD_TEST(kernel_memory_alignment_global), + ADD_TEST(kernel_memory_alignment_constant), + ADD_TEST(kernel_memory_alignment_private), - ADD_TEST_VERSION( progvar_prog_scope_misc, Version(2, 0) ), - ADD_TEST_VERSION( progvar_prog_scope_uninit, Version(2, 0) ), - ADD_TEST_VERSION( progvar_prog_scope_init, Version(2, 0) ), - ADD_TEST_VERSION( progvar_func_scope, Version(2, 0) ), + ADD_TEST_VERSION(progvar_prog_scope_misc, Version(2, 0)), + ADD_TEST_VERSION(progvar_prog_scope_uninit, Version(2, 0)), + ADD_TEST_VERSION(progvar_prog_scope_init, Version(2, 0)), + ADD_TEST_VERSION(progvar_func_scope, Version(2, 0)), - ADD_TEST( global_work_offsets ), - ADD_TEST( get_global_offset ), + ADD_TEST(global_work_offsets), + ADD_TEST(get_global_offset), - ADD_TEST_VERSION( global_linear_id, Version(2, 0) ), - ADD_TEST_VERSION( local_linear_id, Version(2, 0) ), - ADD_TEST_VERSION( enqueued_local_size, Version(2, 0) ), + ADD_TEST_VERSION(global_linear_id, Version(2, 0)), + ADD_TEST_VERSION(local_linear_id, Version(2, 0)), + ADD_TEST_VERSION(enqueued_local_size, Version(2, 0)), - ADD_TEST( simple_read_image_pitch ), - ADD_TEST( simple_write_image_pitch ), + ADD_TEST(simple_read_image_pitch), + ADD_TEST(simple_write_image_pitch), #if defined( __APPLE__ ) - ADD_TEST( queue_priority ), + ADD_TEST(queue_priority), #endif - ADD_TEST_VERSION( get_linear_ids, Version(2, 0) ), - ADD_TEST_VERSION( rw_image_access_qualifier, Version(2, 0) ), + ADD_TEST_VERSION(get_linear_ids, Version(2, 0)), + ADD_TEST_VERSION(rw_image_access_qualifier, Version(2, 0)), }; const int test_num = ARRAY_SIZE( test_list ); diff --git a/test_conformance/basic/procs.h b/test_conformance/basic/procs.h index 9fe17ef4..bdb7d6a4 100644 --- a/test_conformance/basic/procs.h +++ b/test_conformance/basic/procs.h @@ -126,8 +126,12 @@ extern int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context extern int test_kernel_call_kernel_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); -extern int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vector_creation(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_vector_swizzle(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_vec_type_hint(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); extern int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); diff --git a/test_conformance/basic/test_vector_swizzle.cpp b/test_conformance/basic/test_vector_swizzle.cpp new file mode 100644 index 00000000..67bf7537 --- /dev/null +++ b/test_conformance/basic/test_vector_swizzle.cpp @@ -0,0 +1,681 @@ +// +// Copyright (c) 2020 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include +#include +#include +#include + +#include "procs.h" +#include "harness/testHarness.h" + +template struct TestInfo +{ +}; + +template <> struct TestInfo<2> +{ + static const size_t vector_size = 2; + + static constexpr const char* kernel_source_xyzw = R"CLC( +__kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) { + int index = 0; + + // lvalue swizzles + dst[index++].x = value.x; + dst[index++].y = value.x; + dst[index++].xy = value; + dst[index++].yx = value; + + // rvalue swizzles + dst[index++] = value.x; + dst[index++] = value.y; + dst[index++] = value.xy; + dst[index++] = value.yx; +} +)CLC"; + + static constexpr const char* kernel_source_rgba = R"CLC( +__kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) { + int index = 0; + + // lvalue swizzles + dst[index++].r = value.r; + dst[index++].g = value.r; + dst[index++].rg = value; + dst[index++].gr = value; + + // rvalue swizzles + dst[index++] = value.r; + dst[index++] = value.g; + dst[index++] = value.rg; + dst[index++] = value.gr; +} +)CLC"; + + static constexpr const char* kernel_source_sN = R"CLC( +__kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) { + int index = 0; + + // lvalue swizzles + dst[index++].s0 = value.s0; + dst[index++].s1 = value.s0; + dst[index++].s01 = value; + dst[index++].s10 = value; + + // rvalue swizzles + dst[index++] = value.s0; + dst[index++] = value.s1; + dst[index++] = value.s01; + dst[index++] = value.s10; +} +)CLC"; +}; + +template <> struct TestInfo<3> +{ + static const size_t vector_size = 4; // sizeof(vec3) is four elements + + static constexpr const char* kernel_source_xyzw = R"CLC( +__kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) { + int index = 0; + + // lvalue swizzles + dst[index++].x = value.x; + dst[index++].y = value.x; + dst[index++].z = value.x; + dst[index++].xyz = value; + dst[index++].zyx = value; + + // rvalue swizzles + vstore3(value.x, 0, (__global BASETYPE*)(dst + index++)); + vstore3(value.y, 0, (__global BASETYPE*)(dst + index++)); + vstore3(value.z, 0, (__global BASETYPE*)(dst + index++)); + vstore3(value.xyz, 0, (__global BASETYPE*)(dst + index++)); + vstore3(value.zyx, 0, (__global BASETYPE*)(dst + index++)); +} +)CLC"; + + static constexpr const char* kernel_source_rgba = R"CLC( +__kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) { + int index = 0; + + // lvalue swizzles + dst[index++].r = value.r; + dst[index++].g = value.r; + dst[index++].b = value.r; + dst[index++].rgb = value; + dst[index++].bgr = value; + + // rvalue swizzles + vstore3(value.r, 0, (__global BASETYPE*)(dst + index++)); + vstore3(value.g, 0, (__global BASETYPE*)(dst + index++)); + vstore3(value.b, 0, (__global BASETYPE*)(dst + index++)); + vstore3(value.rgb, 0, (__global BASETYPE*)(dst + index++)); + vstore3(value.bgr, 0, (__global BASETYPE*)(dst + index++)); +} +)CLC"; + + static constexpr const char* kernel_source_sN = R"CLC( +__kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) { + int index = 0; + + // lvalue swizzles + dst[index++].s0 = value.s0; + dst[index++].s1 = value.s0; + dst[index++].s2 = value.s0; + dst[index++].s012 = value; + dst[index++].s210 = value; + + // rvalue swizzles + vstore3(value.s0, 0, (__global BASETYPE*)(dst + index++)); + vstore3(value.s1, 0, (__global BASETYPE*)(dst + index++)); + vstore3(value.s2, 0, (__global BASETYPE*)(dst + index++)); + vstore3(value.s012, 0, (__global BASETYPE*)(dst + index++)); + vstore3(value.s210, 0, (__global BASETYPE*)(dst + index++)); +} +)CLC"; +}; + +template <> struct TestInfo<4> +{ + static const size_t vector_size = 4; + + static constexpr const char* kernel_source_xyzw = R"CLC( +__kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) { + int index = 0; + + // lvalue swizzles + dst[index++].x = value.x; + dst[index++].y = value.x; + dst[index++].z = value.x; + dst[index++].w = value.x; + dst[index++].xyzw = value; + dst[index++].wzyx = value; + + // rvalue swizzles + dst[index++] = value.x; + dst[index++] = value.y; + dst[index++] = value.z; + dst[index++] = value.w; + dst[index++] = value.xyzw; + dst[index++] = value.wzyx; +} +)CLC"; + + static constexpr const char* kernel_source_rgba = R"CLC( +__kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) { + int index = 0; + + // lvalue swizzles + dst[index++].r = value.r; + dst[index++].g = value.r; + dst[index++].b = value.r; + dst[index++].a = value.r; + dst[index++].rgba = value; + dst[index++].abgr = value; + + // rvalue swizzles + dst[index++] = value.r; + dst[index++] = value.g; + dst[index++] = value.b; + dst[index++] = value.a; + dst[index++] = value.rgba; + dst[index++] = value.abgr; +} +)CLC"; + + static constexpr const char* kernel_source_sN = R"CLC( +__kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) { + int index = 0; + + // lvalue swizzles + dst[index++].s0 = value.s0; + dst[index++].s1 = value.s0; + dst[index++].s2 = value.s0; + dst[index++].s3 = value.s0; + dst[index++].s0123 = value; + dst[index++].s3210 = value; + + // rvalue swizzles + dst[index++] = value.s0; + dst[index++] = value.s1; + dst[index++] = value.s2; + dst[index++] = value.s3; + dst[index++] = value.s0123; + dst[index++] = value.s3210; +} +)CLC"; +}; + +template <> struct TestInfo<8> +{ + static const size_t vector_size = 8; + + static constexpr const char* kernel_source_xyzw = R"CLC( +__kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) { + int index = 0; + + // xwzw only for first four components! + + // lvalue swizzles + dst[index++].x = value.x; + dst[index++].y = value.x; + dst[index++].z = value.x; + dst[index++].w = value.x; + dst[index++].s4 = value.s0; + dst[index++].s5 = value.s0; + dst[index++].s6 = value.s0; + dst[index++].s7 = value.s0; + dst[index].xyzw = value.s0123; + dst[index++].s4567 = value.s4567; + dst[index].s7654 = value.s0123; + dst[index++].wzyx = value.s4567; + + // rvalue swizzles + dst[index++] = value.x; + dst[index++] = value.y; + dst[index++] = value.z; + dst[index++] = value.w; + dst[index++] = value.s4; + dst[index++] = value.s5; + dst[index++] = value.s6; + dst[index++] = value.s7; + dst[index++] = (TYPE)(value.xyzw, value.s4567); + dst[index++] = (TYPE)(value.s7654, value.wzyx); +} +)CLC"; + static constexpr const char* kernel_source_rgba = R"CLC( +__kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) { + int index = 0; + + // rgba only for first four components! + + // lvalue swizzles + dst[index++].r = value.r; + dst[index++].g = value.r; + dst[index++].b = value.r; + dst[index++].a = value.r; + dst[index++].s4 = value.s0; + dst[index++].s5 = value.s0; + dst[index++].s6 = value.s0; + dst[index++].s7 = value.s0; + dst[index].rgba = value.s0123; + dst[index++].s4567 = value.s4567; + dst[index].s7654 = value.s0123; + dst[index++].abgr = value.s4567; + + // rvalue swizzles + dst[index++] = value.r; + dst[index++] = value.g; + dst[index++] = value.b; + dst[index++] = value.a; + dst[index++] = value.s4; + dst[index++] = value.s5; + dst[index++] = value.s6; + dst[index++] = value.s7; + dst[index++] = (TYPE)(value.rgba, value.s4567); + dst[index++] = (TYPE)(value.s7654, value.abgr); +} +)CLC"; + static constexpr const char* kernel_source_sN = R"CLC( +__kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) { + int index = 0; + + // lvalue swizzles + dst[index++].s0 = value.s0; + dst[index++].s1 = value.s0; + dst[index++].s2 = value.s0; + dst[index++].s3 = value.s0; + dst[index++].s4 = value.s0; + dst[index++].s5 = value.s0; + dst[index++].s6 = value.s0; + dst[index++].s7 = value.s0; + dst[index++].s01234567 = value; + dst[index++].s76543210 = value; + + // rvalue swizzles + dst[index++] = value.s0; + dst[index++] = value.s1; + dst[index++] = value.s2; + dst[index++] = value.s3; + dst[index++] = value.s4; + dst[index++] = value.s5; + dst[index++] = value.s6; + dst[index++] = value.s7; + dst[index++] = value.s01234567; + dst[index++] = value.s76543210; +} +)CLC"; +}; + +template <> struct TestInfo<16> +{ + static const size_t vector_size = 16; + + static constexpr const char* kernel_source_xyzw = R"CLC( +__kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) { + int index = 0; + + // xwzw only for first four components! + + // lvalue swizzles + dst[index++].x = value.x; + dst[index++].y = value.x; + dst[index++].z = value.x; + dst[index++].w = value.x; + dst[index++].s4 = value.s0; + dst[index++].s5 = value.s0; + dst[index++].s6 = value.s0; + dst[index++].s7 = value.s0; + dst[index++].s8 = value.s0; + dst[index++].s9 = value.s0; + dst[index++].sa = value.s0; + dst[index++].sb = value.s0; + dst[index++].sc = value.s0; + dst[index++].sd = value.s0; + dst[index++].se = value.s0; + dst[index++].sf = value.s0; + dst[index].xyzw = value.s0123; + dst[index].s4567 = value.s4567; + dst[index].s89ab = value.s89ab; + dst[index++].scdef = value.scdef; + dst[index].sfedc = value.s0123; + dst[index].sba98 = value.s4567; + dst[index].s7654 = value.s89ab; + dst[index++].wzyx = value.scdef; + + // rvalue swizzles + dst[index++] = value.x; + dst[index++] = value.y; + dst[index++] = value.z; + dst[index++] = value.w; + dst[index++] = value.s4; + dst[index++] = value.s5; + dst[index++] = value.s6; + dst[index++] = value.s7; + dst[index++] = value.s8; + dst[index++] = value.s9; + dst[index++] = value.sa; + dst[index++] = value.sb; + dst[index++] = value.sc; + dst[index++] = value.sd; + dst[index++] = value.se; + dst[index++] = value.sf; + dst[index++] = (TYPE)(value.xyzw, value.s4567, value.s89abcdef); + dst[index++] = (TYPE)(value.sfedcba98, value.s7654, value.wzyx); +} +)CLC"; + static constexpr const char* kernel_source_rgba = R"CLC( +__kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) { + int index = 0; + + // rgba only for first four components! + + // lvalue swizzles + dst[index++].r = value.r; + dst[index++].g = value.r; + dst[index++].b = value.r; + dst[index++].a = value.r; + dst[index++].s4 = value.s0; + dst[index++].s5 = value.s0; + dst[index++].s6 = value.s0; + dst[index++].s7 = value.s0; + dst[index++].s8 = value.s0; + dst[index++].s9 = value.s0; + dst[index++].sa = value.s0; + dst[index++].sb = value.s0; + dst[index++].sc = value.s0; + dst[index++].sd = value.s0; + dst[index++].se = value.s0; + dst[index++].sf = value.s0; + dst[index].rgba = value.s0123; + dst[index].s4567 = value.s4567; + dst[index].s89ab = value.s89ab; + dst[index++].scdef = value.scdef; + dst[index].sfedc = value.s0123; + dst[index].sba98 = value.s4567; + dst[index].s7654 = value.s89ab; + dst[index++].abgr = value.scdef; + + // rvalue swizzles + dst[index++] = value.r; + dst[index++] = value.g; + dst[index++] = value.b; + dst[index++] = value.a; + dst[index++] = value.s4; + dst[index++] = value.s5; + dst[index++] = value.s6; + dst[index++] = value.s7; + dst[index++] = value.s8; + dst[index++] = value.s9; + dst[index++] = value.sa; + dst[index++] = value.sb; + dst[index++] = value.sc; + dst[index++] = value.sd; + dst[index++] = value.se; + dst[index++] = value.sf; + dst[index++] = (TYPE)(value.rgba, value.s4567, value.s89abcdef); + dst[index++] = (TYPE)(value.sfedcba98, value.s7654, value.abgr); +} +)CLC"; + static constexpr const char* kernel_source_sN = R"CLC( +__kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) { + int index = 0; + + // lvalue swizzles + dst[index++].s0 = value.s0; + dst[index++].s1 = value.s0; + dst[index++].s2 = value.s0; + dst[index++].s3 = value.s0; + dst[index++].s4 = value.s0; + dst[index++].s5 = value.s0; + dst[index++].s6 = value.s0; + dst[index++].s7 = value.s0; + dst[index++].s8 = value.s0; + dst[index++].s9 = value.s0; + dst[index++].sa = value.s0; + dst[index++].sb = value.s0; + dst[index++].sc = value.s0; + dst[index++].sd = value.s0; + dst[index++].se = value.s0; + dst[index++].sf = value.s0; + dst[index++].s0123456789abcdef = value; // lower-case + dst[index++].sFEDCBA9876543210 = value; // upper-case + + // rvalue swizzles + dst[index++] = value.s0; + dst[index++] = value.s1; + dst[index++] = value.s2; + dst[index++] = value.s3; + dst[index++] = value.s4; + dst[index++] = value.s5; + dst[index++] = value.s6; + dst[index++] = value.s7; + dst[index++] = value.s8; + dst[index++] = value.s9; + dst[index++] = value.sa; + dst[index++] = value.sb; + dst[index++] = value.sc; + dst[index++] = value.sd; + dst[index++] = value.se; + dst[index++] = value.sf; + dst[index++] = value.s0123456789abcdef; // lower-case + dst[index++] = value.sFEDCBA9876543210; // upper-case +} +)CLC"; +}; + +template +static void makeReference(std::vector& ref) +{ + // N single channel lvalue tests + // 2 multi-value lvalue tests + // N single channel rvalue tests + // 2 multi-value rvalue tests + const size_t refSize = (N + 2 + N + 2) * S; + + ref.resize(refSize); + std::fill(ref.begin(), ref.end(), 99); + + size_t dstIndex = 0; + + // single channel lvalue + for (size_t i = 0; i < N; i++) + { + ref[dstIndex * S + i] = 0; + ++dstIndex; + } + + // normal lvalue + for (size_t c = 0; c < N; c++) + { + ref[dstIndex * S + c] = c; + } + ++dstIndex; + + // reverse lvalue + for (size_t c = 0; c < N; c++) + { + ref[dstIndex * S + c] = N - c - 1; + } + ++dstIndex; + + // single channel rvalue + for (size_t i = 0; i < N; i++) + { + for (size_t c = 0; c < N; c++) + { + ref[dstIndex * S + c] = i; + } + ++dstIndex; + } + + // normal rvalue + for (size_t c = 0; c < N; c++) + { + ref[dstIndex * S + c] = c; + } + ++dstIndex; + + // reverse rvalue + for (size_t c = 0; c < N; c++) + { + ref[dstIndex * S + c] = N - c - 1; + } + ++dstIndex; + + assert(dstIndex * S == refSize); +} + +template +static int +test_vectype_case(const std::vector& value, const std::vector& reference, + cl_context context, cl_kernel kernel, cl_command_queue queue) +{ + cl_int error = CL_SUCCESS; + + clMemWrapper mem; + + std::vector buffer(reference.size(), 99); + mem = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + buffer.size() * sizeof(T), buffer.data(), &error); + test_error(error, "Unable to create test buffer"); + + error = clSetKernelArg(kernel, 0, value.size() * sizeof(T), value.data()); + test_error(error, "Unable to set value kernel arg"); + + error = clSetKernelArg(kernel, 1, sizeof(mem), &mem); + test_error(error, "Unable to set destination buffer kernel arg"); + + size_t global_work_size[] = { 1 }; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, + NULL, 0, NULL, NULL); + test_error(error, "Unable to enqueue test kernel"); + + error = clFinish(queue); + test_error(error, "clFinish failed after test kernel"); + + error = + clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, buffer.size() * sizeof(T), + buffer.data(), 0, NULL, NULL); + test_error(error, "Unable to read data after test kernel"); + + if (buffer != reference) + { + log_error("Result buffer did not match reference buffer!\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +template +static int test_vectype(const char* type_name, cl_device_id device, + cl_context context, cl_command_queue queue) +{ + log_info(" testing type %s%d\n", type_name, N); + + cl_int error = CL_SUCCESS; + int result = TEST_PASS; + + clProgramWrapper program; + clKernelWrapper kernel; + + std::string buildOptions{ "-DTYPE=" }; + buildOptions += type_name; + buildOptions += std::to_string(N); + buildOptions += " -DBASETYPE="; + buildOptions += type_name; + + constexpr size_t S = TestInfo::vector_size; + + std::vector value(S); + std::iota(value.begin(), value.end(), 0); + + std::vector reference; + makeReference(reference); + + // XYZW swizzles: + + const char* xyzw_source = TestInfo::kernel_source_xyzw; + error = create_single_kernel_helper( + context, &program, &kernel, 1, &xyzw_source, "test_vector_swizzle_xyzw", + buildOptions.c_str()); + test_error(error, "Unable to create xyzw test kernel"); + + result |= test_vectype_case(value, reference, context, kernel, queue); + + // sN swizzles: + const char* sN_source = TestInfo::kernel_source_sN; + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sN_source, "test_vector_swizzle_sN", + buildOptions.c_str()); + test_error(error, "Unable to create sN test kernel"); + + result |= test_vectype_case(value, reference, context, kernel, queue); + + // RGBA swizzles for OpenCL 3.0 and newer: + const Version device_version = get_device_cl_version(device); + if (device_version >= Version(3, 0)) + { + const char* rgba_source = TestInfo::kernel_source_rgba; + error = create_single_kernel_helper( + context, &program, &kernel, 1, &rgba_source, + "test_vector_swizzle_rgba", buildOptions.c_str()); + test_error(error, "Unable to create rgba test kernel"); + + result |= test_vectype_case(value, reference, context, kernel, queue); + } + + return result; +} + +template +static int test_type(const char* type_name, cl_device_id device, + cl_context context, cl_command_queue queue) +{ + return test_vectype(type_name, device, context, queue) + | test_vectype(type_name, device, context, queue) + | test_vectype(type_name, device, context, queue) + | test_vectype(type_name, device, context, queue) + | test_vectype(type_name, device, context, queue); +} + +int test_vector_swizzle(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + int hasDouble = is_extension_available(device, "cl_khr_fp64"); + + int result = TEST_PASS; + result |= test_type("char", device, context, queue); + result |= test_type("uchar", device, context, queue); + result |= test_type("short", device, context, queue); + result |= test_type("ushort", device, context, queue); + result |= test_type("int", device, context, queue); + result |= test_type("uint", device, context, queue); + if (gHasLong) + { + result |= test_type("long", device, context, queue); + result |= test_type("ulong", device, context, queue); + } + result |= test_type("float", device, context, queue); + if (hasDouble) + { + result |= test_type("double", device, context, queue); + } + return result; +}