Initial open source release of OpenCL 2.2 CTS.

This commit is contained in:
Kedar Patil
2017-05-16 18:25:37 +05:30
parent 6911ba5116
commit 2821bf1323
1035 changed files with 343518 additions and 0 deletions

View File

@@ -0,0 +1,26 @@
# Source list for the integer_ops conformance test module.
# ${MODULE_NAME}_SOURCES names the module's own test files followed by the shared
# harness sources (../../test_common/harness) that are compiled into the same target.
set(MODULE_NAME INTEGER_OPS)
set(${MODULE_NAME}_SOURCES
main.c
test_int_basic_ops.c
test_integers.cpp
test_upsample.cpp
test_intmul24.c test_intmad24.c
test_sub_sat.c test_add_sat.c
test_abs.c test_absdiff.c
test_unary_ops.cpp
verification_and_generation_functions.c
test_popcount.c
../../test_common/harness/ThreadPool.c
../../test_common/harness/mt19937.c
../../test_common/harness/conversions.c
../../test_common/harness/errorHelpers.c
../../test_common/harness/threadTesting.c
../../test_common/harness/testHarness.c
../../test_common/harness/kernelHelpers.c
../../test_common/harness/msvc9.c
../../test_common/harness/parseParameters.cpp
)
# NOTE(review): ../CMakeCommon.txt is not visible here; presumably it consumes
# MODULE_NAME and ${MODULE_NAME}_SOURCES to declare the actual target - confirm.
include(../CMakeCommon.txt)

View File

@@ -0,0 +1,28 @@
# Boost.Build description of the test_integer_ops executable.
# The project requirements force every source to be compiled as C++
# (-xc++ for gcc, /TP for msvc), including the files with a .c extension.
# NOTE(review): the source list below names test_int.c, test_long.c, test_uint.c and
# test_ulong.c, and omits test_int_basic_ops.c, test_popcount.c, test_unary_ops.cpp and
# verification_and_generation_functions.c that the sibling CMake and Makefile lists
# use - this list looks stale; verify against the files actually in the directory.
project
: requirements
<toolset>gcc:<cflags>-xc++
<toolset>msvc:<cflags>"/TP"
;
exe test_integer_ops
: main.c
test_abs.c
test_absdiff.c
test_add_sat.c
test_int.c
test_integers.cpp
test_intmad24.c
test_intmul24.c
test_long.c
test_sub_sat.c
test_uint.c
test_ulong.c
test_upsample.cpp
;
# Installs the executable under $(DIST)/<variant>/tests/test_conformance/integer_ops.
install dist
: test_integer_ops
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/integer_ops
<variant>release:<location>$(DIST)/release/tests/test_conformance/integer_ops
;

View File

@@ -0,0 +1,52 @@
# Makefile for the test_integer_ops conformance binary (links against the OpenCL,
# OpenGL, GLUT and AppKit frameworks, so it targets macOS).
# Defining BUILD_WITH_ATF adds the ATF framework to the link line and -DUSE_ATF
# to the compile flags.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif
# Module sources followed by the shared harness sources.
# NOTE(review): unlike the sibling CMake list, this list does not include
# harness/parseParameters.cpp or harness/msvc9.c - confirm that is intentional.
SRCS = main.c \
test_popcount.c \
test_int_basic_ops.c \
test_integers.cpp \
test_upsample.cpp \
test_intmul24.c test_intmad24.c \
test_sub_sat.c test_add_sat.c \
test_abs.c test_absdiff.c \
test_unary_ops.cpp \
verification_and_generation_functions.c \
../../test_common/harness/conversions.c \
../../test_common/harness/errorHelpers.c \
../../test_common/harness/threadTesting.c \
../../test_common/harness/testHarness.c \
../../test_common/harness/mt19937.c \
../../test_common/harness/ThreadPool.c \
../../test_common/harness/kernelHelpers.c
DEFINES =
SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
# FRAMEWORK, HEADERS and TARGETOBJECT are assigned but not referenced by any rule
# in this file.
FRAMEWORK = $(SOURCES)
HEADERS =
TARGET = test_integer_ops
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os
# CC is the C++ driver, so the .c sources are built with c++ as well.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
# Map every .c and .cpp source to its .o; objects are produced by make's implicit rules.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}
TARGETOBJECT =
# Default goal: link all objects into $(TARGET).
all: $(TARGET)
$(TARGET): $(OBJECTS)
$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
clean:
rm -f $(TARGET) $(OBJECTS)
.DEFAULT:
@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,347 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include "procs.h"
#include "../../test_common/harness/testHarness.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif
// Table of test entry points handed to runTestHarness() by main().
// Entry i is paired by position with basefn_names[i]; the ct_assert that follows the
// two tables checks that they have the same number of entries, so every addition or
// removal here needs the matching change in basefn_names at the same index.
basefn basefn_list[] = {
test_integer_clz,
test_integer_ctz,
test_integer_hadd,
test_integer_rhadd,
test_integer_mul_hi,
test_integer_rotate,
test_integer_clamp,
test_integer_mad_sat,
test_integer_mad_hi,
test_integer_min,
test_integer_max,
test_integer_upsample,
test_abs,
test_absdiff,
test_add_sat,
test_sub_sat,
test_integer_addAssign,
test_integer_subtractAssign,
test_integer_multiplyAssign,
test_integer_divideAssign,
test_integer_moduloAssign,
test_integer_andAssign,
test_integer_orAssign,
test_integer_exclusiveOrAssign,
test_unary_ops_increment,
test_unary_ops_decrement,
test_unary_ops_full,
test_intmul24,
test_intmad24,
test_long_math,
test_long_logic,
test_long_shift,
test_long_compare,
test_ulong_math,
test_ulong_logic,
test_ulong_shift,
test_ulong_compare,
test_int_math,
test_int_logic,
test_int_shift,
test_int_compare,
test_uint_math,
test_uint_logic,
test_uint_shift,
test_uint_compare,
test_short_math,
test_short_logic,
test_short_shift,
test_short_compare,
test_ushort_math,
test_ushort_logic,
test_ushort_shift,
test_ushort_compare,
test_char_math,
test_char_logic,
test_char_shift,
test_char_compare,
test_uchar_math,
test_uchar_logic,
test_uchar_shift,
test_uchar_compare,
test_popcount,
// Quick
// NOTE(review): presumably shorter-running variants of the per-type tests above;
// confirm against their definitions, which are not in this file.
test_quick_long_math,
test_quick_long_logic,
test_quick_long_shift,
test_quick_long_compare,
test_quick_ulong_math,
test_quick_ulong_logic,
test_quick_ulong_shift,
test_quick_ulong_compare,
test_quick_int_math,
test_quick_int_logic,
test_quick_int_shift,
test_quick_int_compare,
test_quick_uint_math,
test_quick_uint_logic,
test_quick_uint_shift,
test_quick_uint_compare,
test_quick_short_math,
test_quick_short_logic,
test_quick_short_shift,
test_quick_short_compare,
test_quick_ushort_math,
test_quick_ushort_logic,
test_quick_ushort_shift,
test_quick_ushort_compare,
test_quick_char_math,
test_quick_char_logic,
test_quick_char_shift,
test_quick_char_compare,
test_quick_uchar_math,
test_quick_uchar_logic,
test_quick_uchar_shift,
test_quick_uchar_compare,
test_vector_scalar_ops,
};
// Names passed to runTestHarness() alongside basefn_list; entry i names
// basefn_list[i]. The name is not always the function name minus "test_": for
// example test_abs is listed as "integer_abs", test_absdiff as "integer_abs_diff"
// and test_intmul24 as "integer_mul24".
const char *basefn_names[] = {
"integer_clz",
"integer_ctz",
"integer_hadd",
"integer_rhadd",
"integer_mul_hi",
"integer_rotate",
"integer_clamp",
"integer_mad_sat",
"integer_mad_hi",
"integer_min",
"integer_max",
"integer_upsample",
"integer_abs",
"integer_abs_diff",
"integer_add_sat",
"integer_sub_sat",
"integer_addAssign",
"integer_subtractAssign",
"integer_multiplyAssign",
"integer_divideAssign",
"integer_moduloAssign",
"integer_andAssign",
"integer_orAssign",
"integer_exclusiveOrAssign",
"unary_ops_increment",
"unary_ops_decrement",
"unary_ops_full",
"integer_mul24",
"integer_mad24",
"long_math",
"long_logic",
"long_shift",
"long_compare",
"ulong_math",
"ulong_logic",
"ulong_shift",
"ulong_compare",
"int_math",
"int_logic",
"int_shift",
"int_compare",
"uint_math",
"uint_logic",
"uint_shift",
"uint_compare",
"short_math",
"short_logic",
"short_shift",
"short_compare",
"ushort_math",
"ushort_logic",
"ushort_shift",
"ushort_compare",
"char_math",
"char_logic",
"char_shift",
"char_compare",
"uchar_math",
"uchar_logic",
"uchar_shift",
"uchar_compare",
"popcount",
// Quick
"quick_long_math",
"quick_long_logic",
"quick_long_shift",
"quick_long_compare",
"quick_ulong_math",
"quick_ulong_logic",
"quick_ulong_shift",
"quick_ulong_compare",
"quick_int_math",
"quick_int_logic",
"quick_int_shift",
"quick_int_compare",
"quick_uint_math",
"quick_uint_logic",
"quick_uint_shift",
"quick_uint_compare",
"quick_short_math",
"quick_short_logic",
"quick_short_shift",
"quick_short_compare",
"quick_ushort_math",
"quick_ushort_logic",
"quick_ushort_shift",
"quick_ushort_compare",
"quick_char_math",
"quick_char_logic",
"quick_char_shift",
"quick_char_compare",
"quick_uchar_math",
"quick_uchar_logic",
"quick_uchar_shift",
"quick_uchar_compare",
"vector_scalar",
};
// Compile-time guard: the function table and the name table must have the same
// number of entries, because they are indexed in parallel.
ct_assert( ( sizeof( basefn_list ) / sizeof( basefn_list[0] ) ) == ( sizeof( basefn_names ) / sizeof( basefn_names[0] ) ) );
// Number of registered tests, as passed to runTestHarness().
int num_fns = (int)( sizeof( basefn_names ) / sizeof( basefn_names[0] ) );
void fill_test_values( cl_long *outBufferA, cl_long *outBufferB, size_t numElements, MTdata d )
{
static const cl_long sUniqueValues[] = { 0x3333333333333333LL, 0x5555555555555555LL, 0x9999999999999999LL, 0xaaaaaaaaaaaaaaaaLL, 0xccccccccccccccccLL,
0x3030303030303030LL, 0x5050505050505050LL, 0x9090909090909090LL, 0xa0a0a0a0a0a0a0a0LL, 0xc0c0c0c0c0c0c0c0LL, 0xf0f0f0f0f0f0f0f0LL,
0x0303030303030303LL, 0x0505050505050505LL, 0x0909090909090909LL, 0x0a0a0a0a0a0a0a0aLL, 0x0c0c0c0c0c0c0c0cLL, 0x0f0f0f0f0f0f0f0fLL,
0x3300330033003300LL, 0x5500550055005500LL, 0x9900990099009900LL, 0xaa00aa00aa00aa00LL, 0xcc00cc00cc00cc00LL, 0xff00ff00ff00ff00LL,
0x0033003300330033LL, 0x0055005500550055LL, 0x0099009900990099LL, 0x00aa00aa00aa00aaLL, 0x00cc00cc00cc00ccLL, 0x00ff00ff00ff00ffLL,
0x3333333300000000LL, 0x5555555500000000LL, 0x9999999900000000LL, 0xaaaaaaaa00000000LL, 0xcccccccc00000000LL, 0xffffffff00000000LL,
0x0000000033333333LL, 0x0000000055555555LL, 0x0000000099999999LL, 0x00000000aaaaaaaaLL, 0x00000000ccccccccLL, 0x00000000ffffffffLL,
0x3333000000003333LL, 0x5555000000005555LL, 0x9999000000009999LL, 0xaaaa00000000aaaaLL, 0xcccc00000000ccccLL, 0xffff00000000ffffLL};
static cl_long sSpecialValues[ 128 + 128 + 128 + ( sizeof( sUniqueValues ) / sizeof( sUniqueValues[ 0 ] ) ) ] = { 0 };
if( sSpecialValues[ 0 ] == 0 )
{
// Init the power-of-two special values
for( size_t i = 0; i < 64; i++ )
{
sSpecialValues[ i ] = 1LL << i;
sSpecialValues[ i + 64 ] = -1LL << i;
sSpecialValues[ i + 128 ] = sSpecialValues[ i ] - 1;
sSpecialValues[ i + 128 + 64 ] = sSpecialValues[ i ] - 1;
sSpecialValues[ i + 256 ] = sSpecialValues[ i ] + 1;
sSpecialValues[ i + 256 + 64 ] = sSpecialValues[ i ] + 1;
}
memcpy( &sSpecialValues[ 128 + 128 + 128 ], sUniqueValues, sizeof( sUniqueValues ) );
}
size_t i, aIdx = 0, bIdx = 0;
size_t numSpecials = sizeof( sSpecialValues ) / sizeof( sSpecialValues[ 0 ] );
for( i = 0; i < numElements; i++ )
{
outBufferA[ i ] = sSpecialValues[ aIdx ];
outBufferB[ i ] = sSpecialValues[ bIdx ];
bIdx++;
if( bIdx == numSpecials )
{
bIdx = 0;
aIdx++;
if( aIdx == numSpecials )
break;
}
}
if( i < numElements )
{
// Fill remainder with random values
for( ; i < numElements; i++ )
{
int a = (int)genrand_int32(d);
int b = (int)genrand_int32(d);
outBufferA[ i ] = ((cl_long)a <<33 | (cl_long)b) ^ ((cl_long)b << 16);
a = (int)genrand_int32(d);
b = (int)genrand_int32(d);
outBufferB[ i ] = ((cl_long)a <<33 | (cl_long)b) ^ ((cl_long)b << 16);
}
}
else if( aIdx < numSpecials )
{
log_info( "WARNING: Not enough space to fill all special values for long test! (need %d additional elements)\n", (int)( ( numSpecials - aIdx ) * numSpecials ) );
}
}
// Program entry point: forwards the command line and the test tables defined in this
// file to the shared test harness and returns whatever the harness returns.
int main(int argc, const char *argv[])
{
    const int status = runTestHarness( argc, argv,
                                       num_fns, basefn_list, basefn_names,
                                       false /* image support required */,
                                       false /* force no context creation */,
                                       0 );
    return status;
}

View File

@@ -0,0 +1,143 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/threadTesting.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/mt19937.h"
// The number of errors to print out for each test
#define MAX_ERRORS_TO_PRINT 10
// Shared data and helpers. fill_test_values() is defined in main.c; the other two
// are defined outside the files shown with this header.
extern const size_t vector_aligns[];
extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret);
extern void fill_test_values( cl_long *outBufferA, cl_long *outBufferB, size_t numElements, MTdata d );
// Test entry points. Every test takes the device, context, command queue and an
// element count, and returns an int status; these are the functions registered in
// basefn_list in main.c.
extern int test_popcount(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_clz(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_ctz(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_hadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_rhadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_mul_hi(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_rotate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_clamp(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_mad_sat(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_mad_hi(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_upsample(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_addAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_subtractAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_multiplyAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_divideAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_moduloAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_andAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_orAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_integer_exclusiveOrAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_abs(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_absdiff(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_add_sat(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_sub_sat(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmul24(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmad24(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
// Per-type operator tests (math / logic / shift / compare for each integer type).
extern int test_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_uint_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_uint_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
// "quick" counterparts of the per-type operator tests above.
extern int test_quick_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_uint_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_uint_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_quick_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
// Unary operator and vector/scalar operator tests.
extern int test_unary_ops_full(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_unary_ops_increment(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_unary_ops_decrement(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vector_scalar_ops(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

View File

@@ -0,0 +1,31 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _testBase_h
#define _testBase_h
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#endif // _testBase_h

View File

@@ -0,0 +1,335 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
static int verify_abs_char( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize )
{
const cl_char *inA = (const cl_char*) p;
const cl_uchar *outptr = (const cl_uchar*) q;
size_t i;
for( i = 0; i < n; i++ )
{
cl_uchar r = inA[i];
if( inA[i] < 0 )
r = -inA[i];
if( r != outptr[i] )
{ log_info( "%ld) Failure for abs( (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i],r, outptr[i] ); return -1; }
}
return 0;
}
static int verify_abs_short( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize )
{
const cl_short *inA = (const cl_short*) p;
const cl_ushort *outptr = (const cl_ushort*) q;
size_t i;
for( i = 0; i < n; i++ )
{
cl_ushort r = inA[i];
if( inA[i] < 0 )
r = -inA[i];
if( r != outptr[i] )
{ log_info( "%ld) Failure for abs( (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i],r, outptr[i] ); return -1; }
}
return 0;
}
static int verify_abs_int( const void *p, const void *q, size_t n, const char *sizeName , size_t vecSize)
{
const cl_int *inA = (const cl_int*) p;
const cl_uint *outptr = (const cl_uint*) q;
size_t i;
for( i = 0; i < n; i++ )
{
cl_uint r = inA[i];
if( inA[i] < 0 )
r = -inA[i];
if( r != outptr[i] )
{ log_info( "%ld) Failure for abs( (int%s) 0x%2.2x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i],r, outptr[i] ); return -1; }
}
return 0;
}
static int verify_abs_long( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize )
{
const cl_long *inA = (const cl_long*) p;
const cl_ulong *outptr = (const cl_ulong*) q;
size_t i;
for( i = 0; i < n; i++ )
{
cl_ulong r = inA[i];
if( inA[i] < 0 )
r = -inA[i];
if( r != outptr[i] )
{ log_info( "%ld) Failure for abs( (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i],r, outptr[i] ); return -1; }
}
return 0;
}
// Host-side check of abs() results for unsigned char inputs: the expected result is
// the input itself.
//
// p        - n cl_uchar inputs
// q        - n cl_uchar results produced by the device
// sizeName - vector-size suffix used only in the failure message
// vecSize  - unused
//
// Returns 0 when every result matches, or -1 after logging the first mismatch.
static int verify_abs_uchar( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize )
{
    const cl_uchar *inA = (const cl_uchar*) p;
    const cl_uchar *outptr = (const cl_uchar*) q;
    size_t i;

    (void)vecSize;
    for( i = 0; i < n; i++ )
    {
        const cl_uchar r = inA[i];
        if( r != outptr[i] )
        {
            // size_t is passed as unsigned long to match the conversion specifier.
            log_info( "%lu) Failure for abs( (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", (unsigned long)i, sizeName, (unsigned int)inA[i], (unsigned int)r, (unsigned int)outptr[i] );
            return -1;
        }
    }
    return 0;
}
// Host-side check of abs() results for unsigned short inputs: the expected result is
// the input itself.
//
// p        - n cl_ushort inputs
// q        - n cl_ushort results produced by the device
// sizeName - vector-size suffix used only in the failure message
// vecSize  - unused
//
// Returns 0 when every result matches, or -1 after logging the first mismatch.
static int verify_abs_ushort( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize )
{
    const cl_ushort *inA = (const cl_ushort*) p;
    const cl_ushort *outptr = (const cl_ushort*) q;
    size_t i;

    (void)vecSize;
    for( i = 0; i < n; i++ )
    {
        const cl_ushort r = inA[i];
        if( r != outptr[i] )
        {
            // The message names the type actually under test (ushort), and size_t is
            // passed as unsigned long to match the conversion specifier.
            log_info( "%lu) Failure for abs( (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", (unsigned long)i, sizeName, (unsigned int)inA[i], (unsigned int)r, (unsigned int)outptr[i] );
            return -1;
        }
    }
    return 0;
}
// Host-side check of abs() results for unsigned int inputs: the expected result is
// the input itself.
//
// p        - n cl_uint inputs
// q        - n cl_uint results produced by the device
// sizeName - vector-size suffix used only in the failure message
// vecSize  - unused
//
// Returns 0 when every result matches, or -1 after logging the first mismatch.
static int verify_abs_uint( const void *p, const void *q, size_t n, const char *sizeName , size_t vecSize)
{
    const cl_uint *inA = (const cl_uint*) p;
    const cl_uint *outptr = (const cl_uint*) q;
    size_t i;

    (void)vecSize;
    for( i = 0; i < n; i++ )
    {
        const cl_uint r = inA[i];
        if( r != outptr[i] )
        {
            // The message names the type actually under test (uint) and prints the
            // input at full 32-bit width; size_t is passed as unsigned long.
            log_info( "%lu) Failure for abs( (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", (unsigned long)i, sizeName, (unsigned int)inA[i], (unsigned int)r, (unsigned int)outptr[i] );
            return -1;
        }
    }
    return 0;
}
// Host-side check of abs() results for unsigned long (64-bit) inputs: the expected
// result is the input itself.
//
// p        - n cl_ulong inputs
// q        - n cl_ulong results produced by the device
// sizeName - vector-size suffix used only in the failure message
// vecSize  - unused
//
// Returns 0 when every result matches, or -1 after logging the first mismatch.
static int verify_abs_ulong( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize )
{
    const cl_ulong *inA = (const cl_ulong*) p;
    const cl_ulong *outptr = (const cl_ulong*) q;
    size_t i;

    (void)vecSize;
    for( i = 0; i < n; i++ )
    {
        const cl_ulong r = inA[i];
        if( r != outptr[i] )
        {
            // The message names the type actually under test (ulong). %llx requires
            // unsigned long long; cl_ulong may be a different 64-bit type.
            log_info( "%lu) Failure for abs( (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", (unsigned long)i, sizeName, (unsigned long long)inA[i], (unsigned long long)r, (unsigned long long)outptr[i] );
            return -1;
        }
    }
    return 0;
}
// Signature shared by the host-side abs() checkers: (inputs, device results, element
// count, vector-size suffix for messages, vector size). Returns 0 on match, -1 on the
// first mismatch.
typedef int (*verifyFunc)( const void *, const void *, size_t n, const char *sizeName, size_t vecSize );
// Checker for each tested type. test_abs() indexes this table with the same index it
// uses for test_str_names, test_ustr_names and kSizes, so the order here must match
// those tables: signed char/short/int/long first, then the unsigned types.
static const verifyFunc verify[] = {
verify_abs_char, verify_abs_short, verify_abs_int, verify_abs_long,
verify_abs_uchar, verify_abs_ushort, verify_abs_uint, verify_abs_ulong
};
// Parallel per-type tables, all indexed by the same type index
// (0-3 signed char/short/int/long, 4-7 the unsigned counterparts).

// OpenCL C name of the kernel's input element type.
static const char *test_str_names[] = { "char", "short", "int", "long" ,
                                        "uchar", "ushort", "uint", "ulong"};
// OpenCL C name of the kernel's result type: abs() yields the unsigned type.
static const char *test_ustr_names[] = { "uchar", "ushort", "uint", "ulong" ,
                                         "uchar", "ushort", "uint", "ulong"};

// Parallel per-vector-size tables, all indexed by the same vector-size index.

// Number of elements per vector.
static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
// Suffix appended to the type name for the kernel name and the temporary.
static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };
// Suffix used for the kernel's pointer arguments. The 3-element entry is empty
// because that variant takes scalar pointers and goes through vload3/vstore3.
static const char *vector_size_names_io_types[] = { "", "2", "", "4", "8", "16" };

// Size in bytes of one element of each type. Sized by its initializer so that it
// always has exactly one entry per type (it was previously declared with 9 elements
// but only 8 initializers, leaving a stray zero entry).
static const size_t kSizes[] = { 1, 2, 4, 8, 1, 2, 4, 8 };

// Kernel source fragments, indexed by "is this the 3-element vector variant?"
// (0 = direct indexing, 1 = vload3/vstore3).
static const char * source_loads[] = {
    "srcA[tid]",
    "vload3(tid, srcA)"
};
static const char * dest_stores[] = {
    " dst[tid] = tmp;\n",
    " vstore3(tmp, tid, dst);\n"
};
int test_abs(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
cl_int *input_ptr, *output_ptr, *p;
int err;
int i;
cl_uint vectorSizeIdx;
cl_uint type;
MTdata d;
int fail_count = 0;
size_t length = sizeof(cl_int) * 4 * n_elems;
input_ptr = (cl_int*)malloc(length);
output_ptr = (cl_int*)malloc(length);
p = input_ptr;
d = init_genrand( gRandomSeed );
for (i=0; i<n_elems * 4; i++)
p[i] = genrand_int32(d);
free_mtdata(d); d = NULL;
for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
{
//embedded devices don't support long/ulong so skip over
if (! gHasLong && strstr(test_str_names[type],"long"))
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", test_str_names[type] );
continue;
}
verifyFunc f = verify[ type ];
size_t elementCount = length / kSizes[type];
cl_mem streams[2];
log_info( "%s", test_str_names[type] );
fflush( stdout );
// Set up data streams for the type
streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
if (!streams[0])
{
log_error("clCreateBuffer failed\n");
return -1;
}
streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
if (!streams[1])
{
log_error("clCreateBuffer failed\n");
return -1;
}
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueWriteBuffer failed\n");
return -1;
}
for( vectorSizeIdx = 0; vectorSizeIdx < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSizeIdx++ )
{
cl_program program = NULL;
cl_kernel kernel = NULL;
const char *source[] = {
"__kernel void test_abs_",
test_str_names[type],
vector_size_names[vectorSizeIdx],
"(__global ", test_str_names[type],
vector_size_names_io_types[vectorSizeIdx],
" *srcA, __global ", test_ustr_names[type],
vector_size_names_io_types[vectorSizeIdx],
" *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" ", test_ustr_names[type], vector_size_names[vectorSizeIdx],
" tmp = abs(", source_loads[!!(vector_sizes[vectorSizeIdx]==3)], ");\n",
dest_stores[!!(vector_sizes[vectorSizeIdx]==3)],
"}\n"
};
char kernelName[128];
snprintf( kernelName, sizeof( kernelName ), "test_abs_%s%s", test_str_names[type], vector_size_names[vectorSizeIdx] );
err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
if (err)
return -1;
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
return -1;
}
//Wipe the output buffer clean
uint32_t pattern = 0xdeadbeef;
memset_pattern4( output_ptr, &pattern, length );
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueWriteBuffer failed\n");
return -1;
}
size_t size = elementCount / ((vector_sizes[vectorSizeIdx]));
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueNDRangeKernel failed\n");
return -1;
}
err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueReadBuffer failed\n");
return -1;
}
char *inP = (char *)input_ptr;
char *outP = (char *)output_ptr;
for( size_t e = 0; e < size; e++ )
{
if( f( inP, outP, (vector_sizes[vectorSizeIdx]), vector_size_names[vectorSizeIdx], vector_sizes[vectorSizeIdx] ) ) {
++fail_count; break; // return -1;
}
inP += kSizes[type] * (vector_sizes[vectorSizeIdx] );
outP += kSizes[type] * (vector_sizes[vectorSizeIdx]);
}
clReleaseKernel( kernel );
clReleaseProgram( program );
log_info( "." );
fflush( stdout );
}
clReleaseMemObject( streams[0] );
clReleaseMemObject( streams[1] );
log_info( "done\n" );
}
if(fail_count) {
log_info("Failed on %d types\n", fail_count);
return -1;
}
log_info("ABS test passed\n");
free(input_ptr);
free(output_ptr);
return err;
}

View File

@@ -0,0 +1,374 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
static int verify_absdiff_char( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
{
const cl_char *inA = (const cl_char *)p;
const cl_char *inB = (const cl_char *)q;
const cl_uchar *outptr = (const cl_uchar *)r;
size_t i;
for( i = 0; i < n; i++ )
{
cl_uchar r = inA[i] - inB[i];
if( inB[i] > inA[i] )
r = inB[i] - inA[i];
if( r != outptr[i] )
{ log_info( "%ld) Failure for absdiff( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
}
return 0;
}
static int verify_absdiff_uchar( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
{
const cl_uchar *inA = (const cl_uchar *)p;
const cl_uchar *inB = (const cl_uchar *)q;
const cl_uchar *outptr = (const cl_uchar *)r;
size_t i;
for( i = 0; i < n; i++ )
{
cl_uchar r = inA[i] - inB[i];
if( inB[i] > inA[i] )
r = inB[i] - inA[i];
if( r != outptr[i] )
{ log_info( "%ld) Failure for absdiff( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
}
return 0;
}
static int verify_absdiff_short( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
{
const cl_short *inA = (const cl_short *)p;
const cl_short *inB = (const cl_short *)q;
const cl_ushort *outptr = (const cl_ushort *)r;
size_t i;
for( i = 0; i < n; i++ )
{
cl_ushort r = inA[i] - inB[i];
if( inB[i] > inA[i] )
r = inB[i] - inA[i];
if( r != outptr[i] )
{ log_info( "%ld) Failure for absdiff( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
}
return 0;
}
static int verify_absdiff_ushort( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
{
const cl_ushort *inA = (const cl_ushort *)p;
const cl_ushort *inB = (const cl_ushort *)q;
const cl_ushort *outptr = (const cl_ushort *)r;
size_t i;
for( i = 0; i < n; i++ )
{
cl_ushort r = inA[i] - inB[i];
if( inB[i] > inA[i] )
r = inB[i] - inA[i];
if( r != outptr[i] )
{ log_info( "%ld) Failure for absdiff( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
}
return 0;
}
static int verify_absdiff_int( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
{
const cl_int *inA = (const cl_int *)p;
const cl_int *inB = (const cl_int *)q;
const cl_uint *outptr = (const cl_uint *)r;
size_t i;
for( i = 0; i < n; i++ )
{
cl_uint r = inA[i] - inB[i];
if( inB[i] > inA[i] )
r = inB[i] - inA[i];
if( r != outptr[i] )
{
log_info( "%ld) Failure for absdiff( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] );
return -1;
}
}
return 0;
}
/*
 * Host-side reference check for abs_diff() on uint data.
 *
 * p, q     - the two unsigned int input arrays (n elements each)
 * r        - the unsigned int results to check
 * n        - number of elements to check
 * sizeName - vector-size suffix used only in the failure message
 * vecSize  - unused
 *
 * Returns 0 if every element matches, -1 (after logging) on the first mismatch.
 */
static int verify_absdiff_uint( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
{
    const cl_uint *inA = (const cl_uint *)p;
    const cl_uint *inB = (const cl_uint *)q;
    const cl_uint *outptr = (const cl_uint *)r;
    size_t i;

    (void)vecSize;
    for( i = 0; i < n; i++ )
    {
        /* Subtract the smaller operand from the larger one. */
        cl_uint expected = ( inB[i] > inA[i] ) ? inB[i] - inA[i] : inA[i] - inB[i];
        if( expected != outptr[i] )
        {
            /* Cast the index so it matches the %ld conversion. */
            log_info( "%ld) Failure for absdiff( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n",
                      (long)i, sizeName, inA[i], sizeName, inB[i], expected, outptr[i] );
            return -1;
        }
    }
    return 0;
}
static int verify_absdiff_long( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
{
const cl_long *inA = (const cl_long *)p;
const cl_long *inB = (const cl_long *)q;
const cl_ulong *outptr = (const cl_ulong *)r;
size_t i;
for( i = 0; i < n; i++ )
{
cl_ulong r = inA[i] - inB[i];
if( inB[i] > inA[i] )
r = inB[i] - inA[i];
if( r != outptr[i] )
{ log_info( "%ld) Failure for absdiff( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
}
return 0;
}
/*
 * Host-side reference check for abs_diff() on ulong (64-bit) data.
 *
 * p, q     - the two unsigned 64-bit input arrays (n elements each)
 * r        - the unsigned 64-bit results to check
 * n        - number of elements to check
 * sizeName - vector-size suffix used only in the failure message
 * vecSize  - unused
 *
 * Returns 0 if every element matches, -1 (after logging) on the first mismatch.
 */
static int verify_absdiff_ulong( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
{
    const cl_ulong *inA = (const cl_ulong *)p;
    const cl_ulong *inB = (const cl_ulong *)q;
    const cl_ulong *outptr = (const cl_ulong *)r;
    size_t i;

    (void)vecSize;
    for( i = 0; i < n; i++ )
    {
        /* Subtract the smaller operand from the larger one. */
        cl_ulong expected = ( inB[i] > inA[i] ) ? inB[i] - inA[i] : inA[i] - inB[i];
        if( expected != outptr[i] )
        {
            /* %llx needs unsigned long long, whatever cl_ulong is typedef'd to. */
            log_info( "%ld) Failure for absdiff( (ulong%s) 0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n",
                      (long)i, sizeName, (unsigned long long)inA[i], sizeName, (unsigned long long)inB[i],
                      (unsigned long long)expected, (unsigned long long)outptr[i] );
            return -1;
        }
    }
    return 0;
}
// Common signature of the per-type host verifiers: (inputA, inputB, output, element count, vector-size suffix, vector size).
typedef int (*verifyFunc)( const void *, const void *, const void *, size_t n, const char *sizeName, size_t vecSize);
// Verifier table; entries are in the same order as test_str_names[] and kSizes[] below.
static const verifyFunc verify[] = { verify_absdiff_char, verify_absdiff_uchar,
verify_absdiff_short, verify_absdiff_ushort,
verify_absdiff_int, verify_absdiff_uint,
verify_absdiff_long, verify_absdiff_ulong };
//FIXME: enable long and ulong when GPU path is working
// NOTE(review): the FIXME above looks stale -- "long" and "ulong" are already listed here, and
// test_absdiff skips them at run time when gHasLong is false.
// Signed/unsigned names alternate, so (index & -2) selects the signed name of each pair;
// test_absdiff prefixes that name with "u" to form the kernel's result type.
static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
//FIXME: enable "16" when support for > 64 byte vectors go into LLVM
// NOTE(review): this FIXME also looks stale -- 16 is already present in the tables below.
static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
// Vector-size suffix used for the kernel name and for values declared inside the kernel.
static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };
// Vector-size suffix used for the kernel's pointer parameters. The entry for size 3 is empty,
// so 3-element kernels take scalar pointers and go through vload3/vstore3.
static const char *vector_param_size_names[] = { "", "2", "", "4", "8", "16" };
// Size in bytes of one scalar of each type, indexed like test_str_names[].
static const size_t kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 };
/* Log the pieces of a kernel source array back to back, in order.
   Nothing is printed when nSrcStrings is zero or negative. */
static void printSrc(const char *src[], int nSrcStrings) {
    int remaining = nSrcStrings;

    while (remaining > 0) {
        log_info("%s", *src);
        ++src;
        --remaining;
    }
}
int test_absdiff(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
cl_int *input_ptr[2], *output_ptr, *p;
int err;
int i;
cl_uint vectorSize;
cl_uint type;
MTdata d;
int fail_count = 0;
size_t length = sizeof(cl_int) * 4 * n_elems;
input_ptr[0] = (cl_int*)malloc(length);
input_ptr[1] = (cl_int*)malloc(length);
output_ptr = (cl_int*)malloc(length);
d = init_genrand( gRandomSeed );
p = input_ptr[0];
for (i=0; i<4 * n_elems; i++)
p[i] = genrand_int32(d);
p = input_ptr[1];
for (i=0; i<4 * n_elems; i++)
p[i] = genrand_int32(d);
free_mtdata(d); d = NULL;
for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
{
//embedded devices don't support long/ulong so skip over
if (! gHasLong && strstr(test_str_names[type],"long"))
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", test_str_names[type] );
continue;
}
verifyFunc f = verify[ type ];
// Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
size_t elementCount = length / kSizes[type];
cl_mem streams[3];
log_info( "%s", test_str_names[type] );
fflush( stdout );
// Set up data streams for the type
streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
if (!streams[0])
{
log_error("clCreateBuffer failed\n");
return -1;
}
streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
if (!streams[1])
{
log_error("clCreateBuffer failed\n");
return -1;
}
streams[2] = clCreateBuffer(context, 0, length, NULL, NULL);
if (!streams[2])
{
log_error("clCreateBuffer failed\n");
return -1;
}
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueWriteBuffer failed\n");
return -1;
}
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueWriteBuffer failed\n");
return -1;
}
for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ )
{
cl_program program = NULL;
cl_kernel kernel = NULL;
const char *source[] = {
"__kernel void test_absdiff_", test_str_names[type], vector_size_names[vectorSize],
"(__global ", test_str_names[type], vector_param_size_names[vectorSize],
" *srcA, __global ", test_str_names[type], vector_param_size_names[vectorSize],
" *srcB, __global u", test_str_names[type & -2], vector_param_size_names[vectorSize],
" *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" ", test_str_names[type], vector_size_names[vectorSize], " sA, sB;\n",
" sA = ", ( vector_sizes[ vectorSize ] == 3 ) ? "vload3( tid, srcA )" : "srcA[tid]", ";\n",
" sB = ", ( vector_sizes[ vectorSize ] == 3 ) ? "vload3( tid, srcB )" : "srcB[tid]", ";\n",
" u", test_str_names[type & -2], vector_size_names[vectorSize], " dstVal = abs_diff(sA, sB);\n"
" ", ( vector_sizes[ vectorSize ] == 3 ) ? "vstore3( dstVal, tid, dst )" : "dst[ tid ] = dstVal", ";\n",
"}\n" };
char kernelName[128];
snprintf( kernelName, sizeof( kernelName ), "test_absdiff_%s%s", test_str_names[type], vector_size_names[vectorSize] );
err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
if (err) {
return -1;
}
#if 0
log_info("About to run\n");
log_info("=====\n");
printSrc(source, sizeof(source)/sizeof(source[0]));
log_info("=====\n");
#endif
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
return -1;
}
//Wipe the output buffer clean
uint32_t pattern = 0xdeadbeef;
memset_pattern4( output_ptr, &pattern, length );
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueWriteBuffer failed\n");
return -1;
}
size_t size = elementCount / (vector_sizes[vectorSize]);
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueNDRangeKernel failed\n");
return -1;
}
err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueReadBuffer failed\n");
return -1;
}
char *inP = (char *)input_ptr[0];
char *inP2 = (char *)input_ptr[1];
char *outP = (char *)output_ptr;
for( size_t e = 0; e < size; e++ )
{
if( f( inP, inP2, outP, (vector_sizes[vectorSize]), vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) {
printSrc(source, sizeof(source)/sizeof(source[0]));
++fail_count; break; // return -1;
}
inP += kSizes[type] * ( (vector_sizes[vectorSize]) );
inP2 += kSizes[type] * ( (vector_sizes[vectorSize]) );
outP += kSizes[type] * ( (vector_sizes[vectorSize]) );
}
clReleaseKernel( kernel );
clReleaseProgram( program );
log_info( "." );
fflush( stdout );
}
clReleaseMemObject( streams[0] );
clReleaseMemObject( streams[1] );
clReleaseMemObject( streams[2] );
log_info( "done\n" );
}
if(fail_count) {
log_info("Failed on %d types\n", fail_count);
return -1;
}
log_info("ABS_DIFF test passed\n");
free(input_ptr[0]);
free(input_ptr[1]);
free(output_ptr);
return err;
}

View File

@@ -0,0 +1,378 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Lower bounds of the unsigned types, for symmetry with the *_MAX limits.
#define UCHAR_MIN 0
#define USHRT_MIN 0
#define UINT_MIN 0
// Fallback MAX/MIN, defined only if no earlier header already provides them.
// Both evaluate each argument twice, so pass side-effect-free expressions only.
#ifndef MAX
#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) )
#endif
#ifndef MIN
#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) )
#endif
/*
 * Reference check for add_sat() on char data: the sum is formed in 32 bits and
 * clamped to [CL_CHAR_MIN, CL_CHAR_MAX] before comparison with the device
 * result. Returns 0 when all n elements match, -1 after logging the first
 * mismatch. vecSize is not used.
 */
static int verify_addsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize )
{
    int idx = 0;

    while( idx < n )
    {
        cl_int expected = (cl_int) inA[idx] + (cl_int) inB[idx];

        if( expected < CL_CHAR_MIN )
            expected = CL_CHAR_MIN;
        if( expected > CL_CHAR_MAX )
            expected = CL_CHAR_MAX;

        if( expected != outptr[idx] )
        {
            log_info( "\n%d) Failure for add_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", idx, sizeName, inA[idx], sizeName, inB[idx], expected, outptr[idx] );
            return -1;
        }
        ++idx;
    }
    return 0;
}
/*
 * Reference check for add_sat() on uchar data: the sum is formed in 32 bits
 * and clamped to [0, CL_UCHAR_MAX] before comparison with the device result.
 * Returns 0 when all n elements match, -1 after logging the first mismatch.
 * vecSize is not used.
 */
static int verify_addsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const cl_uchar *outptr, int n, const char *sizeName, int vecSize )
{
    int idx = 0;

    while( idx < n )
    {
        cl_int expected = (int) inA[idx] + (int) inB[idx];

        if( expected < 0 )
            expected = 0;
        if( expected > CL_UCHAR_MAX )
            expected = CL_UCHAR_MAX;

        if( expected != outptr[idx] )
        {
            log_info( "\n%d) Failure for add_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", idx, sizeName, inA[idx], sizeName, inB[idx], expected, outptr[idx] );
            return -1;
        }
        ++idx;
    }
    return 0;
}
/*
 * Reference check for add_sat() on short data: the sum is formed in 32 bits
 * and clamped to [CL_SHRT_MIN, CL_SHRT_MAX] before comparison with the device
 * result. Returns 0 when all n elements match, -1 after logging the first
 * mismatch. vecSize is not used.
 */
static int verify_addsat_short( const cl_short *inA, const cl_short *inB, const cl_short *outptr, int n, const char *sizeName , int vecSize)
{
    int idx = 0;

    while( idx < n )
    {
        cl_int expected = (cl_int) inA[idx] + (cl_int) inB[idx];

        if( expected < CL_SHRT_MIN )
            expected = CL_SHRT_MIN;
        if( expected > CL_SHRT_MAX )
            expected = CL_SHRT_MAX;

        if( expected != outptr[idx] )
        {
            log_info( "\n%d) Failure for add_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", idx, sizeName, inA[idx], sizeName, inB[idx], expected, outptr[idx] );
            return -1;
        }
        ++idx;
    }
    return 0;
}
/*
 * Reference check for add_sat() on ushort data: the sum is formed in 32 bits
 * and clamped to [0, CL_USHRT_MAX] before comparison with the device result.
 * Returns 0 when all n elements match, -1 after logging the first mismatch.
 * vecSize is not used.
 */
static int verify_addsat_ushort( const cl_ushort *inA, const cl_ushort *inB, const cl_ushort *outptr, int n, const char *sizeName , int vecSize)
{
    int idx = 0;

    while( idx < n )
    {
        cl_int expected = (cl_int) inA[idx] + (cl_int) inB[idx];

        if( expected < 0 )
            expected = 0;
        if( expected > CL_USHRT_MAX )
            expected = CL_USHRT_MAX;

        if( expected != outptr[idx] )
        {
            log_info( "\n%d) Failure for add_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", idx, sizeName, inA[idx], sizeName, inB[idx], expected, outptr[idx] );
            return -1;
        }
        ++idx;
    }
    return 0;
}
/*
 * Reference check for add_sat() on int data. The sum is formed with unsigned
 * (wrapping) arithmetic; a wrapped result is detected by comparing it with the
 * first operand and is replaced by CL_INT_MAX or CL_INT_MIN depending on the
 * sign of the second operand. Returns 0 when all n elements match, -1 after
 * logging the first mismatch. vecSize is not used.
 */
static int verify_addsat_int( const cl_int *inA, const cl_int *inB, const cl_int *outptr, int n, const char *sizeName , int vecSize)
{
    int idx;

    for( idx = 0; idx < n; ++idx )
    {
        cl_int expected = (cl_int) ((cl_uint) inA[idx] + (cl_uint)inB[idx]);

        if( inB[idx] > 0 && expected < inA[idx] )
            expected = CL_INT_MAX;      /* adding a positive value made the sum smaller */
        else if( inB[idx] <= 0 && expected > inA[idx] )
            expected = CL_INT_MIN;      /* adding a non-positive value made the sum larger */

        if( expected == outptr[idx] )
            continue;

        log_info( "\n%d) Failure for add_sat( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", idx, sizeName, inA[idx], sizeName, inB[idx], expected, outptr[idx] );
        return -1;
    }
    return 0;
}
/*
 * Reference check for add_sat() on uint data: a sum that wrapped (it is
 * smaller than the first operand) is replaced by CL_UINT_MAX. Returns 0 when
 * all n elements match, -1 after logging the first mismatch. vecSize is not used.
 */
static int verify_addsat_uint( const cl_uint *inA, const cl_uint *inB, const cl_uint *outptr, int n, const char *sizeName , int vecSize)
{
    int idx;

    for( idx = 0; idx < n; ++idx )
    {
        const cl_uint wrapped = inA[idx] + inB[idx];
        const cl_uint expected = ( wrapped < inA[idx] ) ? CL_UINT_MAX : wrapped;

        if( expected == outptr[idx] )
            continue;

        log_info( "\n%d) Failure for add_sat( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", idx, sizeName, inA[idx], sizeName, inB[idx], expected, outptr[idx] );
        return -1;
    }
    return 0;
}
/*
 * Reference check for add_sat() on long (64-bit) data. The sum is formed with
 * unsigned (wrapping) arithmetic; a wrapped result is detected by comparing it
 * with the first operand and is replaced by CL_LONG_MAX or CL_LONG_MIN
 * depending on the sign of the second operand.
 *
 * Returns 0 when all n elements match, -1 after logging the first mismatch.
 * vecSize is not used.
 */
static int verify_addsat_long( const cl_long *inA, const cl_long *inB, const cl_long *outptr, int n, const char *sizeName , int vecSize)
{
    int i;

    (void)vecSize;
    for( i = 0; i < n; i++ )
    {
        cl_long r = (cl_long)((cl_ulong)inA[i] + (cl_ulong)inB[i]);
        if( inB[i] > 0 )
        {
            if( r < inA[i] )
                r = CL_LONG_MAX;
        }
        else
        {
            if( r > inA[i] )
                r = CL_LONG_MIN;
        }
        if( r != outptr[i] )
        {
            /* %llx needs unsigned long long; cl_long may be typedef'd to a
               different 64-bit type, so convert explicitly. */
            log_info( "%d) Failure for add_sat( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n",
                      i, sizeName, (unsigned long long)inA[i], sizeName, (unsigned long long)inB[i],
                      (unsigned long long)r, (unsigned long long)outptr[i] );
            return -1;
        }
    }
    return 0;
}
/*
 * Reference check for add_sat() on ulong (64-bit) data: a sum that wrapped (it
 * is smaller than the first operand) is replaced by CL_ULONG_MAX.
 *
 * Returns 0 when all n elements match, -1 after logging the first mismatch.
 * vecSize is not used.
 */
static int verify_addsat_ulong( const cl_ulong *inA, const cl_ulong *inB, const cl_ulong *outptr, int n, const char *sizeName , int vecSize)
{
    int i;

    (void)vecSize;
    for( i = 0; i < n; i++ )
    {
        cl_ulong r = inA[i] + inB[i];
        if( r < inA[i] )
            r = CL_ULONG_MAX;
        if( r != outptr[i] )
        {
            /* %llx needs unsigned long long; cl_ulong may be typedef'd to a
               different 64-bit type, so convert explicitly. */
            log_info( "%d) Failure for add_sat( (ulong%s) 0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n",
                      i, sizeName, (unsigned long long)inA[i], sizeName, (unsigned long long)inB[i],
                      (unsigned long long)r, (unsigned long long)outptr[i] );
            return -1;
        }
    }
    return 0;
}
// Type-erased signature through which the typed verifiers above are called:
// (inputA, inputB, output, element count, vector-size suffix, vector size).
typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName, int );
// Verifier table; entries are in the same order as test_str_names[] and kSizes[] below.
// NOTE(review): the typed verifiers are cast to verifyFunc and called through it; this relies on
// data pointers and void pointers being passed identically -- confirm for any new target.
static const verifyFunc verify[] = { (verifyFunc) verify_addsat_char, (verifyFunc) verify_addsat_uchar,
(verifyFunc) verify_addsat_short, (verifyFunc) verify_addsat_ushort,
(verifyFunc) verify_addsat_int, (verifyFunc) verify_addsat_uint,
(verifyFunc) verify_addsat_long, (verifyFunc) verify_addsat_ulong };
//FIXME: enable long and ulong when GPU path is working
// NOTE(review): the FIXME above looks stale -- "long" and "ulong" are already listed here, and
// test_add_sat skips them at run time when gHasLong is false.
static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
//FIXME: enable "16" when support for > 64 byte vectors go into LLVM
// NOTE(review): this FIXME also looks stale -- 16 is already present in the tables below.
static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
// Vector-size suffix appended to the type name, indexed like vector_sizes[].
static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };
// Size in bytes of one scalar of each type, indexed like test_str_names[].
static const size_t kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 };
int test_add_sat(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
cl_int *input_ptr[2], *output_ptr, *p;
int err;
int i;
cl_uint vectorSize;
cl_uint type;
MTdata d;
int fail_count = 0;
size_t length = sizeof(cl_int) * 4 * n_elems;
input_ptr[0] = (cl_int*)malloc(length);
input_ptr[1] = (cl_int*)malloc(length);
output_ptr = (cl_int*)malloc(length);
d = init_genrand( gRandomSeed );
p = input_ptr[0];
for (i=0; i<4 * n_elems; i++)
p[i] = genrand_int32(d);
p = input_ptr[1];
for (i=0; i<4 * n_elems; i++)
p[i] = genrand_int32(d);
free_mtdata(d); d = NULL;
for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
{
//embedded devices don't support long/ulong so skip over
if (! gHasLong && strstr(test_str_names[type],"long"))
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", test_str_names[type] );
continue;
}
verifyFunc f = verify[ type ];
// Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
size_t elementCount = length / kSizes[type];
cl_mem streams[3];
log_info( "%s", test_str_names[type] );
fflush( stdout );
// Set up data streams for the type
streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
if (!streams[0])
{
log_error("clCreateBuffer failed\n");
return -1;
}
streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
if (!streams[1])
{
log_error("clCreateBuffer failed\n");
return -1;
}
streams[2] = clCreateBuffer(context, 0, length, NULL, NULL);
if (!streams[2])
{
log_error("clCreateBuffer failed\n");
return -1;
}
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueWriteBuffer failed\n");
return -1;
}
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clEnqueueWriteBuffer failed\n");
return -1;
}
for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ )
{
cl_program program = NULL;
cl_kernel kernel = NULL;
const char *source[] = {
"__kernel void test_add_sat_", test_str_names[type], vector_size_names[vectorSize],
"(__global ", test_str_names[type], vector_size_names[vectorSize],
" *srcA, __global ", test_str_names[type], vector_size_names[vectorSize],
" *srcB, __global ", test_str_names[type], vector_size_names[vectorSize],
" *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" ", test_str_names[type], vector_size_names[vectorSize], " tmp = add_sat(srcA[tid], srcB[tid]);\n"
" dst[tid] = tmp;\n"
"}\n" };
const char *sourceV3[] = {
"__kernel void test_add_sat_", test_str_names[type], vector_size_names[vectorSize],
"(__global ", test_str_names[type],
" *srcA, __global ", test_str_names[type],
" *srcB, __global ", test_str_names[type],
" *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" ", test_str_names[type], vector_size_names[vectorSize], " tmp = add_sat(vload3(tid, srcA), vload3(tid, srcB));\n"
" vstore3(tmp, tid, dst);\n"
"}\n" };
char kernelName[128];
snprintf( kernelName, sizeof( kernelName ), "test_add_sat_%s%s", test_str_names[type], vector_size_names[vectorSize] );
if(vector_sizes[vectorSize] != 3)
{
err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
}
else
{
err = create_single_kernel_helper(context, &program, &kernel, sizeof( sourceV3 ) / sizeof( sourceV3[0] ), sourceV3, kernelName );
}
if (err)
return -1;
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
if (err != CL_SUCCESS)
{
log_error("clSetKernelArgs failed\n");
return -1;
}
//Wipe the output buffer clean
uint32_t pattern = 0xdeadbeef;
memset_pattern4( output_ptr, &pattern, length );
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clWriteArray failed\n");
return -1;
}
size_t size = elementCount / (vector_sizes[vectorSize]);
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clExecuteKernel failed\n");
return -1;
}
err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clReadArray failed\n");
return -1;
}
char *inP = (char *)input_ptr[0];
char *inP2 = (char *)input_ptr[1];
char *outP = (char *)output_ptr;
for( size_t e = 0; e < size; e++ )
{
if( f( inP, inP2, outP, (vector_sizes[vectorSize]), vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) {
++fail_count; break; // return -1;
}
inP += kSizes[type] * vector_sizes[vectorSize];
inP2 += kSizes[type] * vector_sizes[vectorSize];
outP += kSizes[type] * vector_sizes[vectorSize];
}
clReleaseKernel( kernel );
clReleaseProgram( program );
log_info( "." );
fflush( stdout );
}
clReleaseMemObject( streams[0] );
clReleaseMemObject( streams[1] );
clReleaseMemObject( streams[2] );
log_info( "done\n" );
}
if(fail_count) {
log_info("Failed on %d types\n", fail_count);
return -1;
}
log_info("ADD_SAT test passed\n");
free(input_ptr[0]);
free(input_ptr[1]);
free(output_ptr);
return err;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,365 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Number of kernels per signedness: one for each entry of vector_sizes[].
#define NUM_PROGRAMS 6
// Vector widths covered by the kernels below, in the same order as the kernels.
static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
// Signed mad24 kernels. Each work-item computes dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid])
// on one scalar or vector element.
const char *int_mad24_kernel_code =
"__kernel void test_int_mad24(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
"}\n";
const char *int2_mad24_kernel_code =
"__kernel void test_int2_mad24(__global int2 *srcA, __global int2 *srcB, __global int2 *srcC, __global int2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
"}\n";
// The 3-element variant takes scalar pointers and goes through vload3/vstore3.
const char *int3_mad24_kernel_code =
"__kernel void test_int3_mad24(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" int3 tmp = mad24(vload3(tid, srcA), vload3(tid, srcB), vload3(tid, srcC));\n"
" vstore3(tmp, tid, dst);\n"
"}\n";
const char *int4_mad24_kernel_code =
"__kernel void test_int4_mad24(__global int4 *srcA, __global int4 *srcB, __global int4 *srcC, __global int4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
"}\n";
const char *int8_mad24_kernel_code =
"__kernel void test_int8_mad24(__global int8 *srcA, __global int8 *srcB, __global int8 *srcC, __global int8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
"}\n";
const char *int16_mad24_kernel_code =
"__kernel void test_int16_mad24(__global int16 *srcA, __global int16 *srcB, __global int16 *srcC, __global int16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
"}\n";
// Unsigned mad24 kernels, mirroring the signed set above.
const char *uint_mad24_kernel_code =
"__kernel void test_uint_mad24(__global uint *srcA, __global uint *srcB, __global uint *srcC, __global uint *dst)\n"
"{\n"
" uint tid = get_global_id(0);\n"
"\n"
" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
"}\n";
const char *uint2_mad24_kernel_code =
"__kernel void test_uint2_mad24(__global uint2 *srcA, __global uint2 *srcB, __global uint2 *srcC, __global uint2 *dst)\n"
"{\n"
" uint tid = get_global_id(0);\n"
"\n"
" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
"}\n";
// The 3-element variant takes scalar pointers and goes through vload3/vstore3.
// Unlike the other unsigned kernels it declares tid as int rather than uint.
const char *uint3_mad24_kernel_code =
"__kernel void test_uint3_mad24(__global uint *srcA, __global uint *srcB, __global uint *srcC, __global uint *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" uint3 tmp = mad24(vload3(tid, srcA), vload3(tid, srcB), vload3(tid, srcC));\n"
" vstore3(tmp, tid, dst);\n"
"}\n";
const char *uint4_mad24_kernel_code =
"__kernel void test_uint4_mad24(__global uint4 *srcA, __global uint4 *srcB, __global uint4 *srcC, __global uint4 *dst)\n"
"{\n"
" uint tid = get_global_id(0);\n"
"\n"
" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
"}\n";
const char *uint8_mad24_kernel_code =
"__kernel void test_uint8_mad24(__global uint8 *srcA, __global uint8 *srcB, __global uint8 *srcC, __global uint8 *dst)\n"
"{\n"
" uint tid = get_global_id(0);\n"
"\n"
" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
"}\n";
const char *uint16_mad24_kernel_code =
"__kernel void test_uint16_mad24(__global uint16 *srcA, __global uint16 *srcB, __global uint16 *srcC, __global uint16 *dst)\n"
"{\n"
" uint tid = get_global_id(0);\n"
"\n"
" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
"}\n";
/*
 * Host-side reference check for signed mad24().
 *
 * inptrA, inptrB - multiplicands (the caller fills them with 24-bit values)
 * inptrC         - 32-bit addends
 * outptr         - device results to check
 * n              - number of scalar elements to check
 * vecSize        - unused
 *
 * Returns 0 if every element matches, -1 (after logging) on the first mismatch.
 */
int
verify_int_mad24(int *inptrA, int *inptrB, int *inptrC, int *outptr, size_t n, size_t vecSize)
{
    size_t i;

    (void)vecSize;
    for (i=0; i<n; i++)
    {
        int a = inptrA[i];
        int b = inptrB[i];
        /* The product of two 24-bit values does not fit in a 32-bit int, and
           signed overflow is undefined behaviour. Unsigned arithmetic wraps
           and yields the same low 32 bits as the two's-complement result. */
        unsigned int expected = (unsigned int)a * (unsigned int)b + (unsigned int)inptrC[i];
        unsigned int actual = (unsigned int)outptr[i];

        if (expected != actual)
        {
            log_error( "Failed at %ld) 0x%8.8x * 0x%8.8x + 0x%8.8x = *0x%8.8x vs 0x%8.8x\n",
                       (long)i, (unsigned int)a, (unsigned int)b, (unsigned int)inptrC[i], expected, actual );
            return -1;
        }
    }
    return 0;
}
/*
 * Host-side reference check for unsigned mad24().
 *
 * inptrA, inptrB - multiplicands; only their low 24 bits are used
 * inptrC         - 32-bit addends
 * outptr         - device results to check
 * n              - number of scalar elements to check
 * vecSize        - unused
 *
 * Returns 0 if every element matches, -1 (after logging) on the first mismatch.
 */
int
verify_uint_mad24(cl_uint *inptrA, cl_uint *inptrB, cl_uint *inptrC, cl_uint *outptr, size_t n, size_t vecSize)
{
    size_t i;

    (void)vecSize;
    for (i=0; i<n; i++)
    {
        cl_uint a = inptrA[i] & 0xFFFFFFU;
        cl_uint b = inptrB[i] & 0xFFFFFFU;
        /* Unsigned arithmetic wraps modulo 2^32, which is the result wanted here. */
        cl_uint r = a * b + inptrC[i];

        if (r != outptr[i])
        {
            /* Cast the index so it matches the %ld conversion. */
            log_error( "Failed at %ld) 0x%8.8x * 0x%8.8x + 0x%8.8x = *0x%8.8x vs 0x%8.8x\n",
                       (long)i, a, b, inptrC[i], r, outptr[i] );
            return -1;
        }
    }
    return 0;
}
// Display names for the log messages, indexed like the kernel arrays: the six signed
// variants first, then the six unsigned ones.
static const char *test_str_names[] = { "int", "int2", "int3", "int4", "int8", "int16", "uint", "uint2", "uint3", "uint4", "uint8", "uint16" };
/*
 * Return a random value in the signed 24-bit range [-2^23, 2^23 - 1], taken
 * from the low 24 bits of the next generator output.
 *
 * The original "(result << 8) >> 8" left-shifted a signed int (undefined
 * behaviour when bits are shifted out or the value is negative) and relied on
 * an arithmetic right shift. This version sign-extends bit 23 portably.
 */
static inline int random_int24( MTdata d )
{
    unsigned int low24 = (unsigned int)genrand_int32(d) & 0xFFFFFFu;

    /* Flip bit 23, then subtract its weight: 0x000000..0x7FFFFF maps to
       itself and 0x800000..0xFFFFFF maps to -0x800000..-1. */
    return (int)(low24 ^ 0x800000u) - 0x800000;
}
/* Return the next generator output converted to int, using the full 32 bits. */
static inline int random_int32( MTdata d )
{
    int value = genrand_int32(d);

    return value;
}
int
test_intmad24(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
cl_mem streams[4];
cl_int *input_ptr[3], *output_ptr, *p;
cl_program program[2*NUM_PROGRAMS];
cl_kernel kernel[2*NUM_PROGRAMS];
size_t threads[1];
int num_elements;
int err;
int i;
MTdata d;
size_t length = sizeof(cl_int) * 16 * n_elems;
num_elements = n_elems * 16;
input_ptr[0] = (cl_int*)malloc(length);
input_ptr[1] = (cl_int*)malloc(length);
input_ptr[2] = (cl_int*)malloc(length);
output_ptr = (cl_int*)malloc(length);
streams[0] = clCreateBuffer(context, 0, length, NULL, &err);
test_error(err, "clCreateBuffer failed");
streams[1] = clCreateBuffer(context, 0, length, NULL, &err);
test_error(err, "clCreateBuffer failed");
streams[2] = clCreateBuffer(context, 0, length, NULL, &err);
test_error(err, "clCreateBuffer failed");
streams[3] = clCreateBuffer(context, 0, length, NULL, &err);
test_error(err, "clCreateBuffer failed");
d = init_genrand( gRandomSeed );
p = input_ptr[0];
for (i=0; i<num_elements; i++)
p[i] = random_int24(d);
p = input_ptr[1];
for (i=0; i<num_elements; i++)
p[i] = random_int24(d);
p = input_ptr[2];
for (i=0; i<num_elements; i++)
p[i] = random_int32(d);
free_mtdata(d); d = NULL;
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &int_mad24_kernel_code, "test_int_mad24");
if (err)
return -1;
err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &int2_mad24_kernel_code, "test_int2_mad24");
if (err)
return -1;
err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &int3_mad24_kernel_code, "test_int3_mad24");
if (err)
return -1;
err = create_single_kernel_helper(context, &program[3], &kernel[3], 1, &int4_mad24_kernel_code, "test_int4_mad24");
if (err)
return -1;
err = create_single_kernel_helper(context, &program[4], &kernel[4], 1, &int8_mad24_kernel_code, "test_int8_mad24");
if (err)
return -1;
err = create_single_kernel_helper(context, &program[5], &kernel[5], 1, &int16_mad24_kernel_code, "test_int16_mad24");
if (err)
return -1;
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS], &kernel[NUM_PROGRAMS], 1, &uint_mad24_kernel_code, "test_uint_mad24");
if (err)
return -1;
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+1], &kernel[NUM_PROGRAMS+1], 1, &uint2_mad24_kernel_code, "test_uint2_mad24");
if (err)
return -1;
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+2], &kernel[NUM_PROGRAMS+2], 1, &uint3_mad24_kernel_code, "test_uint3_mad24");
if (err)
return -1;
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+3], &kernel[NUM_PROGRAMS+3], 1, &uint4_mad24_kernel_code, "test_uint4_mad24");
if (err)
return -1;
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+4], &kernel[NUM_PROGRAMS+4], 1, &uint8_mad24_kernel_code, "test_uint8_mad24");
if (err)
return -1;
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+5], &kernel[NUM_PROGRAMS+5], 1, &uint16_mad24_kernel_code, "test_uint16_mad24");
if (err)
return -1;
for (i=0; i< 2*NUM_PROGRAMS; i++)
{
err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2]);
err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3]);
test_error(err, "clSetKernelArg failed");
}
threads[0] = (unsigned int)n_elems;
// test signed
for (i=0; i<NUM_PROGRAMS; i++)
{
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
test_error(err, "clEnqueueNDRangeKernel failed");
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
test_error(err, "clEnqueueNDRangeKernel failed");
if (verify_int_mad24(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems * vector_sizes[i], vector_sizes[i]))
{
log_error("INT_MAD24 %s test failed\n", test_str_names[i]);
err = -1;
}
else
{
log_info("INT_MAD24 %s test passed\n", test_str_names[i]);
err = 0;
}
if (err)
break;
}
p = input_ptr[0];
for (i=0; i<num_elements; i++)
p[i] &= 0xffffffU;
p = input_ptr[1];
for (i=0; i<num_elements; i++)
p[i] &= 0xffffffU;
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
// test unsigned
for (i=NUM_PROGRAMS; i<2*NUM_PROGRAMS; i++)
{
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
test_error(err, "clEnqueueNDRangeKernel failed");
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
test_error(err, "clEnqueueNDRangeKernel failed");
if (verify_uint_mad24( (cl_uint*) input_ptr[0], (cl_uint*) input_ptr[1], (cl_uint*) input_ptr[2], (cl_uint*)output_ptr, n_elems * vector_sizes[i-NUM_PROGRAMS], vector_sizes[i-NUM_PROGRAMS]))
{
log_error("UINT_MAD24 %s test failed\n", test_str_names[i]);
err = -1;
}
else
{
log_info("UINT_MAD24 %s test passed\n", test_str_names[i]);
err = 0;
}
if (err)
break;
}
// cleanup
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[1]);
clReleaseMemObject(streams[2]);
clReleaseMemObject(streams[3]);
for (i=0; i<2*NUM_PROGRAMS; i++)
{
clReleaseKernel(kernel[i]);
clReleaseProgram(program[i]);
}
free(input_ptr[0]);
free(input_ptr[1]);
free(input_ptr[2]);
free(output_ptr);
return err;
}

View File

@@ -0,0 +1,390 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Number of kernels built per signedness: one for each entry of vector_sizes.
#define NUM_PROGRAMS 6
// Vector widths under test; index k here matches kernel index k (signed) and
// kernel index NUM_PROGRAMS + k (unsigned) in test_intmul24.
static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
// ---- Signed kernels --------------------------------------------------------
// Each work-item computes dst[tid] = mul24(srcA[tid], srcB[tid]) on one
// scalar or vector element.
const char *int_mul24_kernel_code =
"__kernel void test_int_mul24(__global int *srcA, __global int *srcB, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
"}\n";
const char *int2_mul24_kernel_code =
"__kernel void test_int2_mul24(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
"}\n";
// The 3-wide variant takes scalar pointers and goes through vload3/vstore3
// instead of indexing an int3 pointer.
const char *int3_mul24_kernel_code =
"__kernel void test_int3_mul24(__global int *srcA, __global int *srcB, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" int3 tmp = mul24(vload3(tid, srcA), vload3(tid, srcB));\n"
" vstore3(tmp, tid, dst);\n"
"}\n";
const char *int4_mul24_kernel_code =
"__kernel void test_int4_mul24(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
"}\n";
const char *int8_mul24_kernel_code =
"__kernel void test_int8_mul24(__global int8 *srcA, __global int8 *srcB, __global int8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
"}\n";
const char *int16_mul24_kernel_code =
"__kernel void test_int16_mul24(__global int16 *srcA, __global int16 *srcB, __global int16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
"}\n";
// ---- Unsigned kernels ------------------------------------------------------
// NOTE: the unsigned sources declare the same kernel function names as the
// signed ones ("test_int_mul24", "test_int2_mul24", ...). test_intmul24 builds
// each source as its own program and requests the kernel by that name, so the
// names inside these strings and the names passed by the caller must stay in sync.
const char *uint_mul24_kernel_code =
"__kernel void test_int_mul24(__global uint *srcA, __global uint *srcB, __global uint *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
"}\n";
const char *uint2_mul24_kernel_code =
"__kernel void test_int2_mul24(__global uint2 *srcA, __global uint2 *srcB, __global uint2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
"}\n";
// 3-wide unsigned variant: scalar pointers plus vload3/vstore3, as for int3.
const char *uint3_mul24_kernel_code =
"__kernel void test_int3_mul24(__global uint *srcA, __global uint *srcB, __global uint *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" uint3 tmp = mul24(vload3(tid, srcA), vload3(tid, srcB));\n"
" vstore3(tmp, tid, dst);\n"
"}\n";
const char *uint4_mul24_kernel_code =
"__kernel void test_int4_mul24(__global uint4 *srcA, __global uint4 *srcB, __global uint4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
"}\n";
const char *uint8_mul24_kernel_code =
"__kernel void test_int8_mul24(__global uint8 *srcA, __global uint8 *srcB, __global uint8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
"}\n";
const char *uint16_mul24_kernel_code =
"__kernel void test_int16_mul24(__global uint16 *srcA, __global uint16 *srcB, __global uint16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
"}\n";
/*
 * Host-side reference check for the signed mul24 kernels.
 *
 * For each of the first n elements, the low 24 bits of inptrA[i] and
 * inptrB[i] are sign-extended, multiplied, and the low 32 bits of the
 * product are compared with outptr[i].
 *
 *   inptrA, inptrB  input operands (only the low 24 bits are significant)
 *   outptr          values produced by the device
 *   n               number of scalar elements to check
 *   vecSize         unused; kept so the signature matches verify_uint_mul24
 *
 * Returns 0 when every element matches, -1 on the first mismatch.
 */
int
verify_int_mul24(int *inptrA, int *inptrB, int *outptr, size_t n, size_t vecSize)
{
    size_t i;

    (void)vecSize;

    for (i = 0; i < n; i++)
    {
        /* Sign-extend bit 23 without shifting a signed value (left-shifting
           a negative or overflowing int is undefined): flip the sign bit,
           then subtract its weight. */
        int a = (int)(((unsigned int)inptrA[i] & 0xffffffU) ^ 0x800000U) - 0x800000;
        int b = (int)(((unsigned int)inptrB[i] & 0xffffffU) ^ 0x800000U) - 0x800000;

        /* The exact product of two 24-bit values can need up to 47 bits, so
           multiply in unsigned arithmetic, which wraps modulo 2^32 instead
           of overflowing a signed int. */
        unsigned int r = (unsigned int)a * (unsigned int)b;

        if (r != (unsigned int)outptr[i])
            return -1;
    }
    return 0;
}
/*
 * Host-side reference check for the unsigned mul24 kernels.
 *
 * For each of the first n elements, both inputs are masked to their low
 * 24 bits, multiplied as cl_uint, and the result is compared with
 * outptr[i]. The first mismatch is logged.
 *
 *   inptrA, inptrB  input operands
 *   outptr          values produced by the device
 *   n               number of scalar elements to check
 *   vecSize         unused by this function
 *
 * Returns 0 when every element matches, -1 on the first mismatch.
 */
int
verify_uint_mul24(cl_uint *inptrA, cl_uint *inptrB, cl_uint *outptr, size_t n, size_t vecSize)
{
    cl_uint r;
    size_t i;

    (void)vecSize;

    for (i = 0; i < n; i++)
    {
        r = (inptrA[i] & 0xffffffU) * (inptrB[i] & 0xffffffU);
        if (r != outptr[i])
        {
            /* The index is a size_t; cast it to match the %lu conversion so
               the remaining arguments are read correctly on every ABI. */
            log_error( "failed at %lu: 0x%8.8x * 0x%8.8x = *0x%8.8x vs 0x%8.8x\n", (unsigned long)i, inptrA[i], inptrB[i], r, outptr[i] );
            return -1;
        }
    }
    return 0;
}
/*
 * Draws one value from the generator d and returns its low 24 bits
 * sign-extended to an int, i.e. a value in [-0x800000, 0x7fffff].
 */
static inline int random_int24( MTdata d )
{
    /* Work on an unsigned copy: left-shifting a negative or overflowing
       signed int is undefined behaviour. */
    unsigned int bits = (unsigned int)genrand_int32(d) & 0xffffffU;

    /* Flip bit 23 and subtract its weight to sign-extend. */
    return (int)(bits ^ 0x800000U) - 0x800000;
}
// Type names used in pass/fail log lines, indexed by kernel index:
// entries 0..5 are the signed kernels, entries 6..11 the unsigned kernels.
static const char *test_str_names[] = { "int", "int2", "int3", "int4", "int8", "int16", "uint", "uint2", "uint3", "uint4", "uint8", "uint16" };
/*
 * Runs mul24 on the device for int/uint at every width in vector_sizes and
 * checks the results against the host reference functions.
 *
 *   device   unused by this function
 *   context  context in which buffers and programs are created
 *   queue    command queue used for all transfers and kernel launches
 *   n_elems  number of work-items per launch; buffers hold 16 * n_elems ints
 *
 * Returns 0 when every signed and unsigned variant passes, -1 on any
 * verification failure or API error. All host and device resources are
 * released on every path.
 */
int
test_intmul24(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    /* Kernel sources in test_str_names order: six signed, then six unsigned. */
    const char **kernel_sources[NUM_PROGRAMS*2] = {
        &int_mul24_kernel_code,   &int2_mul24_kernel_code,  &int3_mul24_kernel_code,
        &int4_mul24_kernel_code,  &int8_mul24_kernel_code,  &int16_mul24_kernel_code,
        &uint_mul24_kernel_code,  &uint2_mul24_kernel_code, &uint3_mul24_kernel_code,
        &uint4_mul24_kernel_code, &uint8_mul24_kernel_code, &uint16_mul24_kernel_code
    };
    /* The unsigned sources declare the same kernel names as the signed ones. */
    const char *kernel_names[NUM_PROGRAMS] = {
        "test_int_mul24", "test_int2_mul24", "test_int3_mul24",
        "test_int4_mul24", "test_int8_mul24", "test_int16_mul24"
    };
    cl_mem      streams[3] = { NULL, NULL, NULL };
    cl_int      *input_ptr[2] = { NULL, NULL };
    cl_int      *output_ptr = NULL;
    cl_program  program[NUM_PROGRAMS*2];
    cl_kernel   kernel[NUM_PROGRAMS*2];
    size_t      threads[1];
    size_t      length = sizeof(cl_int) * 16 * n_elems;
    int         num_elements = n_elems * 16;
    int         failed = 0;     /* set when a verification step fails */
    int         result = -1;    /* pessimistic until the end is reached */
    int         err;
    int         i;
    MTdata      d;

    /* Start from NULL handles so cleanup can tell what was created. */
    for (i = 0; i < 2*NUM_PROGRAMS; i++)
    {
        program[i] = NULL;
        kernel[i] = NULL;
    }

    input_ptr[0] = (cl_int*)malloc(length);
    input_ptr[1] = (cl_int*)malloc(length);
    output_ptr   = (cl_int*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    /* streams[0], streams[1]: inputs; streams[2]: output. */
    for (i = 0; i < 3; i++)
    {
        streams[i] = clCreateBuffer(context, 0, length, NULL, NULL);
        if (!streams[i])
        {
            log_error("clCreateBuffer failed\n");
            goto cleanup;
        }
    }

    /* Fill the first input completely before the second so the random
       sequence is consumed in a fixed order. */
    d = init_genrand( gRandomSeed );
    for (i = 0; i < num_elements; i++)
        input_ptr[0][i] = random_int24(d);
    for (i = 0; i < num_elements; i++)
        input_ptr[1][i] = random_int24(d);
    free_mtdata(d); d = NULL;

    for (i = 0; i < 2; i++)
    {
        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, input_ptr[i], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            goto cleanup;
        }
    }

    for (i = 0; i < 2*NUM_PROGRAMS; i++)
    {
        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1, kernel_sources[i], kernel_names[i % NUM_PROGRAMS]);
        if (err)
            goto cleanup;
    }

    for (i = 0; i < 2*NUM_PROGRAMS; i++)
    {
        err  = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2]);
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            goto cleanup;
        }
    }

    // test signed
    threads[0] = (unsigned int)n_elems;
    for (i = 0; i < NUM_PROGRAMS; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            goto cleanup;
        }
        err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueReadBuffer failed\n");
            goto cleanup;
        }
        /* Check every scalar the launch produced: n_elems work-items, each
           writing vector_sizes[i] lanes. */
        if (verify_int_mul24(input_ptr[0], input_ptr[1], output_ptr, (size_t)n_elems * vector_sizes[i], vector_sizes[i]))
        {
            log_error("INT_MUL24 %s test failed\n", test_str_names[i]);
            /* Remember the failure separately: err is overwritten by the
               CL calls of the unsigned pass below. */
            failed = 1;
            break;
        }
        log_info("INT_MUL24 %s test passed\n", test_str_names[i]);
    }

    // clamp the set of input values to be in range
    for (i = 0; i < num_elements; i++)
        input_ptr[0][i] &= 0xffffffU;
    for (i = 0; i < num_elements; i++)
        input_ptr[1][i] &= 0xffffffU;

    for (i = 0; i < 2; i++)
    {
        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, input_ptr[i], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            goto cleanup;
        }
    }

    // test unsigned
    for (i = NUM_PROGRAMS; i < 2*NUM_PROGRAMS; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            goto cleanup;
        }
        err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueReadBuffer failed\n");
            goto cleanup;
        }
        if (verify_uint_mul24((cl_uint*) input_ptr[0], (cl_uint*) input_ptr[1], (cl_uint*) output_ptr, n_elems * vector_sizes[i-NUM_PROGRAMS], vector_sizes[i-NUM_PROGRAMS]))
        {
            log_error("UINT_MUL24 %s test failed\n", test_str_names[i]);
            failed = 1;
            break;
        }
        log_info("UINT_MUL24 %s test passed\n", test_str_names[i]);
    }

    result = failed ? -1 : 0;

cleanup:
    for (i = 0; i < 3; i++)
    {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    for (i = 0; i < 2*NUM_PROGRAMS; i++)
    {
        if (kernel[i])
            clReleaseKernel(kernel[i]);
        if (program[i])
            clReleaseProgram(program[i]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(output_ptr);
    return result;
}

View File

@@ -0,0 +1,248 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/* Turns its argument into a string literal. */
#define str(s) #s
/*
 * Reference population count.
 *   x    value to examine
 *   __T  integer type used for the working copy of x
 *   __n  number of low-order bits to inspect
 *   __r  lvalue that receives the number of set bits
 * Expands to a braced block, so it must be used as a statement.
 */
#define __popcnt(x, __T, __n, __r) \
{ \
    __T y = x; \
    __r = 0; \
    int k = 0; \
    while(k < __n) \
    { \
        if((y & (__T)0x1) != 0) \
            __r++; \
        y >>= (__T)1; \
        k++; \
    } \
}
/*
 * Generates verify_popcount_<__T>(p, r, n, sizeName, vecSize).
 *
 * The generated function treats p (inputs) and r (device results) as arrays
 * of __T, recomputes popcount for the first n elements with __popcnt, and
 * compares. It returns 0 when all n elements match; on the first mismatch it
 * logs the element and returns -1. sizeName is only used in the log line and
 * vecSize is not used.
 *
 * In the log call the index is cast to unsigned long and the input value to
 * unsigned long long (masked to the width of __T) so that the arguments
 * match their conversion specifiers for every instantiated type, including
 * the 64-bit ones.
 */
#define __verify_popcount_func(__T) \
static int verify_popcount_##__T( const void *p, const void *r, size_t n, const char *sizeName, size_t vecSize ) \
{ \
    const __T *inA = (const __T *) p; \
    const __T *outptr = (const __T *) r; \
    size_t i; \
    int _n = sizeof(__T)*8; \
    __T ref; \
    for(i = 0; i < n; i++) \
    { \
        __T x = inA[i]; \
        __T res = outptr[i]; \
        __popcnt(x, __T, _n, ref); \
        if(res != ref) \
        { \
            log_info( "%lu) Failure for popcount( (%s%s) 0x%llx ) = *%d vs %d\n", (unsigned long)i, str(__T), sizeName, (unsigned long long)x & (~0ULL >> (64 - _n)), (int)ref, (int)res ); \
            return -1; \
        }\
    } \
    return 0; \
}
// Instantiate one verifier per scalar type under test.
__verify_popcount_func(cl_char);
__verify_popcount_func(cl_uchar);
__verify_popcount_func(cl_short);
__verify_popcount_func(cl_ushort);
__verify_popcount_func(cl_int);
__verify_popcount_func(cl_uint);
__verify_popcount_func(cl_long);
__verify_popcount_func(cl_ulong);
// Common signature of the generated verifiers: (inputs, results, element count, size suffix for logging, vector size).
typedef int (*verifyFunc)( const void *, const void *, size_t n, const char *sizeName, size_t vecSize);
// Verifier table; the index is the same "type" index used for test_str_names and kSizes.
static const verifyFunc verify[] = { verify_popcount_cl_char, verify_popcount_cl_uchar,
verify_popcount_cl_short, verify_popcount_cl_ushort,
verify_popcount_cl_int, verify_popcount_cl_uint,
verify_popcount_cl_long, verify_popcount_cl_ulong };
// OpenCL C type names spliced into the generated kernel source.
static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
// Vector widths under test and the matching type-name suffixes.
static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };
// Suffixes for the kernel's pointer parameters. The width-3 entry is empty:
// that kernel takes scalar pointers and uses vload3/vstore3.
static const char *vector_param_size_names[] = { "", "2", "", "4", "8", "16" };
// Size in bytes of one scalar of each type, in test_str_names order.
static const size_t kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 };
/*
 * Logs the pieces of a kernel source one after another, without separators.
 *   src          array of source fragments
 *   nSrcStrings  number of fragments in src
 */
static void printSrc(const char *src[], int nSrcStrings) {
    int idx = 0;

    while (idx < nSrcStrings) {
        log_info("%s", src[idx]);
        idx++;
    }
}
/*
 * Builds and runs a popcount kernel for every scalar type in test_str_names
 * at every width in vector_sizes, and checks the device results with the
 * matching verifier from the verify table.
 *
 *   device   unused by this function
 *   context  context in which buffers and programs are created
 *   queue    command queue used for all transfers and kernel launches
 *   n_elems  scales the data set: buffers hold 8 * n_elems cl_ints
 *
 * long/ulong are skipped when gHasLong is false. A verification failure for
 * one type/width is counted and the remaining combinations still run.
 *
 * Returns 0 when everything passed, -1 on any verification failure or API
 * error. Host buffers and CL objects are released on every path.
 */
int test_popcount(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    cl_int *input_ptr[1] = { NULL }, *output_ptr = NULL, *p;
    cl_mem streams[2] = { NULL, NULL };     /* [0] input, [1] output */
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    int err;
    int result = -1;                        /* pessimistic until the end is reached */
    int i;
    cl_uint vectorSize;
    cl_uint type;
    MTdata d;
    int fail_count = 0;
    size_t length = sizeof(cl_int) * 8 * n_elems;

    input_ptr[0] = (cl_int*)malloc(length);
    output_ptr   = (cl_int*)malloc(length);
    if (!input_ptr[0] || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    d = init_genrand( gRandomSeed );
    p = input_ptr[0];
    for (i=0; i<8 * n_elems; i++)
        p[i] = genrand_int32(d);
    free_mtdata(d); d = NULL;

    for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
    {
        //embedded devices don't support long/ulong so skip over
        if (! gHasLong && strstr(test_str_names[type],"long"))
        {
            log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", test_str_names[type] );
            continue;
        }

        verifyFunc f = verify[ type ];
        // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
        size_t elementCount = length / kSizes[type];

        log_info( "%s", test_str_names[type] );
        fflush( stdout );

        // Set up data streams for the type
        for (i = 0; i < 2; i++)
        {
            streams[i] = clCreateBuffer(context, 0, length, NULL, NULL);
            if (!streams[i])
            {
                log_error("clCreateBuffer failed\n");
                goto cleanup;
            }
        }

        err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            goto cleanup;
        }

        for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ )
        {
            /* Source fragments; the width-3 kernel loads and stores through
               vload3/vstore3 on scalar pointers. */
            const char *source[] = {
                "__kernel void test_popcount_", test_str_names[type], vector_size_names[vectorSize],
                "(__global ", test_str_names[type], vector_param_size_names[vectorSize],
                " *srcA, __global ", test_str_names[type], vector_param_size_names[vectorSize],
                " *dst)\n"
                "{\n"
                " int tid = get_global_id(0);\n"
                "\n"
                " ", test_str_names[type], vector_size_names[vectorSize], " sA;\n",
                " sA = ", ( vector_sizes[ vectorSize ] == 3 ) ? "vload3( tid, srcA )" : "srcA[tid]", ";\n",
                " ", test_str_names[type], vector_size_names[vectorSize], " dstVal = popcount(sA);\n"
                " ", ( vector_sizes[ vectorSize ] == 3 ) ? "vstore3( dstVal, tid, dst )" : "dst[ tid ] = dstVal", ";\n",
                "}\n" };
            char kernelName[128];

            snprintf( kernelName, sizeof( kernelName ), "test_popcount_%s%s", test_str_names[type], vector_size_names[vectorSize] );
            err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
            if (err)
                goto cleanup;

            err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
            err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
            if (err != CL_SUCCESS)
            {
                log_error("clSetKernelArgs failed\n");
                goto cleanup;
            }

            //Wipe the output buffer clean
            uint32_t pattern = 0xdeadbeef;
            memset_pattern4( output_ptr, &pattern, length );
            err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clEnqueueWriteBuffer failed\n");
                goto cleanup;
            }

            /* One work-item per vector. */
            size_t size = elementCount / (vector_sizes[vectorSize]);
            err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clEnqueueNDRangeKernel failed\n");
                goto cleanup;
            }

            err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clEnqueueReadBuffer failed\n");
                goto cleanup;
            }

            /* Verify vector by vector, advancing by the vector's byte size. */
            char *inP = (char *)input_ptr[0];
            char *outP = (char *)output_ptr;
            for( size_t e = 0; e < size; e++ )
            {
                if( f( inP, outP, (vector_sizes[vectorSize]), vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) {
                    printSrc(source, sizeof(source)/sizeof(source[0]));
                    ++fail_count; break; // return -1;
                }
                inP += kSizes[type] * ( (vector_sizes[vectorSize]) );
                outP += kSizes[type] * ( (vector_sizes[vectorSize]) );
            }

            /* Reset the handles so cleanup never releases them twice. */
            clReleaseKernel( kernel );
            kernel = NULL;
            clReleaseProgram( program );
            program = NULL;
            log_info( "." );
            fflush( stdout );
        }

        clReleaseMemObject( streams[0] );
        streams[0] = NULL;
        clReleaseMemObject( streams[1] );
        streams[1] = NULL;
        log_info( "done\n" );
    }

    if(fail_count) {
        log_info("Failed on %d types\n", fail_count);
    } else {
        log_info("popcount test passed\n");
        result = 0;
    }

cleanup:
    if (kernel)
        clReleaseKernel( kernel );
    if (program)
        clReleaseProgram( program );
    if (streams[0])
        clReleaseMemObject( streams[0] );
    if (streams[1])
        clReleaseMemObject( streams[1] );
    free(input_ptr[0]);
    free(output_ptr);
    return result;
}

View File

@@ -0,0 +1,376 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/* Lower bounds of the unsigned types, as counterparts to the *_MAX names. */
#define UCHAR_MIN 0
#define USHRT_MIN 0
#define UINT_MIN 0
/* Larger of two values. Arguments may be evaluated more than once. */
#ifndef MAX
#define MAX( x, y ) ( (y) < (x) ? (x) : (y) )
#endif
/* Smaller of two values. Arguments may be evaluated more than once. */
#ifndef MIN
#define MIN( x, y ) ( (y) > (x) ? (x) : (y) )
#endif
/*
 * Checks n char results of sub_sat(inA, inB) against a host reference: the
 * difference is formed in cl_int and clamped to [CL_CHAR_MIN, CL_CHAR_MAX].
 * sizeName is only used in the log line; vecSize is unused.
 * Returns 0 if all n elements match, -1 (after logging) on the first mismatch.
 */
static int verify_subsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize )
{
    int idx = 0;
    while( idx < n )
    {
        cl_int expected = (cl_int) inA[idx] - (cl_int) inB[idx];
        if( expected < CL_CHAR_MIN )
            expected = CL_CHAR_MIN;
        if( expected > CL_CHAR_MAX )
            expected = CL_CHAR_MAX;
        if( expected != outptr[idx] )
        {
            log_info( "\n%d) Failure for sub_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", idx, sizeName, inA[idx], sizeName, inB[idx], expected, outptr[idx] );
            return -1;
        }
        idx++;
    }
    return 0;
}
/*
 * Checks n uchar results of sub_sat(inA, inB) against a host reference: the
 * difference is formed in cl_int and clamped to [0, CL_UCHAR_MAX].
 * sizeName is only used in the log line; vecSize is unused.
 * Returns 0 if all n elements match, -1 (after logging) on the first mismatch.
 */
static int verify_subsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const cl_uchar *outptr, int n, const char *sizeName, int vecSize )
{
    int idx = 0;
    while( idx < n )
    {
        cl_int expected = (cl_int) inA[idx] - (cl_int) inB[idx];
        if( expected < 0 )
            expected = 0;
        if( expected > CL_UCHAR_MAX )
            expected = CL_UCHAR_MAX;
        if( expected != outptr[idx] )
        {
            log_info( "\n%d) Failure for sub_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", idx, sizeName, inA[idx], sizeName, inB[idx], expected, outptr[idx] );
            return -1;
        }
        idx++;
    }
    return 0;
}
/*
 * Checks n short results of sub_sat(inA, inB) against a host reference: the
 * difference is formed in cl_int and clamped to [CL_SHRT_MIN, CL_SHRT_MAX].
 * sizeName is only used in the log line; vecSize is unused.
 * Returns 0 if all n elements match, -1 (after logging) on the first mismatch.
 */
static int verify_subsat_short( const cl_short *inA, const cl_short *inB, const cl_short *outptr, int n, const char *sizeName, int vecSize )
{
    int idx = 0;
    while( idx < n )
    {
        cl_int expected = (cl_int) inA[idx] - (cl_int) inB[idx];
        if( expected < CL_SHRT_MIN )
            expected = CL_SHRT_MIN;
        if( expected > CL_SHRT_MAX )
            expected = CL_SHRT_MAX;
        if( expected != outptr[idx] )
        {
            log_info( "\n%d) Failure for sub_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", idx, sizeName, inA[idx], sizeName, inB[idx], expected, outptr[idx] );
            return -1;
        }
        idx++;
    }
    return 0;
}
/*
 * Checks n ushort results of sub_sat(inA, inB) against a host reference: the
 * difference is formed in cl_int and clamped to [0, CL_USHRT_MAX].
 * sizeName is only used in the log line; vecSize is unused.
 * Returns 0 if all n elements match, -1 (after logging) on the first mismatch.
 */
static int verify_subsat_ushort( const cl_ushort *inA, const cl_ushort *inB, const cl_ushort *outptr, int n, const char *sizeName , int vecSize)
{
    int idx = 0;
    while( idx < n )
    {
        cl_int expected = (cl_int) inA[idx] - (cl_int) inB[idx];
        if( expected < 0 )
            expected = 0;
        if( expected > CL_USHRT_MAX )
            expected = CL_USHRT_MAX;
        if( expected != outptr[idx] )
        {
            log_info( "\n%d) Failure for sub_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", idx, sizeName, inA[idx], sizeName, inB[idx], expected, outptr[idx] );
            return -1;
        }
        idx++;
    }
    return 0;
}
/*
 * Checks n int results of sub_sat(inA, inB) against a host reference.
 * The difference is formed with wrap-around in cl_uint; overflow is detected
 * from the sign of inB and the direction in which the wrapped result moved
 * relative to inA, and replaced by CL_INT_MAX or CL_INT_MIN.
 * sizeName is only used in the log line; vecSize is unused.
 * Returns 0 if all n elements match, -1 (after logging) on the first mismatch.
 */
static int verify_subsat_int( const cl_int *inA, const cl_int *inB, const cl_int *outptr, int n, const char *sizeName , int vecSize)
{
    int idx = 0;
    while( idx < n )
    {
        cl_int expected = (cl_int) ((cl_uint)inA[idx] - (cl_uint)inB[idx]);
        /* Subtracting a negative must not decrease the value... */
        if( inB[idx] < 0 && expected < inA[idx] )
            expected = CL_INT_MAX;
        /* ...and subtracting a non-negative must not increase it. */
        else if( inB[idx] >= 0 && expected > inA[idx] )
            expected = CL_INT_MIN;
        if( expected != outptr[idx] )
        {
            log_info( "\n%d) Failure for sub_sat( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", idx, sizeName, inA[idx], sizeName, inB[idx], expected, outptr[idx] );
            return -1;
        }
        idx++;
    }
    return 0;
}
/*
 * Checks n uint results of sub_sat(inA, inB) against a host reference:
 * inA - inB, or 0 when inA is smaller than inB.
 * sizeName is only used in the log line; vecSize is unused.
 * Returns 0 if all n elements match, -1 (after logging) on the first mismatch.
 */
static int verify_subsat_uint( const cl_uint *inA, const cl_uint *inB, const cl_uint *outptr, int n, const char *sizeName , int vecSize)
{
    int idx = 0;
    while( idx < n )
    {
        cl_uint expected = ( inA[idx] < inB[idx] ) ? 0 : inA[idx] - inB[idx];
        if( expected != outptr[idx] )
        {
            log_info( "\n%d) Failure for sub_sat( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", idx, sizeName, inA[idx], sizeName, inB[idx], expected, outptr[idx] );
            return -1;
        }
        idx++;
    }
    return 0;
}
/*
 * Checks n long results of sub_sat(inA, inB) against a host reference.
 * The difference is formed with wrap-around in cl_ulong; overflow is detected
 * from the sign of inB and the direction in which the wrapped result moved
 * relative to inA, and replaced by CL_LONG_MAX or CL_LONG_MIN.
 * sizeName is only used in the log line; vecSize is unused.
 * Returns 0 if all n elements match, -1 (after logging) on the first mismatch.
 */
static int verify_subsat_long( const cl_long *inA, const cl_long *inB, const cl_long *outptr, int n, const char *sizeName , int vecSize)
{
    int idx = 0;
    while( idx < n )
    {
        cl_long expected = (cl_long)((cl_ulong)inA[idx] - (cl_ulong)inB[idx]);
        /* Subtracting a negative must not decrease the value... */
        if( inB[idx] < 0 && expected < inA[idx] )
            expected = CL_LONG_MAX;
        /* ...and subtracting a non-negative must not increase it. */
        else if( inB[idx] >= 0 && expected > inA[idx] )
            expected = CL_LONG_MIN;
        if( expected != outptr[idx] )
        {
            log_info( "%d) Failure for sub_sat( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", idx, sizeName, inA[idx], sizeName, inB[idx], expected, outptr[idx] );
            return -1;
        }
        idx++;
    }
    return 0;
}
/*
 * Checks n ulong results of sub_sat(inA, inB) against a host reference:
 * inA - inB, or 0 when inA is smaller than inB.
 * sizeName is only used in the log line; vecSize is unused.
 * Returns 0 if all n elements match, -1 (after logging) on the first mismatch.
 */
static int verify_subsat_ulong( const cl_ulong *inA, const cl_ulong *inB, const cl_ulong *outptr, int n, const char *sizeName , int vecSize)
{
    int idx = 0;
    while( idx < n )
    {
        cl_ulong expected = ( inA[idx] < inB[idx] ) ? 0 : inA[idx] - inB[idx];
        if( expected != outptr[idx] )
        {
            log_info( "%d) Failure for sub_sat( (ulong%s) 0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", idx, sizeName, inA[idx], sizeName, inB[idx], expected, outptr[idx] );
            return -1;
        }
        idx++;
    }
    return 0;
}
// Type-erased verifier signature: (inA, inB, results, element count, size suffix for logging, vector size).
typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName, int );
// Verifier table indexed by the same "type" index as test_str_names and kSizes.
// NOTE(review): the typed verify_subsat_* functions are cast to verifyFunc and
// called through that pointer type; this relies on all object pointers sharing
// one calling convention on the target platforms.
static const verifyFunc verify[] = { (verifyFunc) verify_subsat_char, (verifyFunc) verify_subsat_uchar,
(verifyFunc) verify_subsat_short, (verifyFunc) verify_subsat_ushort,
(verifyFunc) verify_subsat_int, (verifyFunc) verify_subsat_uint,
(verifyFunc) verify_subsat_long, (verifyFunc) verify_subsat_ulong };
// OpenCL C type names spliced into the generated kernel source.
static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
// Vector widths under test and the matching type-name suffixes.
static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };
// Size in bytes of one scalar of each type, in test_str_names order.
static const size_t kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 };
/*
 * Builds and runs a sub_sat kernel for every scalar type in test_str_names
 * at every width in vector_sizes, and checks the device results with the
 * matching verifier from the verify table.
 *
 *   device   unused by this function
 *   context  context in which buffers and programs are created
 *   queue    command queue used for all transfers and kernel launches
 *   n_elems  scales the data set: buffers hold 4 * n_elems ints
 *
 * long/ulong are skipped when gHasLong is false. A verification failure for
 * one type/width is counted and the remaining combinations still run.
 *
 * Returns 0 when everything passed, -1 on any verification failure or API
 * error. Host buffers and CL objects are released on every path.
 */
int test_sub_sat(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int *input_ptr[2] = { NULL, NULL }, *output_ptr = NULL, *p;
    cl_mem streams[3] = { NULL, NULL, NULL };   /* [0], [1] inputs, [2] output */
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    int err;
    int result = -1;                            /* pessimistic until the end is reached */
    cl_uint i;
    cl_uint vectorSize;
    cl_uint type;
    MTdata d;
    int fail_count = 0;
    size_t length = sizeof(int) * 4 * n_elems;

    input_ptr[0] = (int*)malloc(length);
    input_ptr[1] = (int*)malloc(length);
    output_ptr   = (int*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    d = init_genrand( gRandomSeed );
    p = input_ptr[0];
    for (i=0; i<4 * (cl_uint) n_elems; i++)
        p[i] = genrand_int32(d);
    p = input_ptr[1];
    for (i=0; i<4 * (cl_uint) n_elems; i++)
        p[i] = genrand_int32(d);
    free_mtdata(d); d = NULL;

    for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
    {
        //embedded devices don't support long/ulong so skip over
        if (! gHasLong && strstr(test_str_names[type],"long"))
        {
            log_info( "WARNING: device does not support 64-bit integers. Skipping %s\n", test_str_names[type] );
            continue;
        }

        verifyFunc f = verify[ type ];
        // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
        size_t elementCount = length / kSizes[type];

        log_info( "%s", test_str_names[type] );
        fflush( stdout );

        // Set up data streams for the type
        for (i = 0; i < 3; i++)
        {
            streams[i] = clCreateBuffer(context, 0, length, NULL, NULL);
            if (!streams[i])
            {
                log_error("clCreateBuffer failed\n");
                goto cleanup;
            }
        }

        for (i = 0; i < 2; i++)
        {
            err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, input_ptr[i], 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clEnqueueWriteBuffer failed\n");
                goto cleanup;
            }
        }

        for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ )
        {
            /* General form: index typed vector pointers directly. */
            const char *source[] = {
                "__kernel void test_sub_sat_", test_str_names[type], vector_size_names[vectorSize],
                "(__global ", test_str_names[type], vector_size_names[vectorSize],
                " *srcA, __global ", test_str_names[type], vector_size_names[vectorSize],
                " *srcB, __global ", test_str_names[type], vector_size_names[vectorSize],
                " *dst)\n"
                "{\n"
                " int tid = get_global_id(0);\n"
                "\n"
                " ", test_str_names[type], vector_size_names[vectorSize], " tmp = sub_sat(srcA[tid], srcB[tid]);\n"
                " dst[tid] = tmp;\n"
                "}\n"
            };
            /* Width-3 form: scalar pointers accessed through vload3/vstore3. */
            const char *sourceV3[] = {
                "__kernel void test_sub_sat_", test_str_names[type], vector_size_names[vectorSize],
                "(__global ", test_str_names[type],
                " *srcA, __global ", test_str_names[type],
                " *srcB, __global ", test_str_names[type],
                " *dst)\n"
                "{\n"
                " int tid = get_global_id(0);\n"
                "\n"
                " ", test_str_names[type], vector_size_names[vectorSize], " tmp = sub_sat(vload3(tid, srcA), vload3(tid, srcB));\n"
                " vstore3(tmp, tid, dst);\n"
                "}\n"
            };
            char kernelName[128];

            snprintf( kernelName, sizeof( kernelName ), "test_sub_sat_%s%s", test_str_names[type], vector_size_names[vectorSize] );
            if(vector_sizes[vectorSize] != 3)
            {
                err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
            } else {
                err = create_single_kernel_helper(context, &program, &kernel, sizeof( sourceV3 ) / sizeof( sourceV3[0] ), sourceV3, kernelName );
            }
            if (err)
                goto cleanup;

            err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
            err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
            err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
            if (err != CL_SUCCESS)
            {
                log_error("clSetKernelArgs failed\n");
                goto cleanup;
            }

            //Wipe the output buffer clean
            uint32_t pattern = 0xdeadbeef;
            memset_pattern4( output_ptr, &pattern, length );
            err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clEnqueueWriteBuffer failed\n");
                goto cleanup;
            }

            /* One work-item per vector. */
            size_t size = elementCount / vector_sizes[vectorSize];
            err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clEnqueueNDRangeKernel failed\n");
                goto cleanup;
            }

            err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clEnqueueReadBuffer failed\n");
                goto cleanup;
            }

            /* Verify vector by vector, advancing by the vector's byte size. */
            char *inP = (char *)input_ptr[0];
            char *inP2 = (char *)input_ptr[1];
            char *outP = (char *)output_ptr;
            for( size_t e = 0; e < size; e++ )
            {
                if( f( inP, inP2, outP, vector_sizes[vectorSize], vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) {
                    ++fail_count; break; // return -1;
                }
                inP += kSizes[type] * vector_sizes[vectorSize];
                inP2 += kSizes[type] * vector_sizes[vectorSize];
                outP += kSizes[type] * vector_sizes[vectorSize];
            }

            /* Reset the handles so cleanup never releases them twice. */
            clReleaseKernel( kernel );
            kernel = NULL;
            clReleaseProgram( program );
            program = NULL;
            log_info( "." );
            fflush( stdout );
        }

        for (i = 0; i < 3; i++)
        {
            clReleaseMemObject( streams[i] );
            streams[i] = NULL;
        }
        log_info( "done\n" );
    }

    if(fail_count) {
        log_info("Failed on %d types\n", fail_count);
    } else {
        log_info("SUB_SAT test passed\n");
        result = 0;
    }

cleanup:
    if (kernel)
        clReleaseKernel( kernel );
    if (program)
        clReleaseProgram( program );
    for (i = 0; i < 3; i++)
    {
        if (streams[i])
            clReleaseMemObject( streams[i] );
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(output_ptr);
    return result;
}

View File

@@ -0,0 +1,211 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/conversions.h"
// Number of work-items (and control bytes) per test run.
#define TEST_SIZE 512
// Selects which operator family a run exercises; test_unary_op uses it to
// restrict the per-element control values.
enum OpKonstants
{
kIncrement = 0, // only the increment forms (control values 0 and 1)
kDecrement, // only the decrement forms (control values 2 and 3)
kBoth // any of the four forms
};
// Kernel template, filled in with sprintf. The five %s slots are, in order:
// element type of the inOut pointer, element type of the local, vector-size
// suffix of the local, the load expression, and the store statement.
// control[tid] picks the operator: 0 post-increment, 1 pre-increment,
// 2 post-decrement, anything else pre-decrement.
const char *testKernel =
"__kernel void test( __global %s *inOut, __global char * control )\n"
"{\n"
" size_t tid = get_global_id(0);\n"
"\n"
" %s%s inOutVal = %s;\n"
"\n"
" if( control[tid] == 0 )\n"
" inOutVal++;\n"
" else if( control[tid] == 1 )\n"
" ++inOutVal;\n"
" else if( control[tid] == 2 )\n"
" inOutVal--;\n"
" else // if( control[tid] == 3 )\n"
" --inOutVal;\n"
"\n"
" %s;\n"
"}\n";
// Host-side checker signature for a unary-op run.
// NOTE(review): exact contract (return value meaning, layout of the two data
// pointers) is defined by the implementations, which are not visible here.
typedef int (*OpVerifyFn)( void * actualPtr, void * inputPtr, size_t vecSize, size_t numVecs, cl_char * controls );
// Builds and runs the increment/decrement kernel for one element type and one
// vector width, then passes the device results to verifyFn.
//
//   queue, context - command queue and context to run on
//   whichOp        - restricts the random control stream to ++ only, -- only, or both
//   vecType        - scalar element type under test
//   vecSize        - vector width (1, 2, 3, 4, 8 or 16)
//   d              - random number generator state
//   verifyFn       - host checker matching vecType
//
// Returns 0 on success, -1 or an OpenCL error code on failure.
int test_unary_op( cl_command_queue queue, cl_context context, OpKonstants whichOp,
                   ExplicitType vecType, size_t vecSize,
                   MTdata d, OpVerifyFn verifyFn )
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    cl_long inData[TEST_SIZE * 16], outData[TEST_SIZE * 16];
    cl_char controlData[TEST_SIZE];
    int error;
    size_t i;
    size_t threads[1], localThreads[1];
    char kernelSource[10240];
    char *programPtr;

    // Type-name suffix for each vector width, indexed by the width itself.
    static const char sizeNames[][4] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };

    // Reject widths that would index past sizeNames; the same bound also keeps
    // the data within the fixed-size inData/outData arrays above.
    if( vecSize == 0 || vecSize >= sizeof( sizeNames ) / sizeof( sizeNames[0] ) )
    {
        log_error( "ERROR: Unsupported vector size %d\n", (int)vecSize );
        return -1;
    }

    // Create the source
    char loadLine[ 1024 ], storeLine[ 1024 ];
    if( vecSize == 1 )
    {
        sprintf( loadLine, "inOut[tid]" );
        sprintf( storeLine, "inOut[tid] = inOutVal" );
    }
    else
    {
        // vecSize is a size_t; cast explicitly so the argument matches the
        // conversion specifier on every platform.
        sprintf( loadLine, "vload%d( tid, inOut )", (int)vecSize );
        sprintf( storeLine, "vstore%d( inOutVal, tid, inOut )", (int)vecSize );
    }

    sprintf( kernelSource, testKernel, get_explicit_type_name( vecType ), /*sizeNames[ vecSize ],*/
             get_explicit_type_name( vecType ), sizeNames[ vecSize ],
             loadLine, storeLine );

    // Create the kernel
    programPtr = kernelSource;
    if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test" ) )
    {
        log_error( "ERROR: Unable to create test program!\n" );
        return -1;
    }

    // Generate two streams. The first is our random data to test against, the second is our control stream
    generate_random_data( vecType, vecSize * TEST_SIZE, d, inData );
    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
                                 get_explicit_type_size( vecType ) * vecSize * TEST_SIZE,
                                 inData, &error );
    test_error( error, "Creating input data array failed" );

    // Each 32-bit random word supplies eight 2-bit control values.
    cl_uint bits = 0;
    for( i = 0; i < TEST_SIZE; i++ )
    {
        size_t which = i & 7;
        if( which == 0 )
            bits = genrand_int32(d);
        controlData[ i ] = ( bits >> ( which << 1 ) ) & 0x03;
        if( whichOp == kDecrement )
            // For sub ops, the min control value is 2. Otherwise, it's 0
            controlData[ i ] |= 0x02;
        else if( whichOp == kIncrement )
            // For addition ops, the MAX control value is 1. Otherwise, it's 3
            controlData[ i ] &= ~0x02;
    }
    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
                                 sizeof( controlData ), controlData, &error );
    test_error( error, "Unable to create control stream" );

    // Assign streams and execute
    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
    test_error( error, "Unable to set indexed kernel arguments" );

    // Run the kernel
    threads[0] = TEST_SIZE;
    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    // Read the results (the kernel updates the input buffer in place)
    error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0,
                                 get_explicit_type_size( vecType ) * TEST_SIZE * vecSize,
                                 outData, 0, NULL, NULL );
    test_error( error, "Unable to read output array!" );

    // Now verify the results
    return verifyFn( outData, inData, vecSize, TEST_SIZE, controlData );
}
template<typename T> int VerifyFn( void * actualPtr, void * inputPtr, size_t vecSize, size_t numVecs, cl_char * controls )
{
T * actualData = (T *)actualPtr;
T * inputData = (T *)inputPtr;
size_t index = 0;
for( size_t i = 0; i < numVecs; i++ )
{
for( size_t j = 0; j < vecSize; j++, index++ )
{
T nextVal = inputData[ index ];
if( controls[ i ] & 0x02 )
nextVal--;
else
nextVal++;
if( actualData[ index ] != nextVal )
{
log_error( "ERROR: Validation failed on vector %ld:%ld (expected %lld, got %lld)", i, j,
(cl_long)nextVal, (cl_long)actualData[ index ] );
return -1;
}
}
}
return 0;
}
// Runs test_unary_op for every integer element type and every vector width,
// using the operator family selected by whichOp. 64-bit types are skipped
// when gHasLong is false. Returns 0 if every combination passed, -1 otherwise.
int test_unary_op_set( cl_command_queue queue, cl_context context, OpKonstants whichOp )
{
    // Each element type is paired with the verifier instantiated for its host type.
    struct TypeCase
    {
        ExplicitType type;
        OpVerifyFn   verify;
    };
    const TypeCase cases[] = {
        { kChar,   VerifyFn<cl_char>   },
        { kUChar,  VerifyFn<cl_uchar>  },
        { kShort,  VerifyFn<cl_short>  },
        { kUShort, VerifyFn<cl_ushort> },
        { kInt,    VerifyFn<cl_int>    },
        { kUInt,   VerifyFn<cl_uint>   },
        { kLong,   VerifyFn<cl_long>   },
        { kULong,  VerifyFn<cl_ulong>  }
    };
    const unsigned int widths[] = { 1, 2, 3, 4, 8, 16 };
    const size_t caseCount  = sizeof( cases ) / sizeof( cases[ 0 ] );
    const size_t widthCount = sizeof( widths ) / sizeof( widths[ 0 ] );

    int result = 0;
    RandomSeed seed(gRandomSeed );

    for( size_t c = 0; c < caseCount; c++ )
    {
        const ExplicitType elemType = cases[ c ].type;
        const bool is64Bit = ( elemType == kLong || elemType == kULong );
        if( is64Bit && !gHasLong )
            continue;

        for( size_t w = 0; w < widthCount; w++ )
        {
            const int status = test_unary_op( queue, context, whichOp, elemType, widths[ w ], seed, cases[ c ].verify );
            if( status == 0 )
                continue;

            // Keep going after a failure so every failing combination is reported.
            log_error( " Vector %s%d FAILED\n", get_explicit_type_name( elemType ), widths[ w ] );
            result = -1;
        }
    }
    return result;
}
// Test entry point: exercises the increment and decrement operators together.
// deviceID and num_elements are not used by this test.
int test_unary_ops_full(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const OpKonstants opsUnderTest = kBoth;
    return test_unary_op_set( queue, context, opsUnderTest );
}
// Test entry point: exercises only the increment operators.
// deviceID and num_elements are not used by this test.
int test_unary_ops_increment(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const OpKonstants opsUnderTest = kIncrement;
    return test_unary_op_set( queue, context, opsUnderTest );
}
// Test entry point: exercises only the decrement operators.
// deviceID and num_elements are not used by this test.
int test_unary_ops_decrement(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const OpKonstants opsUnderTest = kDecrement;
    return test_unary_op_set( queue, context, opsUnderTest );
}

View File

@@ -0,0 +1,263 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/conversions.h"
// Vector widths exercised by the upsample test.
static const int vector_sizes[] = { 1, 2, 3, 4, 8, 16 };
// Entry count of vector_sizes, derived from the array so the two cannot drift apart.
#define NUM_VECTOR_SIZES ( (int)( sizeof( vector_sizes ) / sizeof( vector_sizes[ 0 ] ) ) )
// Kernel source template for two-input functions on non-3-wide types, expanded with sprintf.
// Placeholders, in order: sourceA type name, sourceB type name,
// destination type name, name of the built-in function to call.
const char *permute_2_param_kernel_pattern =
"__kernel void test_upsample(__global %s *sourceA, __global %s *sourceB, __global %s *destValues)\n"
"{\n"
" int tid = get_global_id(0);\n"
" destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
"\n"
"}\n";
// Variant of the two-input kernel template for 3-wide vectors: the buffers are
// accessed through vload3/vstore3 rather than by direct indexing.
// Placeholders, in order: sourceA type name, sourceB type name,
// destination type name, name of the built-in function to call.
const char *permute_2_param_kernel_pattern_v3srcdst =
"__kernel void test_upsample(__global %s *sourceA, __global %s *sourceB, __global %s *destValues)\n"
"{\n"
" int tid = get_global_id(0);\n"
" vstore3( %s( vload3(tid,sourceA), vload3(tid, sourceB) ), tid, destValues);\n"
"\n"
"}\n";
// Builds a kernel that applies the two-argument built-in fnName element-wise,
// runs it over count vectors and compares the device output byte-for-byte
// against expectedResults.
//
//   sourceAType/sourceBType/outType          - scalar element types of the two inputs and the output
//   sourceAVecSize/sourceBVecSize/outVecSize - vector widths; either all 3 or none 3
//   count                                    - number of vectors to process
//   sourceA, sourceB                         - host input data
//   expectedResults                          - host reference output, outVecSize * count elements
//
// Returns 0 on success, -1 or an OpenCL error code on failure.
int test_upsample_2_param_fn(cl_command_queue queue, cl_context context, const char *fnName, ExplicitType sourceAType, ExplicitType sourceBType, ExplicitType outType,
                             size_t sourceAVecSize, size_t sourceBVecSize, size_t outVecSize, size_t count,
                             void *sourceA, void *sourceB, void *expectedResults )
{
    // Wrapper objects are used instead of raw handles so the early returns below
    // (including those inside test_error) do not leak the program, kernel or buffers.
    // NOTE(review): relies on the harness wrappers releasing their handle when
    // they go out of scope - confirm against the harness headers.
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[3];
    int error, retCode = 0;
    void *outData;
    size_t threadSize, groupSize, i;
    unsigned char *expectedPtr, *outPtr;
    size_t sourceATypeSize, sourceBTypeSize, outTypeSize, outStride;
    char programSource[ 10240 ], aType[ 64 ], bType[ 64 ], tType[ 64 ];
    const char *progPtr;

    sourceATypeSize = get_explicit_type_size( sourceAType );
    sourceBTypeSize = get_explicit_type_size( sourceBType );
    outTypeSize = get_explicit_type_size( outType );
    outStride = outTypeSize * outVecSize;

    /* Construct the program. 3-wide vectors keep the scalar type name because
       their kernel goes through vload3/vstore3 on scalar pointers. */
    strcpy( aType, get_explicit_type_name( sourceAType ) );
    strcpy( bType, get_explicit_type_name( sourceBType ) );
    strcpy( tType, get_explicit_type_name( outType ) );
    if( sourceAVecSize > 1 && sourceAVecSize != 3)
        sprintf( aType + strlen( aType ), "%d", (int)sourceAVecSize );
    if( sourceBVecSize > 1 && sourceBVecSize != 3)
        sprintf( bType + strlen( bType ), "%d", (int)sourceBVecSize );
    if( outVecSize > 1 && outVecSize != 3)
        sprintf( tType + strlen( tType ), "%d", (int)outVecSize );

    if(sourceAVecSize == 3 && sourceBVecSize == 3 && outVecSize == 3)
    {
        // permute_2_param_kernel_pattern_v3srcdst
        sprintf( programSource, permute_2_param_kernel_pattern_v3srcdst, aType, bType, tType, fnName );
    }
    else if(sourceAVecSize != 3 && sourceBVecSize != 3 && outVecSize != 3)
    {
        sprintf( programSource, permute_2_param_kernel_pattern, aType, bType, tType, fnName );
    }
    else
    {
        vlog_error("Not implemented for %d,%d -> %d\n",
                   (int)sourceAVecSize, (int)sourceBVecSize, (int)outVecSize);
        return -1;
    }

    progPtr = (const char *)programSource;
    if( create_single_kernel_helper( context, &program, &kernel, 1, &progPtr, "test_upsample" ) )
    {
        return -1;
    }

    /* Set up parameters */
    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sourceATypeSize * sourceAVecSize * count, sourceA, &error );
    if( error != CL_SUCCESS )
    {
        log_error("ERROR: Creating input array A failed!\n");
        return -1;
    }
    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sourceBTypeSize * sourceBVecSize * count, sourceB, &error );
    if( error != CL_SUCCESS )
    {
        log_error("ERROR: Creating input array B failed!\n");
        return -1;
    }
    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), outStride * count, NULL, &error );
    if( error != CL_SUCCESS )
    {
        log_error("ERROR: Creating output array failed!\n");
        return -1;
    }

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] );
    test_error( error, "Unable to set kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] );
    test_error( error, "Unable to set kernel arguments" );
    error = clSetKernelArg(kernel, 2, sizeof( streams[2] ), &streams[2] );
    test_error( error, "Unable to set kernel arguments" );

    /* Run the kernel */
    threadSize = count;
    error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize );
    test_error( error, "Unable to get work group size to use" );
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &threadSize, &groupSize, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    /* The host buffer is allocated only now, so none of the early returns above
       has to free it. */
    outData = malloc( outStride * count );
    if( outData == NULL )
    {
        log_error( "ERROR: Unable to allocate host memory for the output values!\n" );
        return -1;
    }
    error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, outStride * count, outData, 0, NULL, NULL );
    if( error != CL_SUCCESS )
    {
        free( outData );
        test_error( error, "Unable to read output values!" );
    }

    /* Now verify the results. The expected data uses the same layout as the
       device output, so each vector is compared as raw bytes rather than
       through its element type. */
    expectedPtr = (unsigned char *)expectedResults;
    outPtr = (unsigned char *)outData;
    for( i = 0; i < count; i++ )
    {
        if( memcmp( outPtr, expectedPtr, outStride ) != 0 )
        {
            log_error( "ERROR: Output value %d does not validate!\n", (int)i );
            retCode = -1;
            break;
        }
        expectedPtr += outStride;
        outPtr += outStride;
    }

    free( outData );
    return retCode;
}
// Builds the host reference output for upsample: each result element is twice
// as wide as the inputs, with the sourceA element in the high half and the
// sourceB element in the low half. Both inputs are read as unsigned bit patterns.
//
//   type             - element type of the inputs; only its size (1, 2 or 4 bytes) is used
//   sourceA, sourceB - input arrays of count elements each
//   count            - number of elements
//
// Returns a malloc'ed array of count double-width elements that the caller must
// free(), or NULL if the element size is unsupported or the allocation fails.
void * create_upsample_data( ExplicitType type, void *sourceA, void *sourceB, size_t count )
{
    size_t i;
    const size_t tSize = get_explicit_type_size( type );
    void *outData = malloc( tSize * count * 2 );

    if( outData == NULL )
    {
        log_error( "ERROR: unable to allocate upsample reference data\n" );
        return NULL;
    }

    switch( tSize )
    {
        case 1:
        {
            const cl_uchar *aPtr = (const cl_uchar *) sourceA;
            const cl_uchar *bPtr = (const cl_uchar *) sourceB;
            cl_ushort *dPtr = (cl_ushort*) outData;
            for( i = 0; i < count; i++ )
            {
                cl_ushort u = *bPtr++;
                u |= ((cl_ushort) *aPtr++) << 8;
                *dPtr++ = u;
            }
        }
        break;

        case 2:
        {
            const cl_ushort *aPtr = (const cl_ushort *) sourceA;
            const cl_ushort *bPtr = (const cl_ushort *) sourceB;
            cl_uint *dPtr = (cl_uint*) outData;
            for( i = 0; i < count; i++ )
            {
                cl_uint u = *bPtr++;
                u |= ((cl_uint) *aPtr++) << 16;
                *dPtr++ = u;
            }
        }
        break;

        case 4:
        {
            const cl_uint *aPtr = (const cl_uint *) sourceA;
            const cl_uint *bPtr = (const cl_uint *) sourceB;
            cl_ulong *dPtr = (cl_ulong*) outData;
            for( i = 0; i < count; i++ )
            {
                cl_ulong u = *bPtr++;
                u |= ((cl_ulong) *aPtr++) << 32;
                *dPtr++ = u;
            }
        }
        break;

        default:
            // tSize is a size_t; cast so the argument matches the conversion specifier.
            log_error( "ERROR: unknown type size: %d\n", (int)tSize );
            // Nothing was produced, so release the buffer instead of leaking it.
            free( outData );
            return NULL;
    }
    return outData;
}
// Test entry point for the upsample built-in: for every supported input type
// and vector width, generates random inputs, computes the host reference and
// checks the device output against it. Combinations with a 64-bit result are
// skipped when gHasLong is false. deviceID and num_elements are not used.
// Returns 0 if every combination passed, -1 otherwise.
int test_integer_upsample(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    // Parallel tables: high-half input type, low-half input type, result type.
    ExplicitType typesToTest[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kNumExplicitTypes };
    ExplicitType baseTypes[] = { kUChar, kUChar, kUShort, kUShort, kUInt, kUInt, kNumExplicitTypes };
    ExplicitType outTypes[] = { kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes };
    // Scalar elements generated per input; each run processes elementCount / size vectors.
    const size_t elementCount = 256;
    int i, err = 0;
    int sizeIndex;
    size_t size;
    void *sourceA, *sourceB, *expected;
    RandomSeed seed(gRandomSeed );

    for( i = 0; typesToTest[ i ] != kNumExplicitTypes; i++ )
    {
        if ((outTypes[i] == kLong || outTypes[i] == kULong) && !gHasLong)
        {
            log_info( "Longs unsupported on this device. Skipping...\n");
            continue;
        }

        for( sizeIndex = 0; sizeIndex < NUM_VECTOR_SIZES; sizeIndex++)
        {
            size = (size_t)vector_sizes[sizeIndex];
            log_info("running upsample test for %s %s vector size %d\n", get_explicit_type_name(typesToTest[i]), get_explicit_type_name(baseTypes[i]), (int)size);

            sourceA = create_random_data( typesToTest[ i ], seed, elementCount );
            sourceB = create_random_data( baseTypes[ i ], seed, elementCount );
            // Only build the reference when both inputs exist.
            expected = ( sourceA != NULL && sourceB != NULL )
                           ? create_upsample_data( typesToTest[ i ], sourceA, sourceB, elementCount )
                           : NULL;

            if( expected == NULL )
            {
                // Never hand NULL data to the kernel runner; report and move on.
                log_error( "ERROR: Unable to create test data for %s%d\n", get_explicit_type_name( typesToTest[ i ] ), (int)size );
                err = -1;
            }
            else if( test_upsample_2_param_fn( queue, context, "upsample",
                                               typesToTest[ i ], baseTypes[ i ],
                                               outTypes[ i ],
                                               size, size, size,
                                               elementCount / size,
                                               sourceA, sourceB, expected ) != 0 )
            {
                log_error( "TEST FAILED: %s for %s%d\n", "upsample", get_explicit_type_name( typesToTest[ i ] ), (int)size );
                err = -1;
            }

            // free(NULL) is a no-op, so this is safe on the failure path too.
            free( sourceA );
            free( sourceB );
            free( expected );
        }
    }
    return err;
}

File diff suppressed because it is too large Load Diff