mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Initial open source release of OpenCL 2.2 CTS.
This commit is contained in:
26
test_conformance/integer_ops/CMakeLists.txt
Normal file
26
test_conformance/integer_ops/CMakeLists.txt
Normal file
@@ -0,0 +1,26 @@
|
||||
set(MODULE_NAME INTEGER_OPS)
|
||||
|
||||
set(${MODULE_NAME}_SOURCES
|
||||
main.c
|
||||
test_int_basic_ops.c
|
||||
test_integers.cpp
|
||||
test_upsample.cpp
|
||||
test_intmul24.c test_intmad24.c
|
||||
test_sub_sat.c test_add_sat.c
|
||||
test_abs.c test_absdiff.c
|
||||
test_unary_ops.cpp
|
||||
verification_and_generation_functions.c
|
||||
test_popcount.c
|
||||
../../test_common/harness/ThreadPool.c
|
||||
../../test_common/harness/mt19937.c
|
||||
../../test_common/harness/conversions.c
|
||||
../../test_common/harness/errorHelpers.c
|
||||
../../test_common/harness/threadTesting.c
|
||||
../../test_common/harness/testHarness.c
|
||||
../../test_common/harness/kernelHelpers.c
|
||||
../../test_common/harness/msvc9.c
|
||||
../../test_common/harness/parseParameters.cpp
|
||||
)
|
||||
|
||||
include(../CMakeCommon.txt)
|
||||
|
||||
28
test_conformance/integer_ops/Jamfile
Normal file
28
test_conformance/integer_ops/Jamfile
Normal file
@@ -0,0 +1,28 @@
|
||||
project
|
||||
: requirements
|
||||
<toolset>gcc:<cflags>-xc++
|
||||
<toolset>msvc:<cflags>"/TP"
|
||||
;
|
||||
|
||||
# Sources for the integer_ops conformance executable.
# Kept in step with the test sources listed in this directory's CMakeLists.txt
# and Makefile: the per-type test_int.c / test_long.c / test_uint.c /
# test_ulong.c entries are replaced by test_int_basic_ops.c, and the popcount,
# unary-ops and verification/generation sources that main.c's test table
# depends on are included.
exe test_integer_ops
    : main.c
      test_abs.c
      test_absdiff.c
      test_add_sat.c
      test_int_basic_ops.c
      test_integers.cpp
      test_intmad24.c
      test_intmul24.c
      test_popcount.c
      test_sub_sat.c
      test_unary_ops.cpp
      test_upsample.cpp
      verification_and_generation_functions.c
    ;
|
||||
|
||||
install dist
|
||||
: test_integer_ops
|
||||
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/integer_ops
|
||||
<variant>release:<location>$(DIST)/release/tests/test_conformance/integer_ops
|
||||
;
|
||||
|
||||
52
test_conformance/integer_ops/Makefile
Normal file
52
test_conformance/integer_ops/Makefile
Normal file
@@ -0,0 +1,52 @@
|
||||
ifdef BUILD_WITH_ATF
|
||||
ATF = -framework ATF
|
||||
USE_ATF = -DUSE_ATF
|
||||
endif
|
||||
|
||||
SRCS = main.c \
|
||||
test_popcount.c \
|
||||
test_int_basic_ops.c \
|
||||
test_integers.cpp \
|
||||
test_upsample.cpp \
|
||||
test_intmul24.c test_intmad24.c \
|
||||
test_sub_sat.c test_add_sat.c \
|
||||
test_abs.c test_absdiff.c \
|
||||
test_unary_ops.cpp \
|
||||
verification_and_generation_functions.c \
|
||||
../../test_common/harness/conversions.c \
|
||||
../../test_common/harness/errorHelpers.c \
|
||||
../../test_common/harness/threadTesting.c \
|
||||
../../test_common/harness/testHarness.c \
|
||||
../../test_common/harness/mt19937.c \
|
||||
../../test_common/harness/ThreadPool.c \
|
||||
../../test_common/harness/kernelHelpers.c
|
||||
|
||||
DEFINES =
|
||||
|
||||
SOURCES = $(abspath $(SRCS))
|
||||
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
|
||||
LIBPATH += -L.
|
||||
FRAMEWORK = $(SOURCES)
|
||||
HEADERS =
|
||||
TARGET = test_integer_ops
|
||||
INCLUDE =
|
||||
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os
|
||||
CC = c++
|
||||
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
|
||||
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
|
||||
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
|
||||
|
||||
OBJECTS := ${SOURCES:.c=.o}
|
||||
OBJECTS := ${OBJECTS:.cpp=.o}
|
||||
|
||||
TARGETOBJECT =
|
||||
all: $(TARGET)
|
||||
|
||||
$(TARGET): $(OBJECTS)
|
||||
$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
|
||||
|
||||
clean:
|
||||
rm -f $(TARGET) $(OBJECTS)
|
||||
|
||||
.DEFAULT:
|
||||
@echo The target \"$@\" does not exist in Makefile.
|
||||
347
test_conformance/integer_ops/main.c
Normal file
347
test_conformance/integer_ops/main.c
Normal file
@@ -0,0 +1,347 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
basefn basefn_list[] = {
|
||||
test_integer_clz,
|
||||
test_integer_ctz,
|
||||
test_integer_hadd,
|
||||
test_integer_rhadd,
|
||||
test_integer_mul_hi,
|
||||
test_integer_rotate,
|
||||
test_integer_clamp,
|
||||
test_integer_mad_sat,
|
||||
test_integer_mad_hi,
|
||||
test_integer_min,
|
||||
test_integer_max,
|
||||
test_integer_upsample,
|
||||
|
||||
test_abs,
|
||||
test_absdiff,
|
||||
test_add_sat,
|
||||
test_sub_sat,
|
||||
|
||||
test_integer_addAssign,
|
||||
test_integer_subtractAssign,
|
||||
test_integer_multiplyAssign,
|
||||
test_integer_divideAssign,
|
||||
test_integer_moduloAssign,
|
||||
test_integer_andAssign,
|
||||
test_integer_orAssign,
|
||||
test_integer_exclusiveOrAssign,
|
||||
|
||||
test_unary_ops_increment,
|
||||
test_unary_ops_decrement,
|
||||
test_unary_ops_full,
|
||||
|
||||
test_intmul24,
|
||||
test_intmad24,
|
||||
|
||||
test_long_math,
|
||||
test_long_logic,
|
||||
test_long_shift,
|
||||
test_long_compare,
|
||||
|
||||
test_ulong_math,
|
||||
test_ulong_logic,
|
||||
test_ulong_shift,
|
||||
test_ulong_compare,
|
||||
|
||||
test_int_math,
|
||||
test_int_logic,
|
||||
test_int_shift,
|
||||
test_int_compare,
|
||||
|
||||
test_uint_math,
|
||||
test_uint_logic,
|
||||
test_uint_shift,
|
||||
test_uint_compare,
|
||||
|
||||
test_short_math,
|
||||
test_short_logic,
|
||||
test_short_shift,
|
||||
test_short_compare,
|
||||
|
||||
test_ushort_math,
|
||||
test_ushort_logic,
|
||||
test_ushort_shift,
|
||||
test_ushort_compare,
|
||||
|
||||
test_char_math,
|
||||
test_char_logic,
|
||||
test_char_shift,
|
||||
test_char_compare,
|
||||
|
||||
test_uchar_math,
|
||||
test_uchar_logic,
|
||||
test_uchar_shift,
|
||||
test_uchar_compare,
|
||||
|
||||
test_popcount,
|
||||
|
||||
|
||||
// Quick
|
||||
test_quick_long_math,
|
||||
test_quick_long_logic,
|
||||
test_quick_long_shift,
|
||||
test_quick_long_compare,
|
||||
|
||||
test_quick_ulong_math,
|
||||
test_quick_ulong_logic,
|
||||
test_quick_ulong_shift,
|
||||
test_quick_ulong_compare,
|
||||
|
||||
test_quick_int_math,
|
||||
test_quick_int_logic,
|
||||
test_quick_int_shift,
|
||||
test_quick_int_compare,
|
||||
|
||||
test_quick_uint_math,
|
||||
test_quick_uint_logic,
|
||||
test_quick_uint_shift,
|
||||
test_quick_uint_compare,
|
||||
|
||||
test_quick_short_math,
|
||||
test_quick_short_logic,
|
||||
test_quick_short_shift,
|
||||
test_quick_short_compare,
|
||||
|
||||
test_quick_ushort_math,
|
||||
test_quick_ushort_logic,
|
||||
test_quick_ushort_shift,
|
||||
test_quick_ushort_compare,
|
||||
|
||||
test_quick_char_math,
|
||||
test_quick_char_logic,
|
||||
test_quick_char_shift,
|
||||
test_quick_char_compare,
|
||||
|
||||
test_quick_uchar_math,
|
||||
test_quick_uchar_logic,
|
||||
test_quick_uchar_shift,
|
||||
test_quick_uchar_compare,
|
||||
|
||||
test_vector_scalar_ops,
|
||||
};
|
||||
|
||||
|
||||
const char *basefn_names[] = {
|
||||
"integer_clz",
|
||||
"integer_ctz",
|
||||
"integer_hadd",
|
||||
"integer_rhadd",
|
||||
"integer_mul_hi",
|
||||
"integer_rotate",
|
||||
"integer_clamp",
|
||||
"integer_mad_sat",
|
||||
"integer_mad_hi",
|
||||
"integer_min",
|
||||
"integer_max",
|
||||
"integer_upsample",
|
||||
|
||||
"integer_abs",
|
||||
"integer_abs_diff",
|
||||
"integer_add_sat",
|
||||
"integer_sub_sat",
|
||||
|
||||
"integer_addAssign",
|
||||
"integer_subtractAssign",
|
||||
"integer_multiplyAssign",
|
||||
"integer_divideAssign",
|
||||
"integer_moduloAssign",
|
||||
"integer_andAssign",
|
||||
"integer_orAssign",
|
||||
"integer_exclusiveOrAssign",
|
||||
|
||||
"unary_ops_increment",
|
||||
"unary_ops_decrement",
|
||||
"unary_ops_full",
|
||||
|
||||
"integer_mul24",
|
||||
"integer_mad24",
|
||||
|
||||
"long_math",
|
||||
"long_logic",
|
||||
"long_shift",
|
||||
"long_compare",
|
||||
|
||||
"ulong_math",
|
||||
"ulong_logic",
|
||||
"ulong_shift",
|
||||
"ulong_compare",
|
||||
|
||||
"int_math",
|
||||
"int_logic",
|
||||
"int_shift",
|
||||
"int_compare",
|
||||
|
||||
"uint_math",
|
||||
"uint_logic",
|
||||
"uint_shift",
|
||||
"uint_compare",
|
||||
|
||||
"short_math",
|
||||
"short_logic",
|
||||
"short_shift",
|
||||
"short_compare",
|
||||
|
||||
"ushort_math",
|
||||
"ushort_logic",
|
||||
"ushort_shift",
|
||||
"ushort_compare",
|
||||
|
||||
"char_math",
|
||||
"char_logic",
|
||||
"char_shift",
|
||||
"char_compare",
|
||||
|
||||
"uchar_math",
|
||||
"uchar_logic",
|
||||
"uchar_shift",
|
||||
"uchar_compare",
|
||||
|
||||
"popcount",
|
||||
|
||||
// Quick
|
||||
"quick_long_math",
|
||||
"quick_long_logic",
|
||||
"quick_long_shift",
|
||||
"quick_long_compare",
|
||||
|
||||
"quick_ulong_math",
|
||||
"quick_ulong_logic",
|
||||
"quick_ulong_shift",
|
||||
"quick_ulong_compare",
|
||||
|
||||
"quick_int_math",
|
||||
"quick_int_logic",
|
||||
"quick_int_shift",
|
||||
"quick_int_compare",
|
||||
|
||||
"quick_uint_math",
|
||||
"quick_uint_logic",
|
||||
"quick_uint_shift",
|
||||
"quick_uint_compare",
|
||||
|
||||
"quick_short_math",
|
||||
"quick_short_logic",
|
||||
"quick_short_shift",
|
||||
"quick_short_compare",
|
||||
|
||||
"quick_ushort_math",
|
||||
"quick_ushort_logic",
|
||||
"quick_ushort_shift",
|
||||
"quick_ushort_compare",
|
||||
|
||||
"quick_char_math",
|
||||
"quick_char_logic",
|
||||
"quick_char_shift",
|
||||
"quick_char_compare",
|
||||
|
||||
"quick_uchar_math",
|
||||
"quick_uchar_logic",
|
||||
"quick_uchar_shift",
|
||||
"quick_uchar_compare",
|
||||
|
||||
"vector_scalar",
|
||||
};
|
||||
|
||||
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
int num_fns = sizeof(basefn_names) / sizeof(char *);
|
||||
|
||||
void fill_test_values( cl_long *outBufferA, cl_long *outBufferB, size_t numElements, MTdata d )
|
||||
{
|
||||
static const cl_long sUniqueValues[] = { 0x3333333333333333LL, 0x5555555555555555LL, 0x9999999999999999LL, 0xaaaaaaaaaaaaaaaaLL, 0xccccccccccccccccLL,
|
||||
0x3030303030303030LL, 0x5050505050505050LL, 0x9090909090909090LL, 0xa0a0a0a0a0a0a0a0LL, 0xc0c0c0c0c0c0c0c0LL, 0xf0f0f0f0f0f0f0f0LL,
|
||||
0x0303030303030303LL, 0x0505050505050505LL, 0x0909090909090909LL, 0x0a0a0a0a0a0a0a0aLL, 0x0c0c0c0c0c0c0c0cLL, 0x0f0f0f0f0f0f0f0fLL,
|
||||
0x3300330033003300LL, 0x5500550055005500LL, 0x9900990099009900LL, 0xaa00aa00aa00aa00LL, 0xcc00cc00cc00cc00LL, 0xff00ff00ff00ff00LL,
|
||||
0x0033003300330033LL, 0x0055005500550055LL, 0x0099009900990099LL, 0x00aa00aa00aa00aaLL, 0x00cc00cc00cc00ccLL, 0x00ff00ff00ff00ffLL,
|
||||
0x3333333300000000LL, 0x5555555500000000LL, 0x9999999900000000LL, 0xaaaaaaaa00000000LL, 0xcccccccc00000000LL, 0xffffffff00000000LL,
|
||||
0x0000000033333333LL, 0x0000000055555555LL, 0x0000000099999999LL, 0x00000000aaaaaaaaLL, 0x00000000ccccccccLL, 0x00000000ffffffffLL,
|
||||
0x3333000000003333LL, 0x5555000000005555LL, 0x9999000000009999LL, 0xaaaa00000000aaaaLL, 0xcccc00000000ccccLL, 0xffff00000000ffffLL};
|
||||
static cl_long sSpecialValues[ 128 + 128 + 128 + ( sizeof( sUniqueValues ) / sizeof( sUniqueValues[ 0 ] ) ) ] = { 0 };
|
||||
|
||||
if( sSpecialValues[ 0 ] == 0 )
|
||||
{
|
||||
// Init the power-of-two special values
|
||||
for( size_t i = 0; i < 64; i++ )
|
||||
{
|
||||
sSpecialValues[ i ] = 1LL << i;
|
||||
sSpecialValues[ i + 64 ] = -1LL << i;
|
||||
sSpecialValues[ i + 128 ] = sSpecialValues[ i ] - 1;
|
||||
sSpecialValues[ i + 128 + 64 ] = sSpecialValues[ i ] - 1;
|
||||
sSpecialValues[ i + 256 ] = sSpecialValues[ i ] + 1;
|
||||
sSpecialValues[ i + 256 + 64 ] = sSpecialValues[ i ] + 1;
|
||||
}
|
||||
memcpy( &sSpecialValues[ 128 + 128 + 128 ], sUniqueValues, sizeof( sUniqueValues ) );
|
||||
}
|
||||
|
||||
size_t i, aIdx = 0, bIdx = 0;
|
||||
size_t numSpecials = sizeof( sSpecialValues ) / sizeof( sSpecialValues[ 0 ] );
|
||||
|
||||
for( i = 0; i < numElements; i++ )
|
||||
{
|
||||
outBufferA[ i ] = sSpecialValues[ aIdx ];
|
||||
outBufferB[ i ] = sSpecialValues[ bIdx ];
|
||||
bIdx++;
|
||||
if( bIdx == numSpecials )
|
||||
{
|
||||
bIdx = 0;
|
||||
aIdx++;
|
||||
if( aIdx == numSpecials )
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( i < numElements )
|
||||
{
|
||||
// Fill remainder with random values
|
||||
for( ; i < numElements; i++ )
|
||||
{
|
||||
int a = (int)genrand_int32(d);
|
||||
int b = (int)genrand_int32(d);
|
||||
outBufferA[ i ] = ((cl_long)a <<33 | (cl_long)b) ^ ((cl_long)b << 16);
|
||||
|
||||
a = (int)genrand_int32(d);
|
||||
b = (int)genrand_int32(d);
|
||||
outBufferB[ i ] = ((cl_long)a <<33 | (cl_long)b) ^ ((cl_long)b << 16);
|
||||
}
|
||||
}
|
||||
else if( aIdx < numSpecials )
|
||||
{
|
||||
log_info( "WARNING: Not enough space to fill all special values for long test! (need %d additional elements)\n", (int)( ( numSpecials - aIdx ) * numSpecials ) );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false /* image support required */, false /* force no context creation */, 0 );
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
143
test_conformance/integer_ops/procs.h
Normal file
143
test_conformance/integer_ops/procs.h
Normal file
@@ -0,0 +1,143 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/threadTesting.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/mt19937.h"
|
||||
|
||||
|
||||
// The number of errors to print out for each test
|
||||
#define MAX_ERRORS_TO_PRINT 10
|
||||
|
||||
extern const size_t vector_aligns[];
|
||||
|
||||
extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret);
|
||||
extern void fill_test_values( cl_long *outBufferA, cl_long *outBufferB, size_t numElements, MTdata d );
|
||||
|
||||
|
||||
extern int test_popcount(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_clz(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_ctz(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_hadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_rhadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_mul_hi(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_rotate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_clamp(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_mad_sat(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_mad_hi(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_upsample(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_integer_addAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_subtractAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_multiplyAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_divideAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_moduloAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_andAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_orAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_integer_exclusiveOrAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_abs(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_absdiff(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_add_sat(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_sub_sat(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_intmul24(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_intmad24(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
|
||||
extern int test_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_uint_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_uint_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
|
||||
extern int test_quick_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_quick_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_uint_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_uint_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_quick_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_quick_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_quick_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_unary_ops_full(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_unary_ops_increment(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_unary_ops_decrement(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_vector_scalar_ops(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
31
test_conformance/integer_ops/testBase.h
Normal file
31
test_conformance/integer_ops/testBase.h
Normal file
@@ -0,0 +1,31 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _testBase_h
|
||||
#define _testBase_h
|
||||
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
#endif // _testBase_h
|
||||
|
||||
|
||||
|
||||
335
test_conformance/integer_ops/test_abs.c
Normal file
335
test_conformance/integer_ops/test_abs.c
Normal file
@@ -0,0 +1,335 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
|
||||
static int verify_abs_char( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize )
|
||||
{
|
||||
const cl_char *inA = (const cl_char*) p;
|
||||
const cl_uchar *outptr = (const cl_uchar*) q;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_uchar r = inA[i];
|
||||
if( inA[i] < 0 )
|
||||
r = -inA[i];
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%ld) Failure for abs( (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i],r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int verify_abs_short( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize )
|
||||
{
|
||||
const cl_short *inA = (const cl_short*) p;
|
||||
const cl_ushort *outptr = (const cl_ushort*) q;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_ushort r = inA[i];
|
||||
if( inA[i] < 0 )
|
||||
r = -inA[i];
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%ld) Failure for abs( (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i],r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_abs_int( const void *p, const void *q, size_t n, const char *sizeName , size_t vecSize)
|
||||
{
|
||||
const cl_int *inA = (const cl_int*) p;
|
||||
const cl_uint *outptr = (const cl_uint*) q;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_uint r = inA[i];
|
||||
if( inA[i] < 0 )
|
||||
r = -inA[i];
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%ld) Failure for abs( (int%s) 0x%2.2x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i],r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_abs_long( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize )
|
||||
{
|
||||
const cl_long *inA = (const cl_long*) p;
|
||||
const cl_ulong *outptr = (const cl_ulong*) q;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_ulong r = inA[i];
|
||||
if( inA[i] < 0 )
|
||||
r = -inA[i];
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%ld) Failure for abs( (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i],r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int verify_abs_uchar( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize )
|
||||
{
|
||||
const cl_uchar *inA = (const cl_uchar*) p;
|
||||
const cl_uchar *outptr = (const cl_uchar*) q;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_uchar r = inA[i];
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%ld) Failure for abs( (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i],r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int verify_abs_ushort( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize )
|
||||
{
|
||||
const cl_ushort *inA = (const cl_ushort*) p;
|
||||
const cl_ushort *outptr = (const cl_ushort*) q;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_ushort r = inA[i];
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%ld) Failure for abs( (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i],r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_abs_uint( const void *p, const void *q, size_t n, const char *sizeName , size_t vecSize)
|
||||
{
|
||||
const cl_uint *inA = (const cl_uint*) p;
|
||||
const cl_uint *outptr = (const cl_uint*) q;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_uint r = inA[i];
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%ld) Failure for abs( (int%s) 0x%2.2x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i],r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_abs_ulong( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize )
|
||||
{
|
||||
const cl_ulong *inA = (const cl_ulong*) p;
|
||||
const cl_ulong *outptr = (const cl_ulong*) q;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_ulong r = inA[i];
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%ld) Failure for abs( (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i],r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
typedef int (*verifyFunc)( const void *, const void *, size_t n, const char *sizeName, size_t vecSize );
|
||||
static const verifyFunc verify[] = {
|
||||
verify_abs_char, verify_abs_short, verify_abs_int, verify_abs_long,
|
||||
verify_abs_uchar, verify_abs_ushort, verify_abs_uint, verify_abs_ulong
|
||||
};
|
||||
|
||||
static const char *test_str_names[] = { "char", "short", "int", "long" ,
|
||||
"uchar", "ushort", "uint", "ulong"};
|
||||
static const char *test_ustr_names[] = { "uchar", "ushort", "uint", "ulong" ,
|
||||
"uchar", "ushort", "uint", "ulong"};
|
||||
static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
|
||||
static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };
|
||||
static const char *vector_size_names_io_types[] = { "", "2", "", "4", "8", "16" };
|
||||
static const size_t kSizes[9] = { 1, 2, 4, 8, 1, 2, 4, 8 };
|
||||
|
||||
static const char * source_loads[] = {
|
||||
"srcA[tid]",
|
||||
"vload3(tid, srcA)"
|
||||
};
|
||||
|
||||
static const char * dest_stores[] = {
|
||||
" dst[tid] = tmp;\n",
|
||||
" vstore3(tmp, tid, dst);\n"
|
||||
};
|
||||
|
||||
// Conformance test for the OpenCL abs() built-in.
//
// Fills a host buffer with random 32-bit words, then for every type in
// test_str_names and every width in vector_sizes builds a one-line kernel
// that applies abs() to the input buffer, runs it, reads the result back and
// compares it with the matching host checker from verify[].
//
//   device  - unused
//   context - context used to create buffers and programs
//   queue   - command queue used for transfers and kernel launches
//   n_elems - scales the buffer size (sizeof(cl_int) * 4 * n_elems bytes)
//
// Returns -1 when an allocation or OpenCL call fails or when any
// type/width combination produced a mismatch; otherwise the last OpenCL
// status (CL_SUCCESS).
//
// NOTE: the early "return -1" paths taken on OpenCL errors still do not
// release the host buffers or OpenCL objects created so far.
int test_abs(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    cl_int *input_ptr, *output_ptr, *p;
    int err = 0;
    int i;
    cl_uint vectorSizeIdx;
    cl_uint type;
    MTdata d;
    int fail_count = 0;

    size_t length = sizeof(cl_int) * 4 * n_elems;

    input_ptr   = (cl_int*)malloc(length);
    output_ptr  = (cl_int*)malloc(length);
    if( !input_ptr || !output_ptr )
    {
        log_error("malloc failed\n");
        free(input_ptr);
        free(output_ptr);
        return -1;
    }

    p = input_ptr;
    d = init_genrand( gRandomSeed );
    for (i=0; i<n_elems * 4; i++)
        p[i] = genrand_int32(d);
    free_mtdata(d);  d = NULL;

    for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
    {
        //embedded devices don't support long/ulong so skip over
        if (! gHasLong && strstr(test_str_names[type],"long"))
        {
            log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", test_str_names[type] );
            continue;
        }

        verifyFunc f = verify[ type ];

        size_t elementCount = length / kSizes[type];
        cl_mem streams[2];

        log_info( "%s", test_str_names[type] );
        fflush( stdout );

        // Set up data streams for the type
        streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
        if (!streams[0])
        {
            log_error("clCreateBuffer failed\n");
            return -1;
        }
        streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
        if (!streams[1])
        {
            log_error("clCreateBuffer failed\n");
            return -1;
        }

        err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            return -1;
        }

        for( vectorSizeIdx = 0; vectorSizeIdx < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSizeIdx++ )
        {
            cl_program program = NULL;
            cl_kernel kernel = NULL;

            // Kernel source assembled from fragments; 3-wide vectors use
            // vload3/vstore3 on scalar pointers (see source_loads/dest_stores).
            const char *source[] = {
                "__kernel void test_abs_",
                test_str_names[type],
                vector_size_names[vectorSizeIdx],
                "(__global ", test_str_names[type],
                vector_size_names_io_types[vectorSizeIdx],
                " *srcA, __global ", test_ustr_names[type],
                vector_size_names_io_types[vectorSizeIdx],
                " *dst)\n"
                "{\n"
                " int tid = get_global_id(0);\n"
                "\n"
                " ", test_ustr_names[type], vector_size_names[vectorSizeIdx],
                " tmp = abs(", source_loads[!!(vector_sizes[vectorSizeIdx]==3)], ");\n",
                dest_stores[!!(vector_sizes[vectorSizeIdx]==3)],
                "}\n"
            };

            char kernelName[128];
            snprintf( kernelName, sizeof( kernelName ), "test_abs_%s%s", test_str_names[type], vector_size_names[vectorSizeIdx] );
            err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
            if (err)
                return -1;

            err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
            err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
            if (err != CL_SUCCESS)
            {
                log_error("clSetKernelArgs failed\n");
                return -1;
            }

            //Wipe the output buffer clean
            uint32_t pattern = 0xdeadbeef;
            memset_pattern4( output_ptr, &pattern, length );
            err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clEnqueueWriteBuffer failed\n");
                return -1;
            }

            // One work-item per vector.
            size_t size = elementCount / ((vector_sizes[vectorSizeIdx]));
            err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clEnqueueNDRangeKernel failed\n");
                return -1;
            }

            err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clEnqueueReadBuffer failed\n");
                return -1;
            }

            char *inP = (char *)input_ptr;
            char *outP = (char *)output_ptr;

            // Verify one vector at a time; stop at the first mismatch for
            // this type/width combination and count it once.
            for( size_t e = 0; e < size; e++ )
            {
                if( f( inP, outP, (vector_sizes[vectorSizeIdx]), vector_size_names[vectorSizeIdx], vector_sizes[vectorSizeIdx] ) ) {
                    ++fail_count; break; // return -1;
                }
                inP += kSizes[type] * (vector_sizes[vectorSizeIdx] );
                outP += kSizes[type] * (vector_sizes[vectorSizeIdx]);
            }

            clReleaseKernel( kernel );
            clReleaseProgram( program );
            log_info( "." );
            fflush( stdout );
        }

        clReleaseMemObject( streams[0] );
        clReleaseMemObject( streams[1] );
        log_info( "done\n" );
    }

    // Release the host buffers before reporting, so the failure path does
    // not leak them.
    free(input_ptr);
    free(output_ptr);

    if(fail_count) {
        log_info("Failed on %d types\n", fail_count);
        return -1;
    }
    log_info("ABS test passed\n");

    return err;
}
|
||||
|
||||
|
||||
374
test_conformance/integer_ops/test_absdiff.c
Normal file
374
test_conformance/integer_ops/test_absdiff.c
Normal file
@@ -0,0 +1,374 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
static int verify_absdiff_char( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
|
||||
{
|
||||
const cl_char *inA = (const cl_char *)p;
|
||||
const cl_char *inB = (const cl_char *)q;
|
||||
const cl_uchar *outptr = (const cl_uchar *)r;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_uchar r = inA[i] - inB[i];
|
||||
if( inB[i] > inA[i] )
|
||||
r = inB[i] - inA[i];
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%ld) Failure for absdiff( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_absdiff_uchar( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
|
||||
{
|
||||
const cl_uchar *inA = (const cl_uchar *)p;
|
||||
const cl_uchar *inB = (const cl_uchar *)q;
|
||||
const cl_uchar *outptr = (const cl_uchar *)r;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_uchar r = inA[i] - inB[i];
|
||||
if( inB[i] > inA[i] )
|
||||
r = inB[i] - inA[i];
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%ld) Failure for absdiff( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_absdiff_short( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
|
||||
{
|
||||
const cl_short *inA = (const cl_short *)p;
|
||||
const cl_short *inB = (const cl_short *)q;
|
||||
const cl_ushort *outptr = (const cl_ushort *)r;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_ushort r = inA[i] - inB[i];
|
||||
if( inB[i] > inA[i] )
|
||||
r = inB[i] - inA[i];
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%ld) Failure for absdiff( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_absdiff_ushort( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
|
||||
{
|
||||
const cl_ushort *inA = (const cl_ushort *)p;
|
||||
const cl_ushort *inB = (const cl_ushort *)q;
|
||||
const cl_ushort *outptr = (const cl_ushort *)r;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_ushort r = inA[i] - inB[i];
|
||||
if( inB[i] > inA[i] )
|
||||
r = inB[i] - inA[i];
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%ld) Failure for absdiff( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_absdiff_int( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
|
||||
{
|
||||
const cl_int *inA = (const cl_int *)p;
|
||||
const cl_int *inB = (const cl_int *)q;
|
||||
const cl_uint *outptr = (const cl_uint *)r;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_uint r = inA[i] - inB[i];
|
||||
if( inB[i] > inA[i] )
|
||||
r = inB[i] - inA[i];
|
||||
if( r != outptr[i] )
|
||||
{
|
||||
log_info( "%ld) Failure for absdiff( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_absdiff_uint( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
|
||||
{
|
||||
const cl_uint *inA = (const cl_uint *)p;
|
||||
const cl_uint *inB = (const cl_uint *)q;
|
||||
const cl_uint *outptr = (const cl_uint *)r;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_uint r = inA[i] - inB[i];
|
||||
if( inB[i] > inA[i] )
|
||||
r = inB[i] - inA[i];
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%ld) Failure for absdiff( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_absdiff_long( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
|
||||
{
|
||||
const cl_long *inA = (const cl_long *)p;
|
||||
const cl_long *inB = (const cl_long *)q;
|
||||
const cl_ulong *outptr = (const cl_ulong *)r;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_ulong r = inA[i] - inB[i];
|
||||
if( inB[i] > inA[i] )
|
||||
r = inB[i] - inA[i];
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%ld) Failure for absdiff( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_absdiff_ulong( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
|
||||
{
|
||||
const cl_ulong *inA = (const cl_ulong *)p;
|
||||
const cl_ulong *inB = (const cl_ulong *)q;
|
||||
const cl_ulong *outptr = (const cl_ulong *)r;
|
||||
size_t i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_ulong r = inA[i] - inB[i];
|
||||
if( inB[i] > inA[i] )
|
||||
r = inB[i] - inA[i];
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%ld) Failure for absdiff( (ulong%s) 0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
typedef int (*verifyFunc)( const void *, const void *, const void *, size_t n, const char *sizeName, size_t vecSize);
|
||||
static const verifyFunc verify[] = { verify_absdiff_char, verify_absdiff_uchar,
|
||||
verify_absdiff_short, verify_absdiff_ushort,
|
||||
verify_absdiff_int, verify_absdiff_uint,
|
||||
verify_absdiff_long, verify_absdiff_ulong };
|
||||
|
||||
//FIXME: enable long and ulong when GPU path is working
|
||||
static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
|
||||
|
||||
//FIXME: enable "16" when support for > 64 byte vectors go into LLVM
|
||||
static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
|
||||
static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };
|
||||
static const char *vector_param_size_names[] = { "", "2", "", "4", "8", "16" };
|
||||
static const size_t kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 };
|
||||
|
||||
// Logs the nSrcStrings fragments of a kernel source array, in order, with
// nothing inserted between them.
static void printSrc(const char *src[], int nSrcStrings) {
    int idx = 0;
    while (idx < nSrcStrings) {
        log_info("%s", src[idx]);
        idx++;
    }
}
|
||||
|
||||
// Conformance test for the OpenCL abs_diff() built-in.
//
// Fills two host buffers with random 32-bit words, then for every type in
// test_str_names and every width in vector_sizes builds a kernel that
// applies abs_diff() to the two inputs, runs it, reads the result back and
// compares it with the matching host checker from verify[].
//
//   device  - unused
//   context - context used to create buffers and programs
//   queue   - command queue used for transfers and kernel launches
//   n_elems - scales the buffer size (sizeof(cl_int) * 4 * n_elems bytes)
//
// Returns -1 when an allocation or OpenCL call fails or when any
// type/width combination produced a mismatch; otherwise the last OpenCL
// status (CL_SUCCESS).
//
// NOTE: the early "return -1" paths taken on OpenCL errors still do not
// release the host buffers or OpenCL objects created so far.
int test_absdiff(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    cl_int *input_ptr[2], *output_ptr, *p;
    int err = 0;
    int i;
    cl_uint vectorSize;
    cl_uint type;
    MTdata d;
    int fail_count = 0;

    size_t length = sizeof(cl_int) * 4 * n_elems;

    input_ptr[0] = (cl_int*)malloc(length);
    input_ptr[1] = (cl_int*)malloc(length);
    output_ptr   = (cl_int*)malloc(length);
    if( !input_ptr[0] || !input_ptr[1] || !output_ptr )
    {
        log_error("malloc failed\n");
        free(input_ptr[0]);
        free(input_ptr[1]);
        free(output_ptr);
        return -1;
    }

    d = init_genrand( gRandomSeed );
    p = input_ptr[0];
    for (i=0; i<4 * n_elems; i++)
        p[i] = genrand_int32(d);
    p = input_ptr[1];
    for (i=0; i<4 * n_elems; i++)
        p[i] = genrand_int32(d);
    free_mtdata(d);  d = NULL;

    for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
    {
        //embedded devices don't support long/ulong so skip over
        if (! gHasLong && strstr(test_str_names[type],"long"))
        {
            log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", test_str_names[type] );
            continue;
        }

        verifyFunc f = verify[ type ];
        // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
        size_t elementCount = length / kSizes[type];
        cl_mem streams[3];

        log_info( "%s", test_str_names[type] );
        fflush( stdout );

        // Set up data streams for the type
        streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
        if (!streams[0])
        {
            log_error("clCreateBuffer failed\n");
            return -1;
        }
        streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
        if (!streams[1])
        {
            log_error("clCreateBuffer failed\n");
            return -1;
        }
        streams[2] = clCreateBuffer(context, 0, length, NULL, NULL);
        if (!streams[2])
        {
            log_error("clCreateBuffer failed\n");
            return -1;
        }

        err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            return -1;
        }
        err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            return -1;
        }

        for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ )
        {
            cl_program program = NULL;
            cl_kernel kernel = NULL;

            // Kernel source assembled from fragments. The destination type is
            // "u" + the signed name of the pair (type & -2 clears the low bit);
            // 3-wide vectors use vload3/vstore3 on scalar pointers.
            const char *source[] = {
                "__kernel void test_absdiff_", test_str_names[type], vector_size_names[vectorSize],
                "(__global ", test_str_names[type], vector_param_size_names[vectorSize],
                " *srcA, __global ", test_str_names[type], vector_param_size_names[vectorSize],
                " *srcB, __global u", test_str_names[type & -2], vector_param_size_names[vectorSize],
                " *dst)\n"
                "{\n"
                " int tid = get_global_id(0);\n"
                "\n"
                " ", test_str_names[type], vector_size_names[vectorSize], " sA, sB;\n",
                " sA = ", ( vector_sizes[ vectorSize ] == 3 ) ? "vload3( tid, srcA )" : "srcA[tid]", ";\n",
                " sB = ", ( vector_sizes[ vectorSize ] == 3 ) ? "vload3( tid, srcB )" : "srcB[tid]", ";\n",
                " u", test_str_names[type & -2], vector_size_names[vectorSize], " dstVal = abs_diff(sA, sB);\n"
                " ", ( vector_sizes[ vectorSize ] == 3 ) ? "vstore3( dstVal, tid, dst )" : "dst[ tid ] = dstVal", ";\n",
                "}\n" };


            char kernelName[128];
            snprintf( kernelName, sizeof( kernelName ), "test_absdiff_%s%s", test_str_names[type], vector_size_names[vectorSize] );

            err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );

            if (err) {
                return -1;
            }

#if 0
            log_info("About to run\n");
            log_info("=====\n");
            printSrc(source, sizeof(source)/sizeof(source[0]));
            log_info("=====\n");
#endif

            err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
            err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
            err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
            if (err != CL_SUCCESS)
            {
                log_error("clSetKernelArgs failed\n");
                return -1;
            }

            //Wipe the output buffer clean
            uint32_t pattern = 0xdeadbeef;
            memset_pattern4( output_ptr, &pattern, length );
            err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clEnqueueWriteBuffer failed\n");
                return -1;
            }

            // One work-item per vector.
            size_t size = elementCount / (vector_sizes[vectorSize]);
            err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clEnqueueNDRangeKernel failed\n");
                return -1;
            }

            err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
            if (err != CL_SUCCESS)
            {
                log_error("clEnqueueReadBuffer failed\n");
                return -1;
            }

            char *inP = (char *)input_ptr[0];
            char *inP2 = (char *)input_ptr[1];
            char *outP = (char *)output_ptr;

            // Verify one vector at a time; on the first mismatch dump the
            // kernel source, count the failure once and move on.
            for( size_t e = 0; e < size; e++ )
            {
                if( f( inP, inP2, outP, (vector_sizes[vectorSize]), vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) {
                    printSrc(source, sizeof(source)/sizeof(source[0]));
                    ++fail_count; break; // return -1;
                }
                inP += kSizes[type] * ( (vector_sizes[vectorSize]) );
                inP2 += kSizes[type] * ( (vector_sizes[vectorSize]) );
                outP += kSizes[type] * ( (vector_sizes[vectorSize]) );
            }

            clReleaseKernel( kernel );
            clReleaseProgram( program );
            log_info( "." );
            fflush( stdout );
        }

        clReleaseMemObject( streams[0] );
        clReleaseMemObject( streams[1] );
        clReleaseMemObject( streams[2] );
        log_info( "done\n" );
    }

    // Release the host buffers before reporting, so the failure path does
    // not leak them.
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(output_ptr);

    if(fail_count) {
        log_info("Failed on %d types\n", fail_count);
        return -1;
    }
    log_info("ABS_DIFF test passed\n");

    return err;
}
|
||||
|
||||
|
||||
378
test_conformance/integer_ops/test_add_sat.c
Normal file
378
test_conformance/integer_ops/test_add_sat.c
Normal file
@@ -0,0 +1,378 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// Lower bounds of the unsigned types, spelled out for symmetry with the
// *_MAX constants.
#define UCHAR_MIN   0
#define USHRT_MIN   0
#define UINT_MIN    0

// Two-argument maximum/minimum, defined only when the platform headers have
// not already supplied them. Each argument may be evaluated twice, so do not
// pass expressions with side effects.
#ifndef MAX
#define MAX( lhs, rhs )   ( (lhs) > (rhs) ? (lhs) : (rhs) )
#endif
#ifndef MIN
#define MIN( lhs, rhs )   ( (lhs) < (rhs) ? (lhs) : (rhs) )
#endif
|
||||
|
||||
static int verify_addsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_int r = (cl_int) inA[i] + (cl_int) inB[i];
|
||||
r = MAX( r, CL_CHAR_MIN );
|
||||
r = MIN( r, CL_CHAR_MAX );
|
||||
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "\n%d) Failure for add_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_addsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const cl_uchar *outptr, int n, const char *sizeName, int vecSize )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_int r = (int) inA[i] + (int) inB[i];
|
||||
r = MAX( r, 0 );
|
||||
r = MIN( r, CL_UCHAR_MAX );
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "\n%d) Failure for add_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_addsat_short( const cl_short *inA, const cl_short *inB, const cl_short *outptr, int n, const char *sizeName , int vecSize)
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_int r = (cl_int) inA[i] + (cl_int) inB[i];
|
||||
r = MAX( r, CL_SHRT_MIN );
|
||||
r = MIN( r, CL_SHRT_MAX );
|
||||
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "\n%d) Failure for add_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_addsat_ushort( const cl_ushort *inA, const cl_ushort *inB, const cl_ushort *outptr, int n, const char *sizeName , int vecSize)
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_int r = (cl_int) inA[i] + (cl_int) inB[i];
|
||||
r = MAX( r, 0 );
|
||||
r = MIN( r, CL_USHRT_MAX );
|
||||
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "\n%d) Failure for add_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_addsat_int( const cl_int *inA, const cl_int *inB, const cl_int *outptr, int n, const char *sizeName , int vecSize)
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_int r = (cl_int) ((cl_uint) inA[i] + (cl_uint)inB[i]);
|
||||
if( inB[i] > 0 )
|
||||
{
|
||||
if( r < inA[i] )
|
||||
r = CL_INT_MAX;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( r > inA[i] )
|
||||
r = CL_INT_MIN;
|
||||
}
|
||||
|
||||
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "\n%d) Failure for add_sat( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_addsat_uint( const cl_uint *inA, const cl_uint *inB, const cl_uint *outptr, int n, const char *sizeName , int vecSize)
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_uint r = inA[i] + inB[i];
|
||||
if( r < inA[i] )
|
||||
r = CL_UINT_MAX;
|
||||
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "\n%d) Failure for add_sat( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_addsat_long( const cl_long *inA, const cl_long *inB, const cl_long *outptr, int n, const char *sizeName , int vecSize)
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_long r = (cl_long)((cl_ulong)inA[i] + (cl_ulong)inB[i]);
|
||||
if( inB[i] > 0 )
|
||||
{
|
||||
if( r < inA[i] )
|
||||
r = CL_LONG_MAX;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( r > inA[i] )
|
||||
r = CL_LONG_MIN;
|
||||
}
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%d) Failure for add_sat( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_addsat_ulong( const cl_ulong *inA, const cl_ulong *inB, const cl_ulong *outptr, int n, const char *sizeName , int vecSize)
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_ulong r = inA[i] + inB[i];
|
||||
if( r < inA[i] )
|
||||
r = CL_ULONG_MAX;
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%d) Failure for add_sat( (ulong%s) 0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName, int );
|
||||
static const verifyFunc verify[] = { (verifyFunc) verify_addsat_char, (verifyFunc) verify_addsat_uchar,
|
||||
(verifyFunc) verify_addsat_short, (verifyFunc) verify_addsat_ushort,
|
||||
(verifyFunc) verify_addsat_int, (verifyFunc) verify_addsat_uint,
|
||||
(verifyFunc) verify_addsat_long, (verifyFunc) verify_addsat_ulong };
|
||||
//FIXME: enable long and ulong when GPU path is working
|
||||
static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
|
||||
|
||||
//FIXME: enable "16" when support for > 64 byte vectors go into LLVM
|
||||
static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
|
||||
static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };
|
||||
static const size_t kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 };
|
||||
|
||||
int test_add_sat(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
cl_int *input_ptr[2], *output_ptr, *p;
|
||||
int err;
|
||||
int i;
|
||||
cl_uint vectorSize;
|
||||
cl_uint type;
|
||||
MTdata d;
|
||||
int fail_count = 0;
|
||||
|
||||
size_t length = sizeof(cl_int) * 4 * n_elems;
|
||||
|
||||
input_ptr[0] = (cl_int*)malloc(length);
|
||||
input_ptr[1] = (cl_int*)malloc(length);
|
||||
output_ptr = (cl_int*)malloc(length);
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<4 * n_elems; i++)
|
||||
p[i] = genrand_int32(d);
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<4 * n_elems; i++)
|
||||
p[i] = genrand_int32(d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
|
||||
{
|
||||
|
||||
//embedded devices don't support long/ulong so skip over
|
||||
if (! gHasLong && strstr(test_str_names[type],"long"))
|
||||
{
|
||||
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", test_str_names[type] );
|
||||
continue;
|
||||
}
|
||||
|
||||
verifyFunc f = verify[ type ];
|
||||
// Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
|
||||
size_t elementCount = length / kSizes[type];
|
||||
cl_mem streams[3];
|
||||
|
||||
log_info( "%s", test_str_names[type] );
|
||||
fflush( stdout );
|
||||
|
||||
// Set up data streams for the type
|
||||
streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[2] = clCreateBuffer(context, 0, length, NULL, NULL);
|
||||
if (!streams[2])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ )
|
||||
{
|
||||
cl_program program = NULL;
|
||||
cl_kernel kernel = NULL;
|
||||
|
||||
const char *source[] = {
|
||||
"__kernel void test_add_sat_", test_str_names[type], vector_size_names[vectorSize],
|
||||
"(__global ", test_str_names[type], vector_size_names[vectorSize],
|
||||
" *srcA, __global ", test_str_names[type], vector_size_names[vectorSize],
|
||||
" *srcB, __global ", test_str_names[type], vector_size_names[vectorSize],
|
||||
" *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" ", test_str_names[type], vector_size_names[vectorSize], " tmp = add_sat(srcA[tid], srcB[tid]);\n"
|
||||
" dst[tid] = tmp;\n"
|
||||
"}\n" };
|
||||
|
||||
|
||||
const char *sourceV3[] = {
|
||||
"__kernel void test_add_sat_", test_str_names[type], vector_size_names[vectorSize],
|
||||
"(__global ", test_str_names[type],
|
||||
" *srcA, __global ", test_str_names[type],
|
||||
" *srcB, __global ", test_str_names[type],
|
||||
" *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" ", test_str_names[type], vector_size_names[vectorSize], " tmp = add_sat(vload3(tid, srcA), vload3(tid, srcB));\n"
|
||||
" vstore3(tmp, tid, dst);\n"
|
||||
"}\n" };
|
||||
|
||||
char kernelName[128];
|
||||
snprintf( kernelName, sizeof( kernelName ), "test_add_sat_%s%s", test_str_names[type], vector_size_names[vectorSize] );
|
||||
if(vector_sizes[vectorSize] != 3)
|
||||
{
|
||||
err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
|
||||
}
|
||||
else
|
||||
{
|
||||
err = create_single_kernel_helper(context, &program, &kernel, sizeof( sourceV3 ) / sizeof( sourceV3[0] ), sourceV3, kernelName );
|
||||
}
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
//Wipe the output buffer clean
|
||||
uint32_t pattern = 0xdeadbeef;
|
||||
memset_pattern4( output_ptr, &pattern, length );
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t size = elementCount / (vector_sizes[vectorSize]);
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clExecuteKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clReadArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
char *inP = (char *)input_ptr[0];
|
||||
char *inP2 = (char *)input_ptr[1];
|
||||
char *outP = (char *)output_ptr;
|
||||
|
||||
for( size_t e = 0; e < size; e++ )
|
||||
{
|
||||
if( f( inP, inP2, outP, (vector_sizes[vectorSize]), vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) {
|
||||
++fail_count; break; // return -1;
|
||||
}
|
||||
inP += kSizes[type] * vector_sizes[vectorSize];
|
||||
inP2 += kSizes[type] * vector_sizes[vectorSize];
|
||||
outP += kSizes[type] * vector_sizes[vectorSize];
|
||||
}
|
||||
|
||||
clReleaseKernel( kernel );
|
||||
clReleaseProgram( program );
|
||||
log_info( "." );
|
||||
fflush( stdout );
|
||||
}
|
||||
|
||||
clReleaseMemObject( streams[0] );
|
||||
clReleaseMemObject( streams[1] );
|
||||
clReleaseMemObject( streams[2] );
|
||||
log_info( "done\n" );
|
||||
}
|
||||
if(fail_count) {
|
||||
log_info("Failed on %d types\n", fail_count);
|
||||
return -1;
|
||||
}
|
||||
|
||||
log_info("ADD_SAT test passed\n");
|
||||
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
1551
test_conformance/integer_ops/test_int_basic_ops.c
Normal file
1551
test_conformance/integer_ops/test_int_basic_ops.c
Normal file
File diff suppressed because it is too large
Load Diff
1889
test_conformance/integer_ops/test_integers.cpp
Normal file
1889
test_conformance/integer_ops/test_integers.cpp
Normal file
File diff suppressed because it is too large
Load Diff
365
test_conformance/integer_ops/test_intmad24.c
Normal file
365
test_conformance/integer_ops/test_intmad24.c
Normal file
@@ -0,0 +1,365 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/* Number of kernel variants built per signedness (one per entry of vector_sizes). */
#define NUM_PROGRAMS 6

/* Vector width handled by each kernel slot, in the same order as the kernels below. */
static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};


/* ---- signed mad24 kernels: dst = mad24(srcA, srcB, srcC) ---- */

const char *int_mad24_kernel_code =
    "__kernel void test_int_mad24(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
    "}\n";

const char *int2_mad24_kernel_code =
    "__kernel void test_int2_mad24(__global int2 *srcA, __global int2 *srcB, __global int2 *srcC, __global int2 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
    "}\n";

/* The 3-wide variant takes scalar pointers and goes through vload3/vstore3. */
const char *int3_mad24_kernel_code =
    "__kernel void test_int3_mad24(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " int3 tmp = mad24(vload3(tid, srcA), vload3(tid, srcB), vload3(tid, srcC));\n"
    " vstore3(tmp, tid, dst);\n"
    "}\n";

const char *int4_mad24_kernel_code =
    "__kernel void test_int4_mad24(__global int4 *srcA, __global int4 *srcB, __global int4 *srcC, __global int4 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
    "}\n";

const char *int8_mad24_kernel_code =
    "__kernel void test_int8_mad24(__global int8 *srcA, __global int8 *srcB, __global int8 *srcC, __global int8 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
    "}\n";

const char *int16_mad24_kernel_code =
    "__kernel void test_int16_mad24(__global int16 *srcA, __global int16 *srcB, __global int16 *srcC, __global int16 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
    "}\n";


/* ---- unsigned mad24 kernels ---- */

const char *uint_mad24_kernel_code =
    "__kernel void test_uint_mad24(__global uint *srcA, __global uint *srcB, __global uint *srcC, __global uint *dst)\n"
    "{\n"
    " uint tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
    "}\n";

const char *uint2_mad24_kernel_code =
    "__kernel void test_uint2_mad24(__global uint2 *srcA, __global uint2 *srcB, __global uint2 *srcC, __global uint2 *dst)\n"
    "{\n"
    " uint tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
    "}\n";

/* The 3-wide variant takes scalar pointers and goes through vload3/vstore3. */
const char *uint3_mad24_kernel_code =
    "__kernel void test_uint3_mad24(__global uint *srcA, __global uint *srcB, __global uint *srcC, __global uint *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " uint3 tmp = mad24(vload3(tid, srcA), vload3(tid, srcB), vload3(tid, srcC));\n"
    " vstore3(tmp, tid, dst);\n"
    "}\n";

const char *uint4_mad24_kernel_code =
    "__kernel void test_uint4_mad24(__global uint4 *srcA, __global uint4 *srcB, __global uint4 *srcC, __global uint4 *dst)\n"
    "{\n"
    " uint tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
    "}\n";

const char *uint8_mad24_kernel_code =
    "__kernel void test_uint8_mad24(__global uint8 *srcA, __global uint8 *srcB, __global uint8 *srcC, __global uint8 *dst)\n"
    "{\n"
    " uint tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
    "}\n";

const char *uint16_mad24_kernel_code =
    "__kernel void test_uint16_mad24(__global uint16 *srcA, __global uint16 *srcB, __global uint16 *srcC, __global uint16 *dst)\n"
    "{\n"
    " uint tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n"
    "}\n";
|
||||
|
||||
|
||||
/*
 * Check the device results of the signed mad24 kernels against a host
 * reference.
 *
 * inptrA, inptrB, inptrC  host copies of the three kernel inputs
 * outptr                  results read back from the device
 * n                       number of scalar elements to check
 * vecSize                 vector width of the kernel under test (unused)
 *
 * Returns 0 when all n elements match, -1 (after logging the first
 * mismatch) otherwise.
 *
 * The reference is computed in unsigned arithmetic: the product of two
 * 24-bit values does not fit in an int, and signed overflow is undefined
 * behaviour in C, whereas unsigned arithmetic wraps and yields the same
 * low 32 bits.
 */
int
verify_int_mad24(int *inptrA, int *inptrB, int *inptrC, int *outptr, size_t n, size_t vecSize)
{
    size_t i;

    (void)vecSize;

    for (i = 0; i < n; i++)
    {
        const unsigned int a = (unsigned int)inptrA[i];
        const unsigned int b = (unsigned int)inptrB[i];
        const unsigned int c = (unsigned int)inptrC[i];
        const unsigned int expected = a * b + c;
        const unsigned int actual = (unsigned int)outptr[i];

        if (expected != actual)
        {
            /* size_t has no portable pre-C99 conversion; widen explicitly. */
            log_error( "Failed at %llu) 0x%8.8x * 0x%8.8x + 0x%8.8x = *0x%8.8x vs 0x%8.8x\n",
                       (unsigned long long)i, a, b, c, expected, actual );
            return -1;
        }
    }

    return 0;
}
|
||||
|
||||
int
|
||||
verify_uint_mad24(cl_uint *inptrA, cl_uint *inptrB, cl_uint *inptrC, cl_uint *outptr, size_t n, size_t vecSize)
|
||||
{
|
||||
cl_uint r;
|
||||
size_t i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
cl_uint a = inptrA[i] & 0xFFFFFFU;
|
||||
cl_uint b = inptrB[i] & 0xFFFFFFU;
|
||||
r = a * b + inptrC[i];
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error( "Failed at %ld) 0x%8.8x * 0x%8.8x + 0x%8.8x = *0x%8.8x vs 0x%8.8x\n", i, a, b, inptrC[i], r, outptr[i] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Type names printed in the pass/fail log lines, indexed by kernel slot: the six signed variants first, then the six unsigned ones. */
static const char *test_str_names[] = { "int", "int2", "int3", "int4", "int8", "int16", "uint", "uint2", "uint3", "uint4", "uint8", "uint16" };
|
||||
|
||||
/*
 * Return a random value in the signed 24-bit range [-0x800000, 0x7FFFFF].
 *
 * The low 24 bits of the generator output are kept and sign-extended from
 * bit 23.  This uses the xor/subtract form instead of `(x << 8) >> 8`,
 * because left-shifting a signed value into the sign bit is undefined
 * behaviour and right-shifting a negative value is implementation-defined.
 */
static inline int random_int24( MTdata d )
{
    const unsigned int low24 = (unsigned int)genrand_int32(d) & 0xFFFFFFu;

    return (int)(low24 ^ 0x800000u) - 0x800000;
}
|
||||
|
||||
/* Return the next 32-bit value from the generator, converted to int. */
static inline int random_int32( MTdata d )
{
    const int value = genrand_int32(d);

    return value;
}
|
||||
|
||||
|
||||
int
|
||||
test_intmad24(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
cl_mem streams[4];
|
||||
cl_int *input_ptr[3], *output_ptr, *p;
|
||||
|
||||
cl_program program[2*NUM_PROGRAMS];
|
||||
cl_kernel kernel[2*NUM_PROGRAMS];
|
||||
size_t threads[1];
|
||||
|
||||
int num_elements;
|
||||
int err;
|
||||
int i;
|
||||
MTdata d;
|
||||
|
||||
size_t length = sizeof(cl_int) * 16 * n_elems;
|
||||
num_elements = n_elems * 16;
|
||||
|
||||
input_ptr[0] = (cl_int*)malloc(length);
|
||||
input_ptr[1] = (cl_int*)malloc(length);
|
||||
input_ptr[2] = (cl_int*)malloc(length);
|
||||
output_ptr = (cl_int*)malloc(length);
|
||||
|
||||
streams[0] = clCreateBuffer(context, 0, length, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
streams[1] = clCreateBuffer(context, 0, length, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
streams[2] = clCreateBuffer(context, 0, length, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
streams[3] = clCreateBuffer(context, 0, length, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] = random_int24(d);
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] = random_int24(d);
|
||||
p = input_ptr[2];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] = random_int32(d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &int_mad24_kernel_code, "test_int_mad24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &int2_mad24_kernel_code, "test_int2_mad24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &int3_mad24_kernel_code, "test_int3_mad24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[3], &kernel[3], 1, &int4_mad24_kernel_code, "test_int4_mad24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[4], &kernel[4], 1, &int8_mad24_kernel_code, "test_int8_mad24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[5], &kernel[5], 1, &int16_mad24_kernel_code, "test_int16_mad24");
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS], &kernel[NUM_PROGRAMS], 1, &uint_mad24_kernel_code, "test_uint_mad24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+1], &kernel[NUM_PROGRAMS+1], 1, &uint2_mad24_kernel_code, "test_uint2_mad24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+2], &kernel[NUM_PROGRAMS+2], 1, &uint3_mad24_kernel_code, "test_uint3_mad24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+3], &kernel[NUM_PROGRAMS+3], 1, &uint4_mad24_kernel_code, "test_uint4_mad24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+4], &kernel[NUM_PROGRAMS+4], 1, &uint8_mad24_kernel_code, "test_uint8_mad24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+5], &kernel[NUM_PROGRAMS+5], 1, &uint16_mad24_kernel_code, "test_uint16_mad24");
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
for (i=0; i< 2*NUM_PROGRAMS; i++)
|
||||
{
|
||||
err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2]);
|
||||
err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3]);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
}
|
||||
|
||||
|
||||
threads[0] = (unsigned int)n_elems;
|
||||
// test signed
|
||||
for (i=0; i<NUM_PROGRAMS; i++)
|
||||
{
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
if (verify_int_mad24(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems * vector_sizes[i], vector_sizes[i]))
|
||||
{
|
||||
log_error("INT_MAD24 %s test failed\n", test_str_names[i]);
|
||||
err = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info("INT_MAD24 %s test passed\n", test_str_names[i]);
|
||||
err = 0;
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] &= 0xffffffU;
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] &= 0xffffffU;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
|
||||
|
||||
// test unsigned
|
||||
for (i=NUM_PROGRAMS; i<2*NUM_PROGRAMS; i++)
|
||||
{
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
if (verify_uint_mad24( (cl_uint*) input_ptr[0], (cl_uint*) input_ptr[1], (cl_uint*) input_ptr[2], (cl_uint*)output_ptr, n_elems * vector_sizes[i-NUM_PROGRAMS], vector_sizes[i-NUM_PROGRAMS]))
|
||||
{
|
||||
log_error("UINT_MAD24 %s test failed\n", test_str_names[i]);
|
||||
err = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info("UINT_MAD24 %s test passed\n", test_str_names[i]);
|
||||
err = 0;
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
for (i=0; i<2*NUM_PROGRAMS; i++)
|
||||
{
|
||||
clReleaseKernel(kernel[i]);
|
||||
clReleaseProgram(program[i]);
|
||||
}
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(input_ptr[2]);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
390
test_conformance/integer_ops/test_intmul24.c
Normal file
390
test_conformance/integer_ops/test_intmul24.c
Normal file
@@ -0,0 +1,390 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/* Number of kernel variants built per signedness (one per entry of vector_sizes). */
#define NUM_PROGRAMS 6

/* Vector width handled by each kernel slot, in the same order as the kernels below. */
static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};


/* ---- signed mul24 kernels: dst = mul24(srcA, srcB) ---- */

const char *int_mul24_kernel_code =
    "__kernel void test_int_mul24(__global int *srcA, __global int *srcB, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
    "}\n";

const char *int2_mul24_kernel_code =
    "__kernel void test_int2_mul24(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
    "}\n";

/* The 3-wide variant takes scalar pointers and goes through vload3/vstore3. */
const char *int3_mul24_kernel_code =
    "__kernel void test_int3_mul24(__global int *srcA, __global int *srcB, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " int3 tmp = mul24(vload3(tid, srcA), vload3(tid, srcB));\n"
    " vstore3(tmp, tid, dst);\n"
    "}\n";

const char *int4_mul24_kernel_code =
    "__kernel void test_int4_mul24(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
    "}\n";

const char *int8_mul24_kernel_code =
    "__kernel void test_int8_mul24(__global int8 *srcA, __global int8 *srcB, __global int8 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
    "}\n";

const char *int16_mul24_kernel_code =
    "__kernel void test_int16_mul24(__global int16 *srcA, __global int16 *srcB, __global int16 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
    "}\n";


/*
 * ---- unsigned mul24 kernels ----
 * NOTE: these reuse the signed entry-point names (test_int*_mul24); only the
 * argument types differ.  Each source is built as its own program.
 */

const char *uint_mul24_kernel_code =
    "__kernel void test_int_mul24(__global uint *srcA, __global uint *srcB, __global uint *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
    "}\n";

const char *uint2_mul24_kernel_code =
    "__kernel void test_int2_mul24(__global uint2 *srcA, __global uint2 *srcB, __global uint2 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
    "}\n";

/* The 3-wide variant takes scalar pointers and goes through vload3/vstore3. */
const char *uint3_mul24_kernel_code =
    "__kernel void test_int3_mul24(__global uint *srcA, __global uint *srcB, __global uint *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " uint3 tmp = mul24(vload3(tid, srcA), vload3(tid, srcB));\n"
    " vstore3(tmp, tid, dst);\n"
    "}\n";

const char *uint4_mul24_kernel_code =
    "__kernel void test_int4_mul24(__global uint4 *srcA, __global uint4 *srcB, __global uint4 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
    "}\n";

const char *uint8_mul24_kernel_code =
    "__kernel void test_int8_mul24(__global uint8 *srcA, __global uint8 *srcB, __global uint8 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
    "}\n";

const char *uint16_mul24_kernel_code =
    "__kernel void test_int16_mul24(__global uint16 *srcA, __global uint16 *srcB, __global uint16 *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
    "}\n";
|
||||
|
||||
|
||||
/*
 * Check the device results of the signed mul24 kernels against a host
 * reference.
 *
 * Each input is reduced to its low 24 bits and sign-extended from bit 23;
 * the expected result is the low 32 bits of the product.
 *
 * n is the number of scalar elements to check; vecSize is unused.
 * Returns 0 when all elements match and -1 on the first mismatch.
 *
 * Everything is done in unsigned arithmetic: shifting a negative int left,
 * and overflowing a signed multiply, are undefined behaviour in C, while the
 * unsigned forms wrap and produce the same bit pattern.
 */
int
verify_int_mul24(int *inptrA, int *inptrB, int *outptr, size_t n, size_t vecSize)
{
    size_t i;

    (void)vecSize;

    for (i = 0; i < n; i++)
    {
        /* (low24 ^ sign) - sign sign-extends bit 23 modulo 2^32. */
        const unsigned int a = (((unsigned int)inptrA[i] & 0xFFFFFFu) ^ 0x800000u) - 0x800000u;
        const unsigned int b = (((unsigned int)inptrB[i] & 0xFFFFFFu) ^ 0x800000u) - 0x800000u;
        const unsigned int expected = a * b;

        if (expected != (unsigned int)outptr[i])
            return -1;
    }

    return 0;
}
|
||||
|
||||
int
|
||||
verify_uint_mul24(cl_uint *inptrA, cl_uint *inptrB, cl_uint *outptr, size_t n, size_t vecSize)
|
||||
{
|
||||
cl_uint r;
|
||||
size_t i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r = (inptrA[i] & 0xffffffU) * (inptrB[i] & 0xffffffU);
|
||||
if (r != outptr[i])
|
||||
{
|
||||
log_error( "failed at %ld: 0x%8.8x * 0x%8.8x = *0x%8.8x vs 0x%8.8x\n", i, inptrA[i], inptrB[i], r, outptr[i] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Return a random value in the signed 24-bit range [-0x800000, 0x7FFFFF].
 *
 * The low 24 bits of the generator output are kept and sign-extended from
 * bit 23.  This uses the xor/subtract form instead of `(x << 8) >> 8`,
 * because left-shifting a signed value into the sign bit is undefined
 * behaviour and right-shifting a negative value is implementation-defined.
 */
static inline int random_int24( MTdata d )
{
    const unsigned int low24 = (unsigned int)genrand_int32(d) & 0xFFFFFFu;

    return (int)(low24 ^ 0x800000u) - 0x800000;
}
|
||||
|
||||
|
||||
/* Type names printed in the pass/fail log lines, indexed by kernel slot: the six signed variants first, then the six unsigned ones. */
static const char *test_str_names[] = { "int", "int2", "int3", "int4", "int8", "int16", "uint", "uint2", "uint3", "uint4", "uint8", "uint16" };
|
||||
|
||||
int
|
||||
test_intmul24(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
cl_mem streams[3];
|
||||
cl_int *input_ptr[2], *output_ptr, *p;
|
||||
|
||||
cl_program program[NUM_PROGRAMS*2];
|
||||
cl_kernel kernel[NUM_PROGRAMS*2];
|
||||
size_t threads[1];
|
||||
|
||||
int num_elements;
|
||||
int err;
|
||||
int i;
|
||||
MTdata d;
|
||||
|
||||
size_t length = sizeof(cl_int) * 16 * n_elems;
|
||||
num_elements = n_elems * 16;
|
||||
|
||||
input_ptr[0] = (cl_int*)malloc(length);
|
||||
input_ptr[1] = (cl_int*)malloc(length);
|
||||
output_ptr = (cl_int*)malloc(length);
|
||||
|
||||
streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[2] = clCreateBuffer(context, 0, length, NULL, NULL);
|
||||
if (!streams[2])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] = random_int24(d);
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] = random_int24(d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &int_mul24_kernel_code, "test_int_mul24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &int2_mul24_kernel_code, "test_int2_mul24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &int3_mul24_kernel_code, "test_int3_mul24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[3], &kernel[3], 1, &int4_mul24_kernel_code, "test_int4_mul24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[4], &kernel[4], 1, &int8_mul24_kernel_code, "test_int8_mul24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[5], &kernel[5], 1, &int16_mul24_kernel_code, "test_int16_mul24");
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS], &kernel[NUM_PROGRAMS], 1, &uint_mul24_kernel_code, "test_int_mul24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+1], &kernel[NUM_PROGRAMS+1], 1, &uint2_mul24_kernel_code, "test_int2_mul24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+2], &kernel[NUM_PROGRAMS+2], 1, &uint3_mul24_kernel_code, "test_int3_mul24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+3], &kernel[NUM_PROGRAMS+3], 1, &uint4_mul24_kernel_code, "test_int4_mul24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+4], &kernel[NUM_PROGRAMS+4], 1, &uint8_mul24_kernel_code, "test_int8_mul24");
|
||||
if (err)
|
||||
return -1;
|
||||
err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+5], &kernel[NUM_PROGRAMS+5], 1, &uint16_mul24_kernel_code, "test_int16_mul24");
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
for (i=0; i<2*NUM_PROGRAMS; i++)
|
||||
{
|
||||
err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// test signed
|
||||
threads[0] = (unsigned int)n_elems;
|
||||
for (i=0; i<NUM_PROGRAMS; i++)
|
||||
{
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verify_int_mul24(input_ptr[0], input_ptr[1], output_ptr, vector_sizes[i], vector_sizes[i]);
|
||||
if (err)
|
||||
{
|
||||
log_error("INT_MUL24 %s test failed\n", test_str_names[i]);
|
||||
err = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info("INT_MUL24 %s test passed\n", test_str_names[i]);
|
||||
err = 0;
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
// clamp the set of input values to be in range
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] &= 0xffffffU;
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<num_elements; i++)
|
||||
p[i] &= 0xffffffU;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// test unsigned
|
||||
for (i=NUM_PROGRAMS; i<2*NUM_PROGRAMS; i++)
|
||||
{
|
||||
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verify_uint_mul24((cl_uint*) input_ptr[0], (cl_uint*) input_ptr[1], (cl_uint*) output_ptr, n_elems * vector_sizes[i-NUM_PROGRAMS], vector_sizes[i-NUM_PROGRAMS]);
|
||||
if (err)
|
||||
{
|
||||
log_error("UINT_MUL24 %s test failed\n", test_str_names[i]);
|
||||
err = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info("UINT_MUL24 %s test passed\n", test_str_names[i]);
|
||||
err = 0;
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
for (i=0; i<2*NUM_PROGRAMS; i++)
|
||||
{
|
||||
clReleaseKernel(kernel[i]);
|
||||
clReleaseProgram(program[i]);
|
||||
}
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(output_ptr);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
248
test_conformance/integer_ops/test_popcount.c
Normal file
248
test_conformance/integer_ops/test_popcount.c
Normal file
@@ -0,0 +1,248 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// Turns the argument into a string literal exactly as it is spelled at the
// point of use, e.g. str(cl_int) yields "cl_int".
#define str(s) #s
|
||||
|
||||
// Reference population count used by the host-side checkers.
// Stores in __r the number of set bits among the low __n bits of x.
//   x   - value to inspect; evaluated exactly once and converted to __T
//   __T - integer type the value is converted to before counting
//   __n - number of low-order bits to examine (at most 64)
//   __r - lvalue that receives the count
// The bits are read from an unsigned 64-bit copy, so a negative signed input is
// never pushed through a signed right shift.  The body is wrapped in
// do { } while (0) so the macro expands to one statement and can be used in an
// unbraced if/else, and the locals carry a suffix so they cannot capture names
// appearing in the arguments.
#define __popcnt(x, __T, __n, __r)                                           \
    do                                                                       \
    {                                                                        \
        const __T popcnt_value_ = (__T)(x);                                  \
        const unsigned long long popcnt_bits_ =                              \
            (unsigned long long)popcnt_value_;                               \
        int popcnt_k_;                                                       \
        (__r) = 0;                                                           \
        for (popcnt_k_ = 0; popcnt_k_ < (int)(__n); popcnt_k_++)             \
        {                                                                    \
            if ((popcnt_bits_ >> popcnt_k_) & 1ULL) (__r)++;                 \
        }                                                                    \
    } while (0)
|
||||
|
||||
// Generates verify_popcount_<__T>(), the host-side checker for one element type.
// The generated function treats p and r as arrays of n values of type __T,
// computes the reference popcount of every input with __popcnt and compares it
// with the corresponding result.
//   returns 0 when all n results match, -1 after logging the first mismatch
//   sizeName - vector-width suffix printed after the type name in the message
//   vecSize  - present to match the verifyFunc signature; not used
// The logged index and input value are converted to types that match their
// format specifiers, and the input is masked to the width of __T.
#define __verify_popcount_func(__T)                                           \
    static int verify_popcount_##__T(const void *p, const void *r, size_t n,  \
                                     const char *sizeName, size_t vecSize)   \
    {                                                                         \
        const __T *inA = (const __T *)p;                                      \
        const __T *outptr = (const __T *)r;                                   \
        const int _n = (int)(sizeof(__T) * 8);                                \
        size_t i;                                                             \
        (void)vecSize;                                                        \
        for (i = 0; i < n; i++)                                               \
        {                                                                     \
            __T x = inA[i];                                                   \
            __T res = outptr[i];                                              \
            __T ref;                                                          \
            __popcnt(x, __T, _n, ref);                                        \
            if (res != ref)                                                   \
            {                                                                 \
                unsigned long long shown = (unsigned long long)x;             \
                if (_n < 64) shown &= (1ULL << _n) - 1ULL;                    \
                log_info("%lu) Failure for popcount( (%s%s) 0x%llx ) = *%d vs %d\n", \
                         (unsigned long)i, str(__T), sizeName, shown,         \
                         (int)ref, (int)res);                                 \
                return -1;                                                    \
            }                                                                 \
        }                                                                     \
        return 0;                                                             \
    }
|
||||
|
||||
__verify_popcount_func(cl_char);
|
||||
__verify_popcount_func(cl_uchar);
|
||||
__verify_popcount_func(cl_short);
|
||||
__verify_popcount_func(cl_ushort);
|
||||
__verify_popcount_func(cl_int);
|
||||
__verify_popcount_func(cl_uint);
|
||||
__verify_popcount_func(cl_long);
|
||||
__verify_popcount_func(cl_ulong);
|
||||
|
||||
typedef int (*verifyFunc)( const void *, const void *, size_t n, const char *sizeName, size_t vecSize);
|
||||
static const verifyFunc verify[] = { verify_popcount_cl_char, verify_popcount_cl_uchar,
|
||||
verify_popcount_cl_short, verify_popcount_cl_ushort,
|
||||
verify_popcount_cl_int, verify_popcount_cl_uint,
|
||||
verify_popcount_cl_long, verify_popcount_cl_ulong };
|
||||
|
||||
// OpenCL C scalar type names under test; the index is the `type` value used
// throughout this file.
static const char *test_str_names[] = {
    "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong"
};

// Vector widths exercised for every type.
static const int vector_sizes[] = { 1, 2, 3, 4, 8, 16 };

// Suffix appended to the type name for each width (kernel name and locals).
static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };

// Suffix used for the kernel's pointer parameters.  It is empty for width 3
// because that kernel variant accesses memory with vload3/vstore3.
static const char *vector_param_size_names[] = { "", "2", "", "4", "8", "16" };

// Host size in bytes of one element of each type in test_str_names.
static const size_t kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 };
|
||||
|
||||
// Writes the nSrcStrings kernel source fragments in src to the info log, one
// after another, so a failing kernel can be read back in full.
static void printSrc(const char *src[], int nSrcStrings) {
    const char **fragment = src;
    int remaining = nSrcStrings;

    while (remaining > 0) {
        log_info("%s", *fragment);
        ++fragment;
        --remaining;
    }
}
|
||||
|
||||
int test_popcount(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
cl_int *input_ptr[1], *output_ptr, *p;
|
||||
int err;
|
||||
int i;
|
||||
cl_uint vectorSize;
|
||||
cl_uint type;
|
||||
MTdata d;
|
||||
int fail_count = 0;
|
||||
|
||||
size_t length = sizeof(cl_int) * 8 * n_elems;
|
||||
|
||||
input_ptr[0] = (cl_int*)malloc(length);
|
||||
output_ptr = (cl_int*)malloc(length);
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<8 * n_elems; i++)
|
||||
p[i] = genrand_int32(d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
|
||||
{
|
||||
//embedded devices don't support long/ulong so skip over
|
||||
if (! gHasLong && strstr(test_str_names[type],"long"))
|
||||
{
|
||||
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", test_str_names[type] );
|
||||
continue;
|
||||
}
|
||||
|
||||
verifyFunc f = verify[ type ];
|
||||
// Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
|
||||
size_t elementCount = length / kSizes[type];
|
||||
cl_mem streams[2];
|
||||
|
||||
log_info( "%s", test_str_names[type] );
|
||||
fflush( stdout );
|
||||
|
||||
// Set up data streams for the type
|
||||
streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ )
|
||||
{
|
||||
cl_program program = NULL;
|
||||
cl_kernel kernel = NULL;
|
||||
|
||||
const char *source[] = {
|
||||
"__kernel void test_popcount_", test_str_names[type], vector_size_names[vectorSize],
|
||||
"(__global ", test_str_names[type], vector_param_size_names[vectorSize],
|
||||
" *srcA, __global ", test_str_names[type], vector_param_size_names[vectorSize],
|
||||
" *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" ", test_str_names[type], vector_size_names[vectorSize], " sA;\n",
|
||||
" sA = ", ( vector_sizes[ vectorSize ] == 3 ) ? "vload3( tid, srcA )" : "srcA[tid]", ";\n",
|
||||
" ", test_str_names[type], vector_size_names[vectorSize], " dstVal = popcount(sA);\n"
|
||||
" ", ( vector_sizes[ vectorSize ] == 3 ) ? "vstore3( dstVal, tid, dst )" : "dst[ tid ] = dstVal", ";\n",
|
||||
"}\n" };
|
||||
|
||||
|
||||
char kernelName[128];
|
||||
snprintf( kernelName, sizeof( kernelName ), "test_popcount_%s%s", test_str_names[type], vector_size_names[vectorSize] );
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
|
||||
|
||||
if (err) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
//Wipe the output buffer clean
|
||||
uint32_t pattern = 0xdeadbeef;
|
||||
memset_pattern4( output_ptr, &pattern, length );
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t size = elementCount / (vector_sizes[vectorSize]);
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
char *inP = (char *)input_ptr[0];
|
||||
char *outP = (char *)output_ptr;
|
||||
|
||||
for( size_t e = 0; e < size; e++ )
|
||||
{
|
||||
if( f( inP, outP, (vector_sizes[vectorSize]), vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) {
|
||||
printSrc(source, sizeof(source)/sizeof(source[0]));
|
||||
++fail_count; break; // return -1;
|
||||
}
|
||||
inP += kSizes[type] * ( (vector_sizes[vectorSize]) );
|
||||
outP += kSizes[type] * ( (vector_sizes[vectorSize]) );
|
||||
}
|
||||
|
||||
clReleaseKernel( kernel );
|
||||
clReleaseProgram( program );
|
||||
log_info( "." );
|
||||
fflush( stdout );
|
||||
}
|
||||
|
||||
clReleaseMemObject( streams[0] );
|
||||
clReleaseMemObject( streams[1] );
|
||||
log_info( "done\n" );
|
||||
}
|
||||
|
||||
|
||||
if(fail_count) {
|
||||
log_info("Failed on %d types\n", fail_count);
|
||||
return -1;
|
||||
}
|
||||
log_info("popcount test passed\n");
|
||||
|
||||
free(input_ptr[0]);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
376
test_conformance/integer_ops/test_sub_sat.c
Normal file
376
test_conformance/integer_ops/test_sub_sat.c
Normal file
@@ -0,0 +1,376 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// Lower bounds of the unsigned host types, named to mirror the *_MAX limits.
#define UCHAR_MIN 0
#define USHRT_MIN 0
#define UINT_MIN 0
|
||||
|
||||
// Larger / smaller of two values.  Defined only when no earlier header has
// already supplied the macro.  Each argument may be evaluated twice, so do not
// pass expressions with side effects.
#ifndef MAX
#define MAX( lhs_, rhs_ ) ( (lhs_) > (rhs_) ? (lhs_) : (rhs_) )
#endif
#ifndef MIN
#define MIN( lhs_, rhs_ ) ( (lhs_) < (rhs_) ? (lhs_) : (rhs_) )
#endif
|
||||
|
||||
|
||||
static int verify_subsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
|
||||
r = MAX( r, CL_CHAR_MIN );
|
||||
r = MIN( r, CL_CHAR_MAX );
|
||||
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "\n%d) Failure for sub_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_subsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const cl_uchar *outptr, int n, const char *sizeName, int vecSize )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
|
||||
r = MAX( r, 0 );
|
||||
r = MIN( r, CL_UCHAR_MAX );
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "\n%d) Failure for sub_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_subsat_short( const cl_short *inA, const cl_short *inB, const cl_short *outptr, int n, const char *sizeName, int vecSize )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
|
||||
r = MAX( r, CL_SHRT_MIN );
|
||||
r = MIN( r, CL_SHRT_MAX );
|
||||
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "\n%d) Failure for sub_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_subsat_ushort( const cl_ushort *inA, const cl_ushort *inB, const cl_ushort *outptr, int n, const char *sizeName , int vecSize)
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
|
||||
r = MAX( r, 0 );
|
||||
r = MIN( r, CL_USHRT_MAX );
|
||||
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "\n%d) Failure for sub_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_subsat_int( const cl_int *inA, const cl_int *inB, const cl_int *outptr, int n, const char *sizeName , int vecSize)
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_int r = (cl_int) ((cl_uint)inA[i] - (cl_uint)inB[i]);
|
||||
if( inB[i] < 0 )
|
||||
{
|
||||
if( r < inA[i] )
|
||||
r = CL_INT_MAX;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( r > inA[i] )
|
||||
r = CL_INT_MIN;
|
||||
}
|
||||
|
||||
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "\n%d) Failure for sub_sat( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Host reference check for sub_sat on uint data.
// The expected value is inA[i] - inB[i], or 0 when the subtraction would go
// below zero.
// Returns 0 if all n results match, -1 after logging the first mismatch.
// sizeName is the vector-width suffix for the message; vecSize is unused.
static int verify_subsat_uint( const cl_uint *inA, const cl_uint *inB, const cl_uint *outptr, int n, const char *sizeName , int vecSize)
{
    int idx;
    for( idx = 0; idx < n; ++idx )
    {
        const cl_uint a = inA[idx];
        const cl_uint b = inB[idx];
        const cl_uint expected = ( a < b ) ? 0 : a - b;

        if( expected == outptr[idx] )
            continue;

        log_info( "\n%d) Failure for sub_sat( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", idx, sizeName, a, sizeName, b, expected, outptr[idx] );
        return -1;
    }
    return 0;
}
|
||||
|
||||
static int verify_subsat_long( const cl_long *inA, const cl_long *inB, const cl_long *outptr, int n, const char *sizeName , int vecSize)
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < n; i++ )
|
||||
{
|
||||
cl_long r = (cl_long)((cl_ulong)inA[i] - (cl_ulong)inB[i]);
|
||||
if( inB[i] < 0 )
|
||||
{
|
||||
if( r < inA[i] )
|
||||
r = CL_LONG_MAX;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( r > inA[i] )
|
||||
r = CL_LONG_MIN;
|
||||
}
|
||||
if( r != outptr[i] )
|
||||
{ log_info( "%d) Failure for sub_sat( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Host reference check for sub_sat on ulong data.
// The expected value is inA[i] - inB[i], or 0 when inA[i] < inB[i].
//   sizeName - vector-width suffix printed after the type name
//   vecSize  - unused
// Returns 0 if all n results match, -1 after logging the first mismatch.
static int verify_subsat_ulong( const cl_ulong *inA, const cl_ulong *inB, const cl_ulong *outptr, int n, const char *sizeName , int vecSize)
{
    int i;
    (void)vecSize;
    for( i = 0; i < n; i++ )
    {
        cl_ulong r = inA[i] - inB[i];
        if( inA[i] < inB[i] )
            r = 0;
        if( r != outptr[i] )
        {
            // %llx requires unsigned long long; the underlying type of cl_ulong
            // differs between platforms, so convert explicitly.
            log_info( "%d) Failure for sub_sat( (ulong%s) 0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n",
                      i, sizeName, (unsigned long long)inA[i], sizeName, (unsigned long long)inB[i],
                      (unsigned long long)r, (unsigned long long)outptr[i] );
            return -1;
        }
    }
    return 0;
}
|
||||
|
||||
typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName, int );
|
||||
static const verifyFunc verify[] = { (verifyFunc) verify_subsat_char, (verifyFunc) verify_subsat_uchar,
|
||||
(verifyFunc) verify_subsat_short, (verifyFunc) verify_subsat_ushort,
|
||||
(verifyFunc) verify_subsat_int, (verifyFunc) verify_subsat_uint,
|
||||
(verifyFunc) verify_subsat_long, (verifyFunc) verify_subsat_ulong };
|
||||
|
||||
// OpenCL C scalar type names under test; the index is the `type` value used
// throughout this file.
static const char *test_str_names[] = {
    "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong"
};

// Vector widths exercised for every type, and the matching type-name suffixes.
static const int vector_sizes[] = { 1, 2, 3, 4, 8, 16 };
static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };

// Host size in bytes of one element of each type in test_str_names.
static const size_t kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 };
|
||||
|
||||
int test_sub_sat(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
int *input_ptr[2], *output_ptr, *p;
|
||||
int err;
|
||||
cl_uint i;
|
||||
cl_uint vectorSize;
|
||||
cl_uint type;
|
||||
MTdata d;
|
||||
int fail_count = 0;
|
||||
|
||||
size_t length = sizeof(int) * 4 * n_elems;
|
||||
|
||||
input_ptr[0] = (int*)malloc(length);
|
||||
input_ptr[1] = (int*)malloc(length);
|
||||
output_ptr = (int*)malloc(length);
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<4 * (cl_uint) n_elems; i++)
|
||||
p[i] = genrand_int32(d);
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<4 * (cl_uint) n_elems; i++)
|
||||
p[i] = genrand_int32(d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
|
||||
{
|
||||
|
||||
//embedded devices don't support long/ulong so skip over
|
||||
if (! gHasLong && strstr(test_str_names[type],"long"))
|
||||
{
|
||||
log_info( "WARNING: device does not support 64-bit integers. Skipping %s\n", test_str_names[type] );
|
||||
continue;
|
||||
}
|
||||
|
||||
verifyFunc f = verify[ type ];
|
||||
// Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
|
||||
size_t elementCount = length / kSizes[type];
|
||||
cl_mem streams[3];
|
||||
|
||||
log_info( "%s", test_str_names[type] );
|
||||
fflush( stdout );
|
||||
|
||||
// Set up data streams for the type
|
||||
streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[2] = clCreateBuffer(context, 0, length, NULL, NULL);
|
||||
if (!streams[2])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ )
|
||||
{
|
||||
cl_program program = NULL;
|
||||
cl_kernel kernel = NULL;
|
||||
|
||||
const char *source[] = {
|
||||
"__kernel void test_sub_sat_", test_str_names[type], vector_size_names[vectorSize],
|
||||
"(__global ", test_str_names[type], vector_size_names[vectorSize],
|
||||
" *srcA, __global ", test_str_names[type], vector_size_names[vectorSize],
|
||||
" *srcB, __global ", test_str_names[type], vector_size_names[vectorSize],
|
||||
" *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" ", test_str_names[type], vector_size_names[vectorSize], " tmp = sub_sat(srcA[tid], srcB[tid]);\n"
|
||||
" dst[tid] = tmp;\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char *sourceV3[] = {
|
||||
"__kernel void test_sub_sat_", test_str_names[type], vector_size_names[vectorSize],
|
||||
"(__global ", test_str_names[type],
|
||||
" *srcA, __global ", test_str_names[type],
|
||||
" *srcB, __global ", test_str_names[type],
|
||||
" *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" ", test_str_names[type], vector_size_names[vectorSize], " tmp = sub_sat(vload3(tid, srcA), vload3(tid, srcB));\n"
|
||||
" vstore3(tmp, tid, dst);\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
char kernelName[128];
|
||||
snprintf( kernelName, sizeof( kernelName ), "test_sub_sat_%s%s", test_str_names[type], vector_size_names[vectorSize] );
|
||||
if(vector_sizes[vectorSize] != 3)
|
||||
{
|
||||
err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
|
||||
} else {
|
||||
err = create_single_kernel_helper(context, &program, &kernel, sizeof( sourceV3 ) / sizeof( sourceV3[0] ), sourceV3, kernelName );
|
||||
}
|
||||
if (err)
|
||||
return -1;
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
//Wipe the output buffer clean
|
||||
uint32_t pattern = 0xdeadbeef;
|
||||
memset_pattern4( output_ptr, &pattern, length );
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueWriteBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t size = elementCount / vector_sizes[vectorSize];
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
char *inP = (char *)input_ptr[0];
|
||||
char *inP2 = (char *)input_ptr[1];
|
||||
char *outP = (char *)output_ptr;
|
||||
|
||||
for( size_t e = 0; e < size; e++ )
|
||||
{
|
||||
if( f( inP, inP2, outP, vector_sizes[vectorSize], vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) {
|
||||
++fail_count; break; // return -1;
|
||||
}
|
||||
inP += kSizes[type] * vector_sizes[vectorSize];
|
||||
inP2 += kSizes[type] * vector_sizes[vectorSize];
|
||||
outP += kSizes[type] * vector_sizes[vectorSize];
|
||||
}
|
||||
|
||||
clReleaseKernel( kernel );
|
||||
clReleaseProgram( program );
|
||||
log_info( "." );
|
||||
fflush( stdout );
|
||||
}
|
||||
|
||||
clReleaseMemObject( streams[0] );
|
||||
clReleaseMemObject( streams[1] );
|
||||
clReleaseMemObject( streams[2] );
|
||||
log_info( "done\n" );
|
||||
}
|
||||
if(fail_count) {
|
||||
log_info("Failed on %d types\n", fail_count);
|
||||
return -1;
|
||||
}
|
||||
log_info("SUB_SAT test passed\n");
|
||||
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
211
test_conformance/integer_ops/test_unary_ops.cpp
Normal file
211
test_conformance/integer_ops/test_unary_ops.cpp
Normal file
@@ -0,0 +1,211 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
|
||||
// Number of work-items (and therefore vectors) processed by each kernel run.
#define TEST_SIZE 512
|
||||
|
||||
// Selects which unary operators a test run exercises: only ++, only --, or a
// random mix of both.
enum OpKonstants
{
    kIncrement = 0,
    kDecrement = 1,
    kBoth = 2
};
|
||||
|
||||
// printf-style template of the test kernel.  The five %s fields are, in order:
// element type of the buffer pointer, element type of the local value, its
// vector-width suffix, the expression that loads the value, and the statement
// that stores it back.  The per-work-item control byte picks the operator:
// 0 -> post-increment, 1 -> pre-increment, 2 -> post-decrement,
// anything else -> pre-decrement.
const char *testKernel =
    "__kernel void test( __global %s *inOut, __global char * control )\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    "\n"
    " %s%s inOutVal = %s;\n"
    "\n"
    " if( control[tid] == 0 )\n"
    " inOutVal++;\n"
    " else if( control[tid] == 1 )\n"
    " ++inOutVal;\n"
    " else if( control[tid] == 2 )\n"
    " inOutVal--;\n"
    " else // if( control[tid] == 3 )\n"
    " --inOutVal;\n"
    "\n"
    " %s;\n"
    "}\n";
|
||||
|
||||
// Host-side checker: compares the kernel output (actualPtr) with the expected
// values derived from the original input (inputPtr) and the per-vector control
// bytes; numVecs vectors of vecSize elements each.
typedef int (*OpVerifyFn)( void * actualPtr, void * inputPtr, size_t vecSize, size_t numVecs, cl_char * controls );
|
||||
|
||||
// Builds and runs the unary-operator kernel for one element type and vector
// width, then hands the results to verifyFn.
//   queue, context - OpenCL objects used for the run
//   whichOp        - restricts the random control bytes to increments,
//                    decrements, or leaves both enabled
//   vecType        - element type under test
//   vecSize        - vector width; must be a valid index into sizeNames (<= 16)
//   d              - random number generator state
//   verifyFn       - checker matching vecType
// Returns the checker's result, or a non-zero value if setup or execution fails.
int test_unary_op( cl_command_queue queue, cl_context context, OpKonstants whichOp,
                   ExplicitType vecType, size_t vecSize,
                   MTdata d, OpVerifyFn verifyFn )
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    cl_long inData[TEST_SIZE * 16], outData[TEST_SIZE * 16];
    cl_char controlData[TEST_SIZE];
    int error;
    size_t i;
    size_t threads[1], localThreads[1];
    char kernelSource[10240];
    char *programPtr;

    // Vector-width suffix indexed directly by vecSize.
    char sizeNames[][4] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };

    // vecSize indexes sizeNames and scales the fixed-size data arrays above, so
    // reject anything beyond the table before it is used.
    if( vecSize >= sizeof( sizeNames ) / sizeof( sizeNames[0] ) )
    {
        log_error( "ERROR: Unsupported vector size %d\n", (int)vecSize );
        return -1;
    }

    // Create the source
    char loadLine[ 1024 ], storeLine[ 1024 ];
    if( vecSize == 1 )
    {
        sprintf( loadLine, "inOut[tid]" );
        sprintf( storeLine, "inOut[tid] = inOutVal" );
    }
    else
    {
        // vecSize is a size_t; convert it to match the %d conversion.
        sprintf( loadLine, "vload%d( tid, inOut )", (int)vecSize );
        sprintf( storeLine, "vstore%d( inOutVal, tid, inOut )", (int)vecSize );
    }

    // The buffer pointer is declared with the scalar type name; only the local
    // value carries the vector-width suffix.
    sprintf( kernelSource, testKernel, get_explicit_type_name( vecType ), /*sizeNames[ vecSize ],*/
             get_explicit_type_name( vecType ), sizeNames[ vecSize ],
             loadLine, storeLine );

    // Create the kernel
    programPtr = kernelSource;
    if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test" ) )
    {
        log_error( "ERROR: Unable to create test program!\n" );
        return -1;
    }

    // Generate two streams. The first is our random data to test against, the second is our control stream
    generate_random_data( vecType, vecSize * TEST_SIZE, d, inData );
    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
                                 get_explicit_type_size( vecType ) * vecSize * TEST_SIZE,
                                 inData, &error );
    test_error( error, "Creating input data array failed" );

    // Each random word supplies the 2-bit control values for eight work-items.
    // It is refreshed whenever (i & 7) == 0, which includes i == 0.
    cl_uint bits = 0;
    for( i = 0; i < TEST_SIZE; i++ )
    {
        size_t which = i & 7;
        if( which == 0 )
            bits = genrand_int32(d);

        controlData[ i ] = ( bits >> ( which << 1 ) ) & 0x03;
        if( whichOp == kDecrement )
            // For sub ops, the min control value is 2. Otherwise, it's 0
            controlData[ i ] |= 0x02;
        else if( whichOp == kIncrement )
            // For addition ops, the MAX control value is 1. Otherwise, it's 3
            controlData[ i ] &= ~0x02;
    }
    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
                                 sizeof( controlData ), controlData, &error );
    test_error( error, "Unable to create control stream" );

    // Assign streams and execute
    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
    test_error( error, "Unable to set indexed kernel arguments" );

    // Run the kernel
    threads[0] = TEST_SIZE;

    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    // Read the results
    error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0,
                                 get_explicit_type_size( vecType ) * TEST_SIZE * vecSize,
                                 outData, 0, NULL, NULL );
    test_error( error, "Unable to read output array!" );

    // Now verify the results
    return verifyFn( outData, inData, vecSize, TEST_SIZE, controlData );
}
|
||||
|
||||
template<typename T> int VerifyFn( void * actualPtr, void * inputPtr, size_t vecSize, size_t numVecs, cl_char * controls )
|
||||
{
|
||||
T * actualData = (T *)actualPtr;
|
||||
T * inputData = (T *)inputPtr;
|
||||
|
||||
size_t index = 0;
|
||||
for( size_t i = 0; i < numVecs; i++ )
|
||||
{
|
||||
for( size_t j = 0; j < vecSize; j++, index++ )
|
||||
{
|
||||
T nextVal = inputData[ index ];
|
||||
if( controls[ i ] & 0x02 )
|
||||
nextVal--;
|
||||
else
|
||||
nextVal++;
|
||||
|
||||
if( actualData[ index ] != nextVal )
|
||||
{
|
||||
log_error( "ERROR: Validation failed on vector %ld:%ld (expected %lld, got %lld)", i, j,
|
||||
(cl_long)nextVal, (cl_long)actualData[ index ] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Runs test_unary_op for every element type and vector width with the given
// operator selection.  64-bit types are skipped when gHasLong is false.  All
// combinations are attempted even after a failure.
// Returns 0 if every combination passed, -1 otherwise.
int test_unary_op_set( cl_command_queue queue, cl_context context, OpKonstants whichOp )
{
    // Parallel, sentinel-terminated tables: element type and its checker.
    ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes };
    OpVerifyFn verifys[] = { VerifyFn<cl_char>, VerifyFn<cl_uchar>, VerifyFn<cl_short>, VerifyFn<cl_ushort>, VerifyFn<cl_int>, VerifyFn<cl_uint>, VerifyFn<cl_long>, VerifyFn<cl_ulong>, NULL };
    // Zero-terminated list of vector widths.
    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
    int retVal = 0;
    RandomSeed seed(gRandomSeed );

    unsigned int typeIndex = 0;
    while( types[ typeIndex ] != kNumExplicitTypes )
    {
        const ExplicitType currentType = types[ typeIndex ];
        const bool is64Bit = ( currentType == kLong || currentType == kULong );

        if( !( is64Bit && !gHasLong ) )
        {
            unsigned int index = 0;
            while( vecSizes[ index ] != 0 )
            {
                if( test_unary_op( queue, context, whichOp, currentType, vecSizes[ index ], seed, verifys[ typeIndex ] ) != 0 )
                {
                    log_error( " Vector %s%d FAILED\n", get_explicit_type_name( currentType ), vecSizes[ index ] );
                    retVal = -1;
                }
                index++;
            }
        }
        typeIndex++;
    }

    return retVal;
}
|
||||
|
||||
// Test entry point: exercises both increment and decrement operators.
// deviceID and num_elements are not used.
int test_unary_ops_full(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const OpKonstants selection = kBoth;
    return test_unary_op_set( queue, context, selection );
}
|
||||
|
||||
// Test entry point: exercises only the increment operators.
// deviceID and num_elements are not used.
int test_unary_ops_increment(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const OpKonstants selection = kIncrement;
    return test_unary_op_set( queue, context, selection );
}
|
||||
|
||||
// Test entry point: exercises only the decrement operators.
// deviceID and num_elements are not used.
int test_unary_ops_decrement(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const OpKonstants selection = kDecrement;
    return test_unary_op_set( queue, context, selection );
}
|
||||
263
test_conformance/integer_ops/test_upsample.cpp
Normal file
263
test_conformance/integer_ops/test_upsample.cpp
Normal file
@@ -0,0 +1,263 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
|
||||
// Vector widths covered by the upsample tests.
static const int vector_sizes[] = { 1, 2, 3, 4, 8, 16 };
// Number of entries in vector_sizes.
#define NUM_VECTOR_SIZES 6
|
||||
|
||||
// printf-style kernel template for two-input functions using plain indexed
// loads and stores.  The %s fields are, in order: type of sourceA, type of
// sourceB, type of destValues, and the name of the function under test.
const char *permute_2_param_kernel_pattern =
    "__kernel void test_upsample(__global %s *sourceA, __global %s *sourceB, __global %s *destValues)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
    "\n"
    "}\n";
|
||||
|
||||
|
||||
// Variant of the two-input kernel template for 3-component vectors: both
// inputs are read with vload3 and the result is written with vstore3.  The %s
// fields are, in order: type of sourceA, type of sourceB, type of destValues,
// and the name of the function under test.
const char *permute_2_param_kernel_pattern_v3srcdst =
    "__kernel void test_upsample(__global %s *sourceA, __global %s *sourceB, __global %s *destValues)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " vstore3( %s( vload3(tid,sourceA), vload3(tid, sourceB) ), tid, destValues);\n"
    "\n"
    "}\n";
|
||||
|
||||
int test_upsample_2_param_fn(cl_command_queue queue, cl_context context, const char *fnName, ExplicitType sourceAType, ExplicitType sourceBType, ExplicitType outType,
|
||||
size_t sourceAVecSize, size_t sourceBVecSize, size_t outVecSize, size_t count,
|
||||
void *sourceA, void *sourceB, void *expectedResults )
|
||||
{
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
int error, retCode = 0;
|
||||
cl_mem streams[3];
|
||||
void *outData;
|
||||
size_t threadSize, groupSize, i;
|
||||
unsigned char *expectedPtr, *outPtr;
|
||||
size_t sourceATypeSize, sourceBTypeSize, outTypeSize, outStride;
|
||||
char programSource[ 10240 ], aType[ 64 ], bType[ 64 ], tType[ 64 ];
|
||||
const char *progPtr;
|
||||
|
||||
|
||||
sourceATypeSize = get_explicit_type_size( sourceAType );
|
||||
sourceBTypeSize = get_explicit_type_size( sourceBType );
|
||||
outTypeSize = get_explicit_type_size( outType );
|
||||
|
||||
outStride = outTypeSize * outVecSize;
|
||||
outData = malloc( outStride * count );
|
||||
|
||||
/* Construct the program */
|
||||
strcpy( aType, get_explicit_type_name( sourceAType ) );
|
||||
strcpy( bType, get_explicit_type_name( sourceBType ) );
|
||||
strcpy( tType, get_explicit_type_name( outType ) );
|
||||
if( sourceAVecSize > 1 && sourceAVecSize != 3)
|
||||
sprintf( aType + strlen( aType ), "%d", (int)sourceAVecSize );
|
||||
if( sourceBVecSize > 1 && sourceBVecSize != 3)
|
||||
sprintf( bType + strlen( bType ), "%d", (int)sourceBVecSize );
|
||||
if( outVecSize > 1 && outVecSize != 3)
|
||||
sprintf( tType + strlen( tType ), "%d", (int)outVecSize );
|
||||
|
||||
if(sourceAVecSize == 3 && sourceBVecSize == 3 && outVecSize == 3)
|
||||
{
|
||||
// permute_2_param_kernel_pattern_v3srcdst
|
||||
sprintf( programSource, permute_2_param_kernel_pattern_v3srcdst, aType, bType, tType, fnName );
|
||||
}
|
||||
else if(sourceAVecSize != 3 && sourceBVecSize != 3 && outVecSize != 3)
|
||||
{
|
||||
sprintf( programSource, permute_2_param_kernel_pattern, aType, bType, tType, fnName );
|
||||
} else {
|
||||
vlog_error("Not implemented for %d,%d -> %d\n",
|
||||
(int)sourceAVecSize, (int)sourceBVecSize, (int)outVecSize);
|
||||
return -1;
|
||||
}
|
||||
|
||||
progPtr = (const char *)programSource;
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, &progPtr, "test_upsample" ) )
|
||||
{
|
||||
free( outData );
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Set up parameters */
|
||||
streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sourceATypeSize * sourceAVecSize * count, sourceA, NULL );
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("ERROR: Creating input array A failed!\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sourceBTypeSize * sourceBVecSize * count, sourceB, NULL );
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("ERROR: Creating input array B failed!\n");
|
||||
return -1;
|
||||
}
|
||||
streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), outStride * count, NULL, NULL );
|
||||
if (!streams[2])
|
||||
{
|
||||
log_error("ERROR: Creating output array failed!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Set the arguments */
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 2, sizeof( streams[2] ), &streams[2] );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
|
||||
/* Run the kernel */
|
||||
threadSize = count;
|
||||
|
||||
error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &threadSize, &groupSize, 0, NULL, NULL );
|
||||
test_error( error, "Unable to execute test kernel" );
|
||||
|
||||
/* Now verify the results. Each value should have been duplicated four times, and we should be able to just
|
||||
do a memcpy instead of relying on the actual type of data */
|
||||
error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, outStride * count, outData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read output values!" );
|
||||
|
||||
expectedPtr = (unsigned char *)expectedResults;
|
||||
outPtr = (unsigned char *)outData;
|
||||
|
||||
for( i = 0; i < count; i++ )
|
||||
{
|
||||
if( memcmp( outPtr, expectedPtr, outTypeSize * outVecSize ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Output value %d does not validate!\n", (int)i );
|
||||
retCode = -1;
|
||||
break;
|
||||
}
|
||||
expectedPtr += outTypeSize * outVecSize;
|
||||
outPtr += outStride;
|
||||
}
|
||||
|
||||
clReleaseMemObject( streams[0] );
|
||||
clReleaseMemObject( streams[1] );
|
||||
clReleaseMemObject( streams[2] );
|
||||
clReleaseKernel( kernel );
|
||||
clReleaseProgram( program );
|
||||
free( outData );
|
||||
|
||||
return retCode;
|
||||
}
|
||||
|
||||
/*
 * Builds the host-side reference output for upsample().
 *
 * For each of the `count` element pairs, the value from sourceA is placed in
 * the high half and the value from sourceB in the low half of a result that
 * is twice as wide as the input type. Inputs are read as unsigned values of
 * the same size as `type`, so only the bit patterns matter.
 *
 *   type     input element type; only its size (1, 2 or 4 bytes) is used
 *   sourceA  `count` elements supplying the high halves
 *   sourceB  `count` elements supplying the low halves
 *
 * Returns a malloc'd buffer of count elements of twice the input size, which
 * the caller must free(), or NULL if the type size is unsupported or the
 * allocation fails.
 */
void * create_upsample_data( ExplicitType type, void *sourceA, void *sourceB, size_t count )
{
    void *outData;
    size_t i, tSize;

    tSize = get_explicit_type_size( type );

    /* Validate before allocating so the error path has nothing to free */
    if( tSize != 1 && tSize != 2 && tSize != 4 )
    {
        /* Cast: tSize is a size_t, which "%ld" does not portably accept */
        log_error( "ERROR: unknown type size: %ld\n", (long)tSize );
        return NULL;
    }

    outData = malloc( tSize * count * 2 );
    if( outData == NULL )
    {
        log_error( "ERROR: unable to allocate %ld bytes of upsample reference data\n", (long)( tSize * count * 2 ) );
        return NULL;
    }

    switch( tSize )
    {
        case 1:
            {
                const cl_uchar *aPtr = (const cl_uchar *) sourceA;
                const cl_uchar *bPtr = (const cl_uchar *) sourceB;
                cl_ushort *dPtr = (cl_ushort*) outData;
                for( i = 0; i < count; i++ )
                {
                    cl_ushort u = *bPtr++;
                    u |= ((cl_ushort) *aPtr++) << 8;
                    *dPtr++ = u;
                }
            }
            break;
        case 2:
            {
                const cl_ushort *aPtr = (const cl_ushort *) sourceA;
                const cl_ushort *bPtr = (const cl_ushort *) sourceB;
                cl_uint *dPtr = (cl_uint*) outData;
                for( i = 0; i < count; i++ )
                {
                    cl_uint u = *bPtr++;
                    u |= ((cl_uint) *aPtr++) << 16;
                    *dPtr++ = u;
                }
            }
            break;
        case 4:
            {
                const cl_uint *aPtr = (const cl_uint *) sourceA;
                const cl_uint *bPtr = (const cl_uint *) sourceB;
                cl_ulong *dPtr = (cl_ulong*) outData;
                for( i = 0; i < count; i++ )
                {
                    cl_ulong u = *bPtr++;
                    u |= ((cl_ulong) *aPtr++) << 32;
                    *dPtr++ = u;
                }
            }
            break;
        default:
            /* Not reachable: unsupported sizes were rejected above */
            break;
    }

    return outData;
}
|
||||
|
||||
/*
 * Runs the upsample() test for every supported input type and vector width.
 *
 * The three tables below are parallel and terminated by kNumExplicitTypes:
 * typesToTest[i] is the type of the first (high) argument, baseTypes[i] the
 * type of the second (low) argument and outTypes[i] the result type.
 * Combinations whose result is a 64-bit type are skipped when gHasLong is
 * false.
 *
 * deviceID and num_elements are not used by this test.
 *
 * Returns 0 if every combination passes, -1 otherwise.
 */
int test_integer_upsample(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    ExplicitType typesToTest[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kNumExplicitTypes };
    ExplicitType baseTypes[] = { kUChar, kUChar, kUShort, kUShort, kUInt, kUInt, kNumExplicitTypes };
    ExplicitType outTypes[] = { kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes };
    /* Scalar values generated per source buffer, regardless of vector width */
    const size_t totalValues = 256;
    int i, err = 0;
    int sizeIndex;
    size_t size;
    void *sourceA, *sourceB, *expected;
    RandomSeed seed(gRandomSeed );

    for( i = 0; typesToTest[ i ] != kNumExplicitTypes; i++ )
    {
        if ((outTypes[i] == kLong || outTypes[i] == kULong) && !gHasLong)
        {
            log_info( "Longs unsupported on this device. Skipping...\n");
            continue;
        }

        for( sizeIndex = 0; sizeIndex < NUM_VECTOR_SIZES; sizeIndex++)
        {
            size = (size_t)vector_sizes[sizeIndex];
            log_info("running upsample test for %s %s vector size %d\n", get_explicit_type_name(typesToTest[i]), get_explicit_type_name(baseTypes[i]), (int)size);
            sourceA = create_random_data( typesToTest[ i ], seed, totalValues );
            sourceB = create_random_data( baseTypes[ i ], seed, totalValues );
            expected = NULL;
            if( sourceA != NULL && sourceB != NULL )
                expected = create_upsample_data( typesToTest[ i ], sourceA, sourceB, totalValues );

            /* create_upsample_data() can return NULL; never hand that to the kernel test */
            if( sourceA == NULL || sourceB == NULL || expected == NULL )
            {
                log_error( "ERROR: Unable to create test data for %s%d\n", get_explicit_type_name( typesToTest[ i ] ), (int)size );
                free( sourceA );
                free( sourceB );
                free( expected );
                return -1;
            }

            /* Work-item count is the number of whole vectors that fit in the data */
            if( test_upsample_2_param_fn( queue, context, "upsample",
                                          typesToTest[ i ], baseTypes[ i ],
                                          outTypes[ i ],
                                          size, size, size,
                                          totalValues / size,
                                          sourceA, sourceB, expected ) != 0 )
            {
                log_error( "TEST FAILED: %s for %s%d\n", "upsample", get_explicit_type_name( typesToTest[ i ] ), (int)size );
                err = -1;
            }
            free( sourceA );
            free( sourceB );
            free( expected );
        }
    }
    return err;
}
|
||||
|
||||
|
||||
1675
test_conformance/integer_ops/verification_and_generation_functions.c
Normal file
1675
test_conformance/integer_ops/verification_and_generation_functions.c
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user