Initial open source release of OpenCL 2.2 CTS.

This commit is contained in:
Kedar Patil
2017-05-16 18:25:37 +05:30
parent 6911ba5116
commit 2821bf1323
1035 changed files with 343518 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
# Shared recipe that turns one conformance test module into an executable.
#
# The including CMakeLists.txt must set, before including this file:
#   MODULE_NAME             - module name, e.g. SVM
#   ${MODULE_NAME}_SOURCES  - sources of the test executable
# CONFORMANCE_PREFIX, CONFORMANCE_SUFFIX and CLConform_LIBRARIES are read from
# the enclosing scope.
#
# Note: an earlier `set_source_files_properties(COMPILE_FLAGS -msse2)` call was
# dropped from the top of this recipe. It named no source files, so the flag
# was never applied to anything. A module that needs -msse2 has to request it
# for specific files.
if(NOT DEFINED MODULE_NAME)
  message(FATAL_ERROR "CMakeCommon.txt: MODULE_NAME must be set before including this file")
endif()

# Executable name: <prefix><lower-cased module name><suffix>.
string(TOLOWER "${MODULE_NAME}" MODULE_NAME_LOWER)
set(${MODULE_NAME}_OUT ${CONFORMANCE_PREFIX}${MODULE_NAME_LOWER}${CONFORMANCE_SUFFIX})

add_executable(${${MODULE_NAME}_OUT} ${${MODULE_NAME}_SOURCES})

# Compile every listed source (including the .c harness files) as C++.
set_source_files_properties(${${MODULE_NAME}_SOURCES} PROPERTIES LANGUAGE CXX)

# Group the target under a common folder in IDE generators.
set_property(TARGET ${${MODULE_NAME}_OUT} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}")

target_link_libraries(${${MODULE_NAME}_OUT} ${CLConform_LIBRARIES})

View File

@@ -0,0 +1,92 @@
# Remember the current source directory (`test_conformance').
set(CLConf_Install_Base_Dir "${CMAKE_CURRENT_SOURCE_DIR}")

# Test modules, one sub-directory each. Modules that depend on an optional
# graphics API are only added when the matching *_IS_SUPPORTED flag is set.
add_subdirectory(allocations)
add_subdirectory(api)
add_subdirectory(atomics)
add_subdirectory(basic)
add_subdirectory(buffers)
add_subdirectory(commonfns)
add_subdirectory(compatibility)
add_subdirectory(compiler)
add_subdirectory(computeinfo)
add_subdirectory(contractions)
add_subdirectory(conversions)

if(D3D10_IS_SUPPORTED)
  add_subdirectory(d3d10)
endif()

if(D3D11_IS_SUPPORTED)
  add_subdirectory(d3d11)
endif()

add_subdirectory(device_partition)
add_subdirectory(events)
add_subdirectory(geometrics)

if(GL_IS_SUPPORTED)
  add_subdirectory(gl)
endif()

if(GLES_IS_SUPPORTED)
  add_subdirectory(gles)
endif()

add_subdirectory(half)
add_subdirectory(headers)
add_subdirectory(images)
add_subdirectory(integer_ops)
add_subdirectory(math_brute_force)
add_subdirectory(mem_host_flags)
add_subdirectory(multiple_device_context)
add_subdirectory(printf)
add_subdirectory(profiling)
add_subdirectory(relationals)
add_subdirectory(select)
add_subdirectory(thread_dimensions)
add_subdirectory(vec_align)
add_subdirectory(vec_step)
add_subdirectory(c11_atomics)
add_subdirectory(device_execution)
add_subdirectory(non_uniform_work_group)
add_subdirectory(SVM)
add_subdirectory(generic_address_space)
add_subdirectory(subgroups)
add_subdirectory(workgroups)
add_subdirectory(pipes)
add_subdirectory(device_timer)
add_subdirectory(clcpp)
# Test lists consumed by the run script.
set(CSV_FILES
    opencl_conformance_tests_21_full_spirv.csv
    opencl_conformance_tests_21_legacy_wimpy.csv
    opencl_conformance_tests_22.csv
    opencl_conformance_tests_generate_spirv.csv
    opencl_conformance_tests_conversions.csv
    opencl_conformance_tests_d3d.csv
    opencl_conformance_tests_full.csv
    opencl_conformance_tests_full_no_math_or_conversions.csv
    opencl_conformance_tests_math.csv
    opencl_conformance_tests_quick.csv
)

# Test run script(s).
set(PY_FILES
    run_conformance.py
)

# Copy the .csv files next to the built tests. Relative paths resolve against
# the current source directory (input) and current binary directory (output).
foreach(FILE ${CSV_FILES})
  configure_file("${FILE}" "${FILE}" COPYONLY)
endforeach()

# Copy the test run script.
foreach(FILE ${PY_FILES})
  if(WIN32)
    configure_file("${FILE}" "${FILE}" COPYONLY)
  else()
    # configure_file() cannot set permissions, so stage the script under
    # CMakeFiles first...
    configure_file("${FILE}" "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${FILE}" COPYONLY)
    # ...then copy it to its final destination with the executable bits set.
    file(COPY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${FILE}"
         DESTINATION "${CMAKE_BINARY_DIR}/test_conformance"
         FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE
                          GROUP_READ GROUP_EXECUTE
                          WORLD_READ WORLD_EXECUTE)
  endif()
endforeach()

24
test_conformance/Jamfile Normal file
View File

@@ -0,0 +1,24 @@
# Requirements applied to this project: link against the harness library
# target and turn compiler warnings off.
project
: requirements
<library>/harness//harness
<warnings>off
;
# Bind the /harness project id to the shared harness directory.
use-project /harness : ../test_common/harness ;
# Sub-projects (one per test directory) that are built from here.
proj_lst = allocations api atomics basic buffers commonfns compiler
computeinfo contractions conversions events geometrics gl
half images integer_ops math_brute_force multiple_device_context
profiling relationals select thread_dimensions ;
# Build each listed sub-project.
for proj in $(proj_lst)
{
build-project $(proj) ;
}
# Install the .csv and .py files found in this directory into the
# per-variant (debug/release) test_conformance directory under $(DIST).
install data
: [ glob *.csv ] [ glob *.py ]
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance
<variant>release:<location>$(DIST)/release/tests/test_conformance
;

61
test_conformance/Makefile Normal file
View File

@@ -0,0 +1,61 @@
# Top-level driver: builds (or cleans) every conformance test directory listed
# in PRODUCTS by running make inside that directory.
PRODUCTS = \
        allocations/ \
        api/ \
        atomics/ \
        basic/ \
        buffers/ \
        commonfns/ \
        compatibility/test_conformance/ \
        compiler/ \
        computeinfo/ \
        contractions/ \
        conversions/ \
        device_partition/ \
        events/ \
        geometrics/ \
        gl/ \
        half/ \
        headers/ \
        images/ \
        integer_ops/ \
        math_brute_force/ \
        mem_host_flags/ \
        multiple_device_context/ \
        printf/ \
        profiling/ \
        relationals/ \
        select/ \
        thread_dimensions/ \
        vec_align/ \
        vec_step/ \
        workgroups/

TOP=$(shell pwd)

all: $(PRODUCTS)

# Run "make clean" in every product directory that exists; missing directories
# only produce a warning. $(MAKE) is used for the recursive call so that
# command-line flags and the jobserver are passed down to the sub-make.
clean:
	@for testdir in $(dir $(PRODUCTS)) ; \
	    do ( \
		echo "==================================================================================" ; \
		echo "Cleaning $$testdir" ; \
		echo "==================================================================================" ; \
		if test -d $$testdir; \
		    then cd $$testdir && $(MAKE) clean; \
		    else echo "Warning: Directory '$$testdir' Does Not Exist"; \
		fi; \
	    ); \
	done

# Build one product directory. -i keeps going past failing commands inside the
# sub-make so one broken test does not stop the others.
$(PRODUCTS):
	@echo "==================================================================================" ;
	@echo "(`date "+%H:%M:%S"`) Make $@" ;
	@echo "==================================================================================" ;
	@if test -d $@; \
	    then cd $(dir $@) && $(MAKE) -i; \
	    else echo "Warning: Directory '$@' Does Not Exist"; \
	fi

.PHONY: clean $(PRODUCTS) all

View File

@@ -0,0 +1,26 @@
# SVM conformance test module.
set(MODULE_NAME SVM)

set(${MODULE_NAME}_SOURCES
    # Test entry point and the individual test cases.
    main.cpp
    test_allocate_shared_buffer.cpp
    test_byte_granularity.cpp
    test_cross_buffer_pointers.cpp
    test_enqueue_api.cpp
    test_fine_grain_memory_consistency.cpp
    test_fine_grain_sync_buffers.cpp
    test_pointer_passing.cpp
    test_set_kernel_exec_info_svm_ptrs.cpp
    test_shared_address_space_coarse_grain.cpp
    test_shared_address_space_fine_grain.cpp
    test_shared_address_space_fine_grain_buffers.cpp
    test_shared_sub_buffers.cpp
    test_migrate.cpp
    # Shared harness sources compiled directly into this executable.
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/msvc9.c
    ../../test_common/harness/parseParameters.cpp
)

# Common recipe that consumes MODULE_NAME and ${MODULE_NAME}_SOURCES.
include(../CMakeCommon.txt)

View File

@@ -0,0 +1,54 @@
# Makefile for the SVM conformance test (links against the OpenCL framework).

# Optional ATF support, enabled by defining BUILD_WITH_ATF.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Test sources followed by the shared harness sources. The entry point is
# main.cpp, matching the source list used by the CMake build of this module.
SRCS = main.cpp \
       test_allocate_shared_buffer.cpp \
       test_byte_granularity.cpp \
       test_cross_buffer_pointers.cpp \
       test_enqueue_api.cpp \
       test_fine_grain_memory_consistency.cpp \
       test_fine_grain_sync_buffers.cpp \
       test_pointer_passing.cpp \
       test_set_kernel_exec_info_svm_ptrs.cpp \
       test_shared_address_space_coarse_grain.cpp \
       test_shared_address_space_fine_grain_buffers.cpp \
       test_shared_address_space_fine_grain.cpp \
       test_shared_sub_buffers.cpp \
       test_migrate.cpp \
       ../../test_common/harness/errorHelpers.c \
       ../../test_common/harness/threadTesting.c \
       ../../test_common/harness/testHarness.c \
       ../../test_common/harness/kernelHelpers.c \
       ../../test_common/harness/typeWrappers.cpp \
       ../../test_common/harness/mt19937.c

DEFINES = DONT_TEST_GARBAGE_POINTERS

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
HEADERS =
TARGET = test_SVM
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
# The C++ driver is used for both .c and .cpp sources and for linking.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Map every .c and .cpp source to its .o; the objects themselves are built by
# make's implicit rules.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Fallback for any target that has no rule.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.

.PHONY: all clean

View File

@@ -0,0 +1,101 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef __COMMON_H__
#define __COMMON_H__
#include "../../test_common/harness/compat.h"
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/typeWrappers.h"
#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
#include <windows.h>
#endif
// Memory orderings accepted by the host-side atomic wrappers declared in this
// header. The numeric values are the implicit ones (0..4), spelled out here.
typedef enum {
    memory_order_relaxed = 0,
    memory_order_acquire = 1,
    memory_order_release = 2,
    memory_order_acq_rel = 3,
    memory_order_seq_cst = 4
} cl_memory_order;
// Host-side atomic wrappers; the definitions live in the module's main source
// file next to the test registry.

// Atomically reads *pValue and returns the value read.
cl_int AtomicLoadExplicit(volatile cl_int * pValue, cl_memory_order order);
// Atomically adds operand to *object and returns the value held before the add.
cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand, cl_memory_order o);
// Atomically stores desired into *object and returns the value held before the store.
cl_int AtomicExchangeExplicit(volatile cl_int *object, cl_int desired, cl_memory_order mo);
// Atomic compare-and-swap on *a.
//
// If *a equals *expected, *a is replaced by desired and true is returned.
// Otherwise *expected is overwritten with the value found in *a and false is
// returned. The order_success / order_failure arguments are not consulted by
// any of the implementations below.
//
// T must be an integral or pointer type that the platform primitive supports.
// The operation is performed at the width of T: earlier revisions always
// operated on an intptr_t (GCC) or a LONG (Windows), which touched 8 bytes for
// a 4-byte T on LP64 targets and truncated 8-byte values on Windows.
template <typename T>
bool AtomicCompareExchangeStrongExplicit(volatile T *a, T *expected, T desired,
                                         cl_memory_order order_success,
                                         cl_memory_order order_failure)
{
    T tmp;
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
    if (sizeof(T) == sizeof(LONG64))
    {
        // 64-bit operands (e.g. pointers on Win64) need the 64-bit primitive.
        tmp = (T)InterlockedCompareExchange64((volatile LONG64 *)a, (LONG64)desired, *(LONG64 *)expected);
    }
    else
    {
        tmp = (T)InterlockedCompareExchange((volatile LONG *)a, (LONG)desired, *(LONG *)expected);
    }
#elif defined(__GNUC__)
    // The builtin is overloaded on the pointee type, so no casts are needed and
    // the access width always matches sizeof(T).
    tmp = __sync_val_compare_and_swap(a, *expected, desired);
#else
    log_info("Host function not implemented: atomic_compare_exchange\n");
    tmp = 0;
#endif
    if(tmp == *expected)
        return true;

    // Report the value actually observed back to the caller.
    *expected = tmp;
    return false;
}
// Checks an error code and a pointer together: a non-zero error_code is handed
// to test_error; otherwise a NULL ptr is logged and -1 is returned from the
// calling function. The condition tests the error_code argument itself rather
// than whatever variable named `error` happens to be in the caller's scope.
#define test_error2(error_code, ptr, msg) { if((error_code) != 0) { test_error(error_code, msg); } else { if(NULL == (ptr)) {print_null_error(msg); return -1;} } }
// Logs a NULL-pointer error with the current file and line. The expansion ends
// in a semicolon, which existing call sites may rely on.
#define print_null_error(msg) log_error("ERROR: %s! (NULL pointer detected %s:%d)\n", msg, __FILE__, __LINE__ );
// max possible number of queues needed, 1 for each device in platform.
#define MAXQ 32
// Host-side linked-list node. The kernel sources in this test suite declare a
// struct with the same three members in the same order.
typedef struct Node{
// index of the list this node belongs to (set to the list index by create_linked_lists)
cl_int global_id;
// 0 for the head of a list, then 1, 2, ... for each node appended after it
cl_int position_in_list;
// next node of the same list; create_linked_lists never writes it for the last node
struct Node* pNext;
} Node;
// Host-side linked-list helpers.
void   create_linked_lists(Node* pNodes, size_t num_lists, int list_length);
cl_int verify_linked_lists(Node* pNodes, size_t num_lists, int list_length);

// Device-side linked-list helpers (the "_no_map" variants take raw pointers
// instead of cl_mem objects).
cl_int create_linked_lists_on_device(int qi, cl_command_queue q, cl_mem allocator, cl_kernel k, size_t numLists );
cl_int verify_linked_lists_on_device(int qi, cl_command_queue q, cl_mem num_correct, cl_kernel k, cl_int ListLength, size_t numLists );
cl_int create_linked_lists_on_device_no_map(int qi, cl_command_queue q, size_t *pAllocator, cl_kernel k, size_t numLists );
cl_int verify_linked_lists_on_device_no_map(int qi, cl_command_queue q, cl_int *pNum_correct, cl_kernel k, cl_int ListLength, size_t numLists );

// Test entry points; all share the harness test-function signature.
int test_byte_granularity(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
int test_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
int test_fine_grain_memory_consistency(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
int test_fine_grain_sync_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
int test_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
int test_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
int test_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
int test_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
int test_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
int test_svm_pointer_passing(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
int test_allocate_shared_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
int test_shared_sub_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
int test_enqueue_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
int test_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

// Creates a context, one queue per SVM-capable device and (optionally) a program.
cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps);

// Kernel source shared by the linked-list tests.
extern const char *linked_list_create_and_verify_kernels[];
#endif // #ifndef __COMMON_H__

View File

@@ -0,0 +1,317 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <vector>
#include <sstream>
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/kernelHelpers.h"
#include "common.h"
// SVM atomic wrappers.
// Platforms that support SVM atomics (atomics that work across the host and
// devices) need to implement these host-side functions correctly. Platforms
// that do not support SVM atomics can simply implement them as empty stubs,
// since the functions will not be called.
// Implementations are provided for MSVC on x86/x64 and for GCC-compatible
// compilers; add support for other platforms as needed.

// Atomically loads *pValue. The order argument is not consulted.
cl_int AtomicLoadExplicit(volatile cl_int * pValue, cl_memory_order order)
{
#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
    // Provided the value is aligned, x86 doesn't need anything more than a
    // plain read for seq_cst.
    const cl_int loaded = *pValue;
    return loaded;
#elif defined(__GNUC__)
    // Adding zero returns the current value through an atomic read-modify-write.
    return __sync_add_and_fetch(pValue, 0);
#else
    log_error("ERROR: AtomicLoadExplicit function not implemented\n");
    return -1;
#endif
}
// Atomically adds operand to *object and returns the value held before the
// add. All the x86 atomics are seq_cst, so nothing is done with the memory
// order parameter.
cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand, cl_memory_order o)
{
#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
    volatile LONG *target = (volatile LONG*) object;
    return InterlockedExchangeAdd(target, operand);
#elif defined(__GNUC__)
    const cl_int previous = __sync_fetch_and_add(object, operand);
    return previous;
#else
    log_error("ERROR: AtomicFetchAddExplicit function not implemented\n");
    return -1;
#endif
}
// Atomically stores desired into *object and returns the value that was there
// before. The memory order argument is not consulted; every implementation
// below is meant to behave as a sequentially consistent exchange.
cl_int AtomicExchangeExplicit(volatile cl_int *object, cl_int desired, cl_memory_order mo)
{
#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
    return InterlockedExchange( (volatile LONG*) object, desired);
#elif defined(__GNUC__)
    // __sync_lock_test_and_set is documented as an acquire barrier only, so a
    // full barrier is issued first to keep earlier stores from being reordered
    // past the exchange on weakly ordered targets.
    __sync_synchronize();
    return __sync_lock_test_and_set(object, desired);
#else
    log_error("ERROR: AtomicExchangeExplicit function not implemented\n");
    return -1;
#endif
}
// OpenCL C source (one string built from adjacent literals) containing two
// kernels: create_linked_lists builds one list per work-item out of a shared
// node pool, and verify_linked_lists walks each list and counts the nodes
// whose global_id / position_in_list match what is expected.
const char *linked_list_create_and_verify_kernels[] = {
"typedef struct Node {\n"
" int global_id;\n"
" int position_in_list;\n"
" __global struct Node* pNext;\n"
"} Node;\n"
"\n"
// The allocation_index parameter must be initialized on the host to N, the
// number of work-items, so that atomic_inc hands out nodes after the heads.
// The first N nodes in pNodes will be the heads of the lists.
"__kernel void create_linked_lists(__global Node* pNodes, volatile __attribute__((nosvm)) __global int* allocation_index, int list_length)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" __global Node *pNode = &pNodes[i];\n"
"\n"
" pNode->global_id = i;\n"
" pNode->position_in_list = 0;\n"
"\n"
" __global Node *pNew;\n"
" for(int j=1; j < list_length; j++)\n"
" {\n"
" pNew = &pNodes[ atomic_inc(allocation_index) ];// allocate a new node\n"
" pNew->global_id = i;\n"
" pNew->position_in_list = j;\n"
" pNode->pNext = pNew; // link new node onto end of list\n"
" pNode = pNew; // move to end of list\n"
" }\n"
"}\n"
// Each work-item walks its own list; num_correct is incremented once per
// matching node and the walk stops at the first mismatch.
"__kernel void verify_linked_lists(__global Node* pNodes, volatile __global uint* num_correct, int list_length)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" __global Node *pNode = &pNodes[i];\n"
"\n"
" for(int j=0; j < list_length; j++)\n"
" {\n"
" if( pNode->global_id == i && pNode->position_in_list == j)\n"
" {\n"
" atomic_inc(num_correct);\n"
" } \n"
" else {\n"
" break;\n"
" }\n"
" pNode = pNode->pNext;\n"
" }\n"
"}\n"
};
// Builds num_lists linked lists of list_length nodes each inside pNodes.
// The first num_lists entries of pNodes are the list heads; every further node
// is taken from the entries that follow, in allocation order. The pNext field
// of the last node of each list is left untouched.
void create_linked_lists(Node* pNodes, size_t num_lists, int list_length)
{
    size_t next_free = num_lists; // first entry after the heads

    for(cl_uint list = 0; list < num_lists; list++)
    {
        Node *tail = &pNodes[list];
        tail->global_id = list;
        tail->position_in_list = 0;

        for(int pos = 1; pos < list_length; pos++)
        {
            Node *fresh = &pNodes[next_free]; // allocate a new node
            next_free++;

            fresh->global_id = list;
            fresh->position_in_list = pos;

            tail->pNext = fresh; // append to the list
            tail = fresh;        // the new node is now the tail
        }
    }
}
// Walks the num_lists lists rooted at the first num_lists entries of pNodes
// and counts the nodes whose global_id and position_in_list match their list
// index and position. Each walk stops at the first mismatch.
// Returns CL_SUCCESS when every node of every list matched, -1 otherwise.
cl_int verify_linked_lists(Node* pNodes, size_t num_lists, int list_length)
{
    int matched = 0;

    log_info(" and verifying on host ");
    for(cl_uint list = 0; list < num_lists; list++)
    {
        Node *cursor = &pNodes[list];
        for(int pos = 0; pos < list_length; pos++)
        {
            if(cursor->global_id != list || cursor->position_in_list != pos)
                break;

            matched++;
            cursor = cursor->pNext;
        }
    }

    if(matched == list_length * (cl_uint)num_lists)
    {
        log_info("Passed\n");
        return CL_SUCCESS;
    }

    log_info("Failed\n");
    return -1;
}
// Creates the OpenCL objects shared by the SVM tests.
//
// Note that we don't use the context provided by the test harness since it
// doesn't support multiple devices, so we create our own context here that has
// all capable devices; we use the same platform that the harness used.
//
// Parameters:
//   device_from_harness - device chosen by the harness; it is placed first in
//                         the candidate list.
//   ppCodeString        - kernel source to build, or NULL to skip program creation.
//   context, program    - receive the created context / program.
//   queues              - caller-provided array; one queue is written per
//                         capable device, never more than MAXQ entries.
//   num_devices         - receives the number of devices in the new context.
//   required_svm_caps   - SVM capability bits every selected device must report.
//
// Returns 0 on success, 1 when no device supports the requested SVM level (the
// caller should skip the test and count it as passing), or a negative value on
// error.
cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps)
{
    cl_int error;

    // find out what platform the harness is using.
    cl_platform_id platform_id;
    error = clGetDeviceInfo(device_from_harness, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform_id, NULL);
    test_error(error,"clGetDeviceInfo failed");

    error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 0, NULL, num_devices );
    test_error(error, "clGetDeviceIDs failed");

    std::vector<cl_device_id> devicesTmp(*num_devices), devices, capable_devices;

    error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, *num_devices, &devicesTmp[0], NULL );
    test_error(error, "clGetDeviceIDs failed");

    // Harness device first, then every other device of the platform.
    devices.push_back(device_from_harness);
    for (size_t i = 0; i < devicesTmp.size(); ++i)
    {
        if (device_from_harness != devicesTmp[i])
            devices.push_back(devicesTmp[i]);
    }

    // Select only the devices that support the SVM level needed for the test.
    // Note that if requested SVM capabilities are not supported by any device then the test still passes (even though it does not execute).
    cl_device_svm_capabilities caps;
    for(cl_uint i = 0; i < *num_devices; i++)
    {
        size_t ret_len = 0;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, 0, 0, &ret_len);
        if (error != CL_SUCCESS)
        {
            log_error("clGetDeviceInfo failed %s\n", IGetErrorString(error));
            return -1;
        }

        // One extra zero-initialized element keeps the buffer terminated.
        std::vector<char> oclVersion(ret_len + 1);
        error = clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(char) * oclVersion.size(), &oclVersion[0], 0);
        if (error != CL_SUCCESS)
        {
            log_error("clGetDeviceInfo failed %s\n", IGetErrorString(error));
            return -1;
        }

        // The numeric version is parsed starting at character offset 7 of the
        // version string. A string too short to reach that offset is treated
        // as version 0.0 instead of being indexed out of bounds.
        double version = 0.0;
        if (oclVersion.size() > 7)
        {
            std::string versionStr(&oclVersion[7]);
            std::stringstream stream;
            stream << versionStr;
            stream >> version;
        }

        // Devices other than the harness device are skipped when they are older than 2.0.
        if(device_from_harness != devices[i] && version < 2.0)
        {
            continue;
        }

        error = clGetDeviceInfo(devices[i], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
        test_error(error,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
        if(caps & (~(CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | CL_DEVICE_SVM_ATOMICS)))
        {
            log_error("clGetDeviceInfo returned an invalid cl_device_svm_capabilities value\n");
            return -1;
        }
        if((caps & required_svm_caps) == required_svm_caps)
        {
            capable_devices.push_back(devices[i]);
        }
    }

    // Callers size their queue arrays with MAXQ; never write past that.
    if(capable_devices.size() > (size_t)MAXQ)
    {
        log_info("More than %d capable devices found, using only the first %d.\n", MAXQ, MAXQ);
        capable_devices.resize(MAXQ);
    }

    devices = capable_devices;  // the only devices we care about from here on are the ones capable of supporting the requested SVM level.
    *num_devices = (cl_uint)capable_devices.size();
    if(*num_devices == 0)
    {
        log_info("Requested SVM level not supported by any device on this platform, test not executed.\n");
        return 1; // 1 indicates do not execute, but counts as passing.
    }

    cl_context_properties context_properties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, 0 };
    *context = clCreateContext(context_properties, *num_devices, &devices[0], NULL, NULL, &error);
    test_error(error, "Unable to create context" );

    for(cl_uint i = 0; i < *num_devices; i++)
    {
        queues[i] = clCreateCommandQueueWithProperties(*context, devices[i], 0, &error);
        test_error(error, "clCreateCommandQueueWithProperties failed");
    }

    if(ppCodeString)
    {
        error = create_single_kernel_helper(*context, program, 0, 1, ppCodeString, 0, "-cl-std=CL2.0");
        test_error( error, "failed to create program" );
    }

    return 0;
}
// Test entry points. The order must match basefn_names below.
basefn basefn_list[] = {
    test_byte_granularity,
    test_set_kernel_exec_info_svm_ptrs,
    test_fine_grain_memory_consistency,
    test_fine_grain_sync_buffers,
    test_shared_address_space_fine_grain,
    test_shared_sub_buffers,
    test_shared_address_space_fine_grain_buffers,
    test_allocate_shared_buffer,
    test_shared_address_space_coarse_grain_old_api,
    test_shared_address_space_coarse_grain_new_api,
    test_cross_buffer_pointers_coarse_grain,
    test_svm_pointer_passing,
    test_enqueue_api,
    test_migrate,
};

// Names for the tests above, index for index.
const char *basefn_names[] = {
    "svm_byte_granularity",
    "svm_set_kernel_exec_info_svm_ptrs",
    "svm_fine_grain_memory_consistency",
    "svm_fine_grain_sync_buffers",
    "svm_shared_address_space_fine_grain",
    "svm_shared_sub_buffers",
    "svm_shared_address_space_fine_grain_buffers",
    "svm_allocate_shared_buffer",
    "svm_shared_address_space_coarse_grain_old_api",
    "svm_shared_address_space_coarse_grain_new_api",
    "svm_cross_buffer_pointers_coarse_grain",
    "svm_pointer_passing",
    "svm_enqueue_api",
    "svm_migrate_mem",
};

// Both tables must have the same number of entries.
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));

int num_fns = sizeof(basefn_names) / sizeof(basefn_names[0]);

// Program entry point: hands the test tables to the harness and returns its result.
int main(int argc, const char *argv[])
{
    const int status = runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, true, 0 );
    return status;
}

View File

@@ -0,0 +1,107 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// clSVMAlloc flag combinations exercised by test_allocate_shared_buffer:
// the three access modes alone, with fine-grain buffer support, with
// fine-grain buffer support plus atomics, and finally no flags at all.
const cl_mem_flags flag_set[] = {
    CL_MEM_READ_WRITE,
    CL_MEM_WRITE_ONLY,
    CL_MEM_READ_ONLY,

    CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
    CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
    CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,

    CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
    CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
    CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,

    0
};

// Printable form of each flag_set entry, index for index.
const char* flag_set_names[] = {
    "CL_MEM_READ_WRITE",
    "CL_MEM_WRITE_ONLY",
    "CL_MEM_READ_ONLY",

    "CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER",
    "CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER",
    "CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER",

    "CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS",
    "CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS",
    "CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS",

    "0"
};
// For every flag combination in flag_set that the device supports: allocates
// an SVM buffer with clSVMAlloc, wraps it in a cl_mem created with
// CL_MEM_USE_HOST_PTR, maps that cl_mem and checks that the mapped pointer is
// the SVM pointer itself.
//
// The context2, queue and num_elements arguments from the harness are not
// used; the test creates its own context and queues through create_cl_objects.
//
// Returns 0 on success (or when no device supports the required SVM level),
// non-zero on failure.
int test_allocate_shared_buffer(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    clContextWrapper context = NULL;
    clProgramWrapper program = NULL;
    cl_uint num_devices = 0;
    cl_int err = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];

    cl_device_svm_capabilities caps;
    err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
    test_error(err,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");

    err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
    if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
    if(err != 0) return -1; // fail test.

    size_t size = 1024;

    // iteration over flag combos
    int num_flags = sizeof(flag_set)/sizeof(cl_mem_flags);
    for(int i = 0; i < num_flags; i++)
    {
        // Skip combinations that need capabilities the device does not report.
        if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0 && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0)
            || ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0 && (caps & CL_DEVICE_SVM_ATOMICS) == 0))
        {
            log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]);
            continue;
        }

        log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]);
        cl_char *pBufData1 = (cl_char*) clSVMAlloc(context, flag_set[i], size, 0);
        if(pBufData1 == NULL)
        {
            log_error("SVMalloc returned NULL\n");
            return -1;
        }

        bool pointer_mismatch = false;
        {
            // Inner scope: the cl_mem wrapper is released before the SVM
            // allocation it aliases is freed below.
            clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size, pBufData1, &err);
            test_error(err,"clCreateBuffer failed");

            cl_char *pBufData2 = NULL;
            // Start from read+write and drop whichever access the host flags forbid.
            cl_map_flags flags = CL_MAP_READ | CL_MAP_WRITE;
            if(flag_set[i] & CL_MEM_HOST_READ_ONLY) flags ^= CL_MAP_WRITE;
            if(flag_set[i] & CL_MEM_HOST_WRITE_ONLY) flags ^= CL_MAP_READ;

            if(!(flag_set[i] & CL_MEM_HOST_NO_ACCESS))
            {
                pBufData2 = (cl_char*) clEnqueueMapBuffer(queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL,NULL, &err);
                test_error(err, "clEnqueueMapBuffer failed");

                if(pBufData2 != pBufData1)
                {
                    log_error("SVM pointer returned by clEnqueueMapBuffer doesn't match pointer returned by clSVMalloc\n");
                    pointer_mismatch = true;
                }

                // Unmap before the buffer is released and the SVM memory freed.
                if(pBufData2 != NULL)
                {
                    err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0, NULL, NULL);
                    test_error(err, "clEnqueueUnmapMemObject failed");
                    err = clFinish(queues[0]);
                    test_error(err, "clFinish failed");
                }
            }
        }

        clSVMFree(context, pBufData1);

        if(pointer_mismatch)
            return -1;
    }

    return 0;
}

View File

@@ -0,0 +1,148 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// OpenCL C source (one string built from adjacent literals) for the two
// kernels used by test_byte_granularity.
const char *byte_manipulation_kernels[] = {
// Each device will write its id into the bytes that it "owns"; ownership is based on round robin (global_id % num_id).
// num_id is equal to the number of SVM devices in the system plus one (for the host code).
// id is the index (id) of the device that this kernel is executing on.
// For example, if there are 2 SVM devices and the host, the buffer should look like this after each device and the host write their ids:
// 0, 1, 2, 0, 1, 2, 0, 1, 2...
"__kernel void write_owned_locations(__global char* a, uint num_id, uint id)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" int owner = i % num_id;\n"
" if(id == owner) \n"
" a[i] = id;\n" // modify location if it belongs to this device, write id
"}\n"
// Verify that a device can see the byte-sized updates from the other devices: sum up the device ids and see if they match the expected value.
// Note: this must be called with a reduced NDRange so that neighbor accesses don't go past the end of the buffer.
// For example, if there are two SVM devices and the host (3 total devices) the buffer should look like this:
// 0,1,2,0,1,2...
// and the expected sum at each point is 0+1+2 = 3.
"__kernel void sum_neighbor_locations(__global char* a, uint num_devices, volatile __global uint* error_count)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" uint expected_sum = (num_devices * (num_devices - 1))/2;\n"
" uint sum = 0;\n"
" for(uint j=0; j<num_devices; j++) {\n"
" sum += a[i + j];\n" // add my neighbors to the right
" }\n"
" if(sum != expected_sum)\n"
" atomic_inc(error_count);\n"
"}\n"
};
int test_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{
clContextWrapper context;
clProgramWrapper program;
clKernelWrapper k1,k2;
clCommandQueueWrapper queues[MAXQ];
cl_uint num_devices = 0;
cl_int err = CL_SUCCESS;
cl_int rval = CL_SUCCESS;
err = create_cl_objects(deviceID, &byte_manipulation_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
if(err < 0) return -1; // fail test.
cl_uint num_devices_plus_host = num_devices + 1;
k1 = clCreateKernel(program, "write_owned_locations", &err);
test_error(err, "clCreateKernel failed");
k2 = clCreateKernel(program, "sum_neighbor_locations", &err);
test_error(err, "clCreateKernel failed");
cl_char *pA = (cl_char*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_char) * num_elements, 0);
cl_uint **error_counts = (cl_uint**) malloc(sizeof(void*) * num_devices);
for(cl_uint i=0; i < num_devices; i++) {
error_counts[i] = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint), 0);
*error_counts[i] = 0;
}
for(int i=0; i < num_elements; i++) pA[i] = -1;
err |= clSetKernelArgSVMPointer(k1, 0, pA);
err |= clSetKernelArg(k1, 1, sizeof(cl_uint), &num_devices_plus_host);
test_error(err, "clSetKernelArg failed");
// get all the devices going simultaneously
size_t element_num = num_elements;
for(cl_uint d=0; d < num_devices; d++) // device ids starting at 1.
{
err = clSetKernelArg(k1, 2, sizeof(cl_uint), &d);
test_error(err, "clSetKernelArg failed");
err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL, 0, NULL, NULL);
test_error(err,"clEnqueueNDRangeKernel failed");
}
for(cl_uint d=0; d < num_devices; d++) clFlush(queues[d]);
cl_uint host_id = num_devices; // host code will take the id above the devices.
for(int i = (int)num_devices; i < num_elements; i+= num_devices_plus_host) pA[i] = host_id;
for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);
// now check that each device can see the byte writes made by the other devices.
err |= clSetKernelArgSVMPointer(k2, 0, pA);
err |= clSetKernelArg(k2, 1, sizeof(cl_uint), &num_devices_plus_host);
test_error(err, "clSetKernelArg failed");
// adjusted so k2 doesn't read past end of buffer
size_t adjusted_num_elements = num_elements - num_devices;
for(cl_uint id = 0; id < num_devices; id++)
{
err = clSetKernelArgSVMPointer(k2, 2, error_counts[id]);
test_error(err, "clSetKernelArg failed");
err = clEnqueueNDRangeKernel(queues[id], k2, 1, NULL, &adjusted_num_elements, NULL, 0, NULL, NULL);
test_error(err,"clEnqueueNDRangeKernel failed");
}
for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);
bool failed = false;
// see if any of the devices found errors
for(cl_uint i=0; i < num_devices; i++) {
if(*error_counts[i] > 0)
failed = true;
}
cl_uint expected = (num_devices_plus_host * (num_devices_plus_host - 1))/2;
// check that host can see the byte writes made by the devices.
for(cl_uint i = 0; i < num_elements - num_devices_plus_host; i++)
{
int sum = 0;
for(cl_uint j=0; j < num_devices_plus_host; j++) sum += pA[i+j];
if(sum != expected)
failed = true;
}
clSVMFree(context, pA);
for(cl_uint i=0; i < num_devices; i++) clSVMFree(context, error_counts[i]);
if(failed)
return -1;
return 0;
}

View File

@@ -0,0 +1,219 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// OpenCL C source for the cross-buffer-pointer test: it creates and verifies linked lists
// whose nodes come from two different buffers.
// The C++ comments that sit between the string literals below are host-side only; they are
// not part of the kernel source text.
const char *SVMCrossBufferPointers_test_kernel[] = {
"\n"
"typedef struct Node {\n"
" int global_id;\n"
" int position_in_list;\n"
" __global struct Node* pNext;\n"
"} Node;\n"
"\n"
// allocate_node: hands out one node by atomically incrementing the shared allocation_index.
// The same counter is used to index whichever of the two buffers is selected.
"__global Node* allocate_node(__global Node* pNodes1, __global Node* pNodes2, volatile __global int* allocation_index, size_t i)\n"
"{\n"
// mix things up, adjacent work items will allocate from different buffers
" if(i & 0x1)\n"
" return &pNodes1[atomic_inc(allocation_index)];\n"
" else\n"
" return &pNodes2[atomic_inc(allocation_index)];\n"
"}\n"
"\n"
// The allocation_index parameter must be initialized on the host to N work-items
// The first N nodes in pNodes will be the heads of the lists.
// create_linked_lists: work-item i uses pNodes[i] as the head of its list and appends
// list_length - 1 further nodes, tagging each node with i and its position in the list.
"__kernel void create_linked_lists(__global Node* pNodes, __global Node* pNodes2, volatile __global int* allocation_index, int list_length)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" __global Node *pNode = &pNodes[i];\n"
"\n"
" pNode->global_id = i;\n"
" pNode->position_in_list = 0;\n"
"\n"
" __global Node *pNew;\n"
" for(int j=1; j < list_length; j++)\n"
" {\n"
" pNew = allocate_node(pNodes, pNodes2, allocation_index, i);\n"
" pNew->global_id = i;\n"
" pNew->position_in_list = j;\n"
" pNode->pNext = pNew; // link new node onto end of list\n"
" pNode = pNew; // move to end of list\n"
" }\n"
"}\n"
"\n"
// verify_linked_lists: work-item i walks its list starting at pNodes[i] and increments
// num_correct once for every node whose global_id and position match; it stops at the
// first node that does not match.
"__kernel void verify_linked_lists(__global Node* pNodes, __global Node* pNodes2, volatile __global uint* num_correct, int list_length)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" __global Node *pNode = &pNodes[i];\n"
"\n"
" for(int j=0; j < list_length; j++)\n"
" {\n"
" if( pNode->global_id == i && pNode->position_in_list == j)\n"
" {\n"
" atomic_inc(num_correct);\n"
" }\n"
" else {\n"
" break;\n"
" }\n"
" pNode = pNode->pNext;\n"
" }\n"
"}\n"
};
// Builds the linked lists with host code.
// Both node buffers are mapped for reading and writing (blocking maps), the lists are
// created through create_linked_lists(), and both buffers are unmapped again before the
// queue is drained. Returns CL_SUCCESS, or whatever the error macros return on failure.
cl_int create_linked_lists_on_host(cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
{
    log_info("SVM: creating linked list on host ");

    cl_int error = CL_SUCCESS;
    // Each buffer holds ListLength nodes for every one of the numLists lists.
    const size_t bufferBytes = sizeof(Node)*ListLength*numLists;

    Node *hostNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes, 0, NULL, NULL, &error);
    test_error2(error, hostNodes, "clEnqueueMapBuffer failed");

    Node *hostNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes, 0, NULL, NULL, &error);
    test_error2(error, hostNodes2, "clEnqueueMapBuffer failed");

    create_linked_lists(hostNodes, numLists, ListLength);

    error = clEnqueueUnmapMemObject(cmdq, nodes, hostNodes, 0, NULL, NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");

    error = clEnqueueUnmapMemObject(cmdq, nodes2, hostNodes2, 0, NULL, NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");

    error = clFinish(cmdq);
    test_error(error, "clFinish failed");

    return error;
}
// Verify correctness of the linked list using host code.
// Both node buffers are mapped (blocking), the lists are checked with verify_linked_lists(),
// and the buffers are always unmapped again, even when verification fails.
//   ci         not used by this function
//   cmdq       queue used for the map/unmap commands
//   nodes      first node buffer (holds the list heads)
//   nodes2     second node buffer
//   ListLength number of nodes per list
//   numLists   number of lists
// Returns CL_SUCCESS when the lists are correct, -1 when verification fails, or the value
// returned by the error macros when an OpenCL call fails.
cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
{
    cl_int error = CL_SUCCESS;
    const size_t bufferBytes = sizeof(Node)*ListLength * numLists;

    Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes, 0, NULL,NULL, &error);
    test_error2(error, pNodes, "clEnqueueMapBuffer failed");

    Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes, 0, NULL,NULL, &error);
    // Check the pointer returned by THIS map call (the original re-checked pNodes).
    test_error2(error, pNodes2, "clEnqueueMapBuffer failed");

    // Remember the verdict but do not return yet: the buffers must be unmapped first.
    const cl_int verifyResult = verify_linked_lists(pNodes, numLists, ListLength);

    error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");

    error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");

    error = clFinish(cmdq);
    test_error(error, "clFinish failed");

    if(verifyResult) return -1;
    return error;
}
// This tests that shared buffers are able to contain pointers that point to other shared buffers.
// This tests that all devices and the host share a common address space; using only the coarse-grain features.
// This is done by creating a linked list on a device and then verifying the correctness of the list
// on another device or the host.
// The linked list nodes are allocated from two different buffers; this is done to ensure that cross-buffer pointers work correctly.
// This basic test is performed for all combinations of devices and the host.
// The context2 and queue arguments are not used; the test creates its own context and queues
// through create_cl_objects(). num_elements is the number of linked lists to build.
// Returns 0 on success and a non-zero value on failure.
int test_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
clContextWrapper context = NULL;
clProgramWrapper program = NULL;
cl_uint num_devices = 0;
cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ];
error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
if(error) return -1;
size_t numLists = num_elements;
cl_int ListLength = 32;
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
test_error(error, "clCreateKernel failed");
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
test_error(error, "clCreateKernel failed");
// NOTE(review): neither clSVMAlloc result below is checked for NULL, and the early-return
// paths further down leave both allocations unfreed.
// this buffer holds some of the linked list nodes.
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);
// this buffer holds some of the linked list nodes.
Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);
// The inner scope makes the buffer wrappers created on top of the SVM allocations go out
// of scope before the allocations themselves are freed at the end of the function.
{
clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error);
test_error(error, "clCreateBuffer failed.");
clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error);
test_error(error, "clCreateBuffer failed.");
// this buffer holds the index into the nodes buffer that is used for node allocation
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
test_error(error, "clCreateBuffer failed.");
// this buffer holds the count of correct nodes which is computed by the verify kernel.
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
test_error(error, "clCreateBuffer failed.");
// Both kernels take the same first two arguments (the two node buffers) and the list
// length as their last argument; argument 2 is the allocator or the correct-node counter.
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes);
//error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *) pNodes);
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2);
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &allocator);
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int), (void *) &ListLength);
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes);
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2);
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &num_correct);
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int), (void *) &ListLength);
test_error(error, "clSetKernelArg failed");
// Create linked list on one device and verify on another device (or the host).
// Do this for all possible combinations of devices and host within the platform.
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
{
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
{
// The lists are rebuilt for every (ci, vi) pair, not once per ci.
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
{
// host-side work is issued through the first queue
error = create_linked_lists_on_host(queues[0], nodes, nodes2, ListLength, numLists);
if(error) return -1;
}
else
{
error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
if(error) return -1;
}
if(vi == num_devices)
{
error = verify_linked_lists_on_host(vi, queues[0], nodes, nodes2, ListLength, numLists);
if(error) return -1;
}
else
{
error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
if(error) return -1;
}
} // inner loop, vi
} // outer loop, ci
}
clSVMFree(context, pNodes2);
clSVMFree(context, pNodes);
return 0;
}

View File

@@ -0,0 +1,254 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
#include "../../test_common/harness/mt19937.h"
#include <vector>
// Record filled in by callback_svm_free so the test can inspect what the callback received.
struct CallbackData
{
    // Set to 1 by the callback once it has freed every pointer; the test initializes it to 0.
    cl_uint status;
    // Number of SVM pointers the callback was invoked with.
    cl_uint num_svm_pointers;
    // Copy of the SVM pointers the callback was invoked with, in the order received.
    std::vector<void *> svm_pointers;
};
// Fills the first `size` bytes of `data` with pseudo-random values.
//   data  destination vector; must already hold at least `size` elements
//   size  number of bytes to generate
//   seed  random number generator state passed to genrand_int32()
// Each 32-bit random word supplies four bytes (low byte first); a new word is drawn once
// all 32 bits of the current one have been consumed.
void generate_data(std::vector<cl_uchar> &data, size_t size, MTdata seed)
{
    cl_uint randomData = genrand_int32(seed);
    cl_uint bitsLeft = 32;

    for( size_t i = 0; i < size; i++ )
    {
        if( 0 == bitsLeft)
        {
            randomData = genrand_int32(seed);
            bitsLeft = 32;
        }
        data[i] = (cl_uchar)( randomData & 255 );
        randomData >>= 8;
        // Account for the byte just consumed. The original subtracted 8 from randomData
        // instead, so bitsLeft never reached 0 and the word was never refreshed.
        bitsLeft -= 8;
    }
}
// Callback handed to clEnqueueSVMFree.
// user_data must point to a CallbackData. The callback records how many pointers it was
// given and a copy of each one, frees every pointer with clSVMFree in the context that owns
// `queue`, and finally sets status to 1. If the context cannot be queried it logs an error
// and returns without freeing anything and without setting status.
void CL_CALLBACK callback_svm_free(cl_command_queue queue, cl_uint num_svm_pointers, void * svm_pointers[], void * user_data)
{
    CallbackData *record = (CallbackData *)user_data;
    record->num_svm_pointers = num_svm_pointers;
    record->svm_pointers.resize(num_svm_pointers, 0);

    cl_context owner;
    const cl_int queryResult = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &owner, 0);
    if (queryResult != CL_SUCCESS)
    {
        log_error("clGetCommandQueueInfo failed in the callback\n");
        return;
    }

    for (size_t idx = 0; idx != num_svm_pointers; ++idx)
    {
        void *current = svm_pointers[idx];
        record->svm_pointers[idx] = current;
        clSVMFree(owner, current);
    }

    // Written last: the test polls this field to learn that the callback has finished.
    record->status = 1;
}
// Exercises the clEnqueueSVM* commands (MemFill, Memcpy, Map, Unmap, Free) on coarse-grain
// SVM buffers for every element size listed in typeSizes, checks the command type reported
// by each command's event, and finally checks that the clEnqueueSVMFree callback receives
// the expected pointers.
// The context `c` and `num_elements` arguments are not used; `queue` is overwritten with the
// first queue created by create_cl_objects().
// Returns 0 on success and a non-zero value on failure.
int test_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{
clContextWrapper context = NULL;
clCommandQueueWrapper queues[MAXQ];
cl_uint num_devices = 0;
const size_t elementNum = 1024;
const size_t numSVMBuffers = 32;
cl_int error = CL_SUCCESS;
RandomSeed seed(0);
error = create_cl_objects(deviceID, NULL, &context, NULL, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
if(error) return -1;
queue = queues[0];
//all possible sizes of vectors and scalars
size_t typeSizes[] = {
sizeof(cl_uchar),
sizeof(cl_uchar2),
sizeof(cl_uchar3),
sizeof(cl_uchar4),
sizeof(cl_uchar8),
sizeof(cl_uchar16),
sizeof(cl_ushort),
sizeof(cl_ushort2),
sizeof(cl_ushort3),
sizeof(cl_ushort4),
sizeof(cl_ushort8),
sizeof(cl_ushort16),
sizeof(cl_uint),
sizeof(cl_uint2),
sizeof(cl_uint3),
sizeof(cl_uint4),
sizeof(cl_uint8),
sizeof(cl_uint16),
sizeof(cl_ulong),
sizeof(cl_ulong2),
sizeof(cl_ulong3),
sizeof(cl_ulong4),
sizeof(cl_ulong8),
sizeof(cl_ulong16),
};
for (size_t i = 0; i < ( sizeof(typeSizes) / sizeof(typeSizes[0]) ); ++i)
{
//generate initial data
// fillData0 and fillData2 are random fill patterns of typeSizes[i] bytes; fillData1 is
// zero-initialized and not used below.
std::vector<cl_uchar> fillData0(typeSizes[i]), fillData1(typeSizes[i], 0), fillData2(typeSizes[i]);
generate_data(fillData0, typeSizes[i], seed);
generate_data(fillData2, typeSizes[i], seed);
// NOTE(review): the clSVMAlloc results are not checked for NULL, and the `return -1`
// paths inside this loop that run before the clEnqueueSVMFree below leave both buffers
// allocated.
cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum * typeSizes[i], 0);
cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum * typeSizes[i], 0);
// The fill waits on a user event, the copy waits on the fill and the map waits on the
// copy, so the whole chain only starts once the user event is set to CL_COMPLETE.
clEventWrapper userEvent = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
clEventWrapper eventMemFill;
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0], typeSizes[i], elementNum * typeSizes[i], 1, &userEvent, &eventMemFill);
test_error(error, "clEnqueueSVMMemFill failed");
clEventWrapper eventMemcpy;
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, elementNum * typeSizes[i], 1, &eventMemFill, &eventMemcpy);
test_error(error, "clEnqueueSVMMemcpy failed");
error = clSetUserEventStatus(userEvent, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
clEventWrapper eventMap;
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, dstBuffer, elementNum * typeSizes[i], 1, &eventMemcpy, &eventMap);
test_error(error, "clEnqueueSVMMap failed");
// the map was enqueued non-blocking, so wait for it before reading dstBuffer on the host
error = clWaitForEvents(1, &eventMap);
test_error(error, "clWaitForEvents failed");
//data verification
// every byte of dstBuffer must equal the fillData0 pattern repeated end to end
for (size_t j = 0; j < elementNum * typeSizes[i]; ++j)
{
if (dstBuffer[j] != fillData0[j % typeSizes[i]])
{
// NOTE(review): j is a size_t but is printed with %ld — confirm this is safe on all targets.
log_error("Invalid data at index %ld, expected %d, got %d\n", j, fillData0[j % typeSizes[i]], dstBuffer[j]);
return -1;
}
}
clEventWrapper eventUnmap;
error = clEnqueueSVMUnmap(queue, dstBuffer, 0, 0, &eventUnmap);
test_error(error, "clEnqueueSVMUnmap failed");
// Second pass: fill the first half of srcBuffer and the second half of dstBuffer with
// fillData2, then copy both halves from srcBuffer to dstBuffer. Only the final copy is
// blocking. The contents are not verified after this pass.
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData2[0], typeSizes[i], elementNum * typeSizes[i] / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemFill failed");
error = clEnqueueSVMMemFill(queue, dstBuffer + elementNum * typeSizes[i] / 2, &fillData2[0], typeSizes[i], elementNum * typeSizes[i] / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemFill failed");
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, elementNum * typeSizes[i] / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemcpy failed");
error = clEnqueueSVMMemcpy(queue, CL_TRUE, dstBuffer + elementNum * typeSizes[i] / 2, srcBuffer + elementNum * typeSizes[i] / 2, elementNum * typeSizes[i] / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemcpy failed");
// free both buffers through the queue, without a callback, and wait for the free to finish
void *ptrs[] = {(void *)srcBuffer, (void *)dstBuffer};
clEventWrapper eventFree;
error = clEnqueueSVMFree(queue, 2, ptrs, 0, 0, 0, 0, &eventFree);
test_error(error, "clEnqueueSVMFree failed");
error = clWaitForEvents(1, &eventFree);
test_error(error, "clWaitForEvents failed");
//event info verification for new SVM commands
cl_command_type commandType;
error = clGetEventInfo(eventMemFill, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_MEMFILL)
{
log_error("Invalid command type returned for clEnqueueSVMMemFill\n");
return -1;
}
error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_MEMCPY)
{
log_error("Invalid command type returned for clEnqueueSVMMemcpy\n");
return -1;
}
error = clGetEventInfo(eventMap, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_MAP)
{
log_error("Invalid command type returned for clEnqueueSVMMap\n");
return -1;
}
error = clGetEventInfo(eventUnmap, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_UNMAP)
{
log_error("Invalid command type returned for clEnqueueSVMUnmap\n");
return -1;
}
error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_FREE)
{
log_error("Invalid command type returned for clEnqueueSVMFree\n");
return -1;
}
}
// Callback check: allocate numSVMBuffers buffers and free them all with a single
// clEnqueueSVMFree that uses callback_svm_free; the callback performs the actual frees.
std::vector<void *> buffers(numSVMBuffers, 0);
for(size_t i = 0; i < numSVMBuffers; ++i) buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0);
//verify if callback is triggered correctly
CallbackData data;
data.status = 0;
error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0], callback_svm_free, &data, 0, 0, 0);
test_error(error, "clEnqueueSVMFree failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
//wait for the callback
// NOTE(review): data.status is a plain cl_uint that is written by the callback and polled
// here in an empty loop; if the callback can run on another thread this is an
// unsynchronized access, and the loop never ends if the callback returns early without
// setting status — confirm.
while(data.status == 0) { }
//check if number of SVM pointers returned in the callback matches with expected
if (data.num_svm_pointers != buffers.size())
{
log_error("Invalid number of SVM pointers returned in the callback, expected: %ld, got: %d\n", buffers.size(), data.num_svm_pointers);
return -1;
}
//check if pointers returned in callback are correct
for (size_t i = 0; i < buffers.size(); ++i)
{
if (data.svm_pointers[i] != buffers[i])
{
log_error("Invalid SVM pointer returned in the callback, idx: %ld\n", i);
return -1;
}
}
return 0;
}

View File

@@ -0,0 +1,168 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// OpenCL C source for the fine-grain memory consistency test.
// build_hash_table: each work-item reserves one node from pNodes by atomically incrementing
// pNumNodes, stores input[i] in it, and pushes it onto the front of bin (input[i] % numBins)
// with a compare-exchange loop. The first numBins entries of pNodes act as the bin heads.
const char *hash_table_kernel[] = {
"typedef struct BinNode {\n"
" int value;\n"
" atomic_uintptr_t pNext;\n"
"} BinNode;\n"
"__kernel void build_hash_table(__global uint* input, __global BinNode* pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n"
"{\n"
" __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, 1, memory_order_relaxed, memory_scope_all_svm_devices) ];\n"
" uint i = get_global_id(0);\n"
" uint b = input[i] % numBins;\n"
" pNew->value = input[i];\n"
" uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), memory_order_seq_cst, memory_scope_all_svm_devices);\n"
" do\n"
" {\n"
" atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, memory_scope_all_svm_devices);\n" // always inserting at head of list
// retry until the bin head still equals `next`; on failure `next` receives the current head
" } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), &next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, memory_scope_all_svm_devices));\n"
"}\n"
};
// Host-side declaration of the hash table node that the build_hash_table kernel also
// operates on (the kernel declares its own BinNode with an int value and an
// atomic_uintptr_t link).
struct BinNode
{
    // Value stored in this node; its bin is (value % numBins).
    cl_uint value;
    // Next node in the same bin, or NULL at the end of the list.
    struct BinNode* pNext;
};
// Host-side counterpart of the build_hash_table kernel: inserts each of the inputSize
// values from `input` into the hash table stored in pNodes.
//   c          not used by this function
//   input      values to insert
//   inputSize  number of values in `input`
//   pNodes     node pool; entries [0, numBins) are the bin heads
//   pNumNodes  shared counter of nodes handed out so far
//   numBins    number of bins
void build_hash_table_on_host(cl_context c, cl_uint* input, size_t inputSize, BinNode* pNodes, cl_int volatile *pNumNodes, cl_uint numBins)
{
for(cl_uint i = 0; i < inputSize; i++)
{
// reserve the next unused node by atomically bumping the shared counter
BinNode *pNew = &pNodes[ AtomicFetchAddExplicit(pNumNodes, 1, memory_order_relaxed) ];
cl_uint b = input[i] % numBins;
pNew->value = input[i];
BinNode *next = pNodes[b].pNext;
// retry the compare-exchange until the bin head is swapped to pNew
// NOTE(review): the orders passed here are (relaxed, seq_cst) while the kernel passes
// (seq_cst, relaxed) to its compare-exchange — confirm the helper's parameter order.
do {
pNew->pNext = next; // always inserting at head of list
} while(!AtomicCompareExchangeStrongExplicit(&(pNodes[b].pNext), &next, pNew, memory_order_relaxed, memory_order_seq_cst));
}
}
// Inserts every input value into one shared hash table from all devices and from the host
// at the same time, then walks the table on the host to check that no item was lost or
// placed in the wrong bin.
//   context      context in which the SVM buffers are allocated
//   queues       command queues; the first num_devices entries are used, one launch each
//   kernel       the build_hash_table kernel
//   num_devices  number of queues/devices taking part
//   numBins      number of hash bins; the first numBins nodes hold the list heads
//   num_pixels   number of input values and global work size of every launch
// Returns 0 when the table is correct and -1 when it is not (or an allocation failed);
// failures of OpenCL calls are reported through test_error.
int launch_kernels_and_verify(clContextWrapper &context, clCommandQueueWrapper* queues, clKernelWrapper &kernel, cl_uint num_devices, cl_uint numBins, size_t num_pixels)
{
    int err = CL_SUCCESS;

    // Every device plus the host inserts all of the pixels.
    const size_t expected_items = num_pixels * (num_devices + 1);

    cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0);
    BinNode *pNodes = (BinNode*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(BinNode) * (expected_items + numBins), 0);
    cl_int *pNumNodes = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int), 0);
    if(pInputImage == NULL || pNodes == NULL || pNumNodes == NULL)
    {
        log_error("clSVMAlloc failed\n");
        // clSVMFree ignores NULL pointers, so all three can be passed unconditionally.
        clSVMFree(context, pInputImage);
        clSVMFree(context, pNodes);
        clSVMFree(context, pNumNodes);
        return -1;
    }

    *pNumNodes = numBins;  // using the first numBins nodes to hold the list heads.
    for(cl_uint i = 0; i < numBins; i++)
    {
        pNodes[i].pNext = NULL;
    }

    for(cl_uint i = 0; i < num_pixels; i++) pInputImage[i] = i;

    err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
    err |= clSetKernelArgSVMPointer(kernel, 1, pNodes);
    err |= clSetKernelArgSVMPointer(kernel, 2, pNumNodes);
    err |= clSetKernelArg(kernel, 3, sizeof(cl_uint), (void*) &numBins);
    test_error(err, "clSetKernelArg failed");

    // get all the devices going simultaneously, each device (and the host) will insert all the pixels.
    // No event is requested: completion is awaited with clFinish below, and an event that
    // is never released would leak.
    for(cl_uint d = 0; d < num_devices; d++)
    {
        err = clEnqueueNDRangeKernel(queues[d], kernel, 1, NULL, &num_pixels, 0, 0, NULL, NULL);
        test_error(err,"clEnqueueNDRangeKernel failed");
    }
    for(cl_uint d = 0; d < num_devices; d++) clFlush(queues[d]);

    // wait until we see some activity from a device (try to run host side simultaneously).
    while(numBins == AtomicLoadExplicit(pNumNodes, memory_order_relaxed)) { }

    build_hash_table_on_host(context, pInputImage, num_pixels, pNodes, pNumNodes, numBins);

    for(cl_uint d = 0; d < num_devices; d++) clFinish(queues[d]);

    cl_uint num_items = 0;
    bool wrong_bucket = false;
    // check correctness of each bin in the hash table.
    for(cl_uint i = 0; i < numBins; i++)
    {
        BinNode *pNode = pNodes[i].pNext;
        while(pNode)
        {
            if((pNode->value % numBins) != i)
            {
                log_error("Something went wrong, item is in wrong hash bucket\n");
                wrong_bucket = true;  // a misplaced item must fail the test, not just be logged
                break;
            }
            num_items++;
            pNode = pNode->pNext;
        }
    }

    clSVMFree(context, pInputImage);
    clSVMFree(context, pNodes);
    clSVMFree(context, pNumNodes);

    // each device and the host inserted all of the pixels, check that none are missing.
    if(num_items != expected_items)
    {
        log_error("The hash table is not correct, num items %u, expected num items: %u\n", num_items, (unsigned int)expected_items);
        return -1; // test did not pass
    }
    if(wrong_bucket) return -1;
    return 0;
}
// This tests for memory consistency across devices and the host.
// Each device and the host simultaneously insert values into a single hash table.
// Each bin in the hash table is a linked list. Each bin is protected against simultaneous
// update using a lock free technique. The correctness of the list is verified on the host.
// This test requires the new OpenCL 2.0 atomic operations that implement the new seq_cst memory ordering.
int test_fine_grain_memory_consistency(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{
    clContextWrapper context;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clCommandQueueWrapper queues[MAXQ];
    cl_uint num_devices = 0;
    cl_int err = CL_SUCCESS;

    // With 8-byte host pointers both 64-bit atomics extensions must be present,
    // otherwise the test is skipped and counted as passing.
    if (sizeof(void *) == 8)
    {
        const bool has64BitAtomics = is_extension_available(deviceID, "cl_khr_int64_base_atomics")
                                  && is_extension_available(deviceID, "cl_khr_int64_extended_atomics");
        if (!has64BitAtomics)
        {
            log_info("WARNING: test skipped. 'cl_khr_int64_base_atomics' and 'cl_khr_int64_extended_atomics' extensions are not supported\n");
            return 0;
        }
    }

    err = create_cl_objects(deviceID, &hash_table_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS);
    if (err == 1)
        return 0;  // no devices capable of requested SVM level, so don't execute but count test as passing.
    if (err < 0)
        return -1; // fail test.

    kernel = clCreateKernel(program, "build_hash_table", &err);
    test_error(err, "clCreateKernel failed");

    const size_t num_pixels = num_elements;

    const cl_uint binCounts[] = {
        1,   // all work groups in all devices and the host code will hammer on this one lock.
        2,   // 2 locks within in same cache line will get hit from different devices and host.
        29,  // locks span a few cache lines.
    };

    int result = 0;
    for (size_t run = 0; run < sizeof(binCounts) / sizeof(binCounts[0]); ++run)
    {
        result = launch_kernels_and_verify(context, queues, kernel, num_devices, binCounts[run], num_pixels);
        if (result == -1)
            return result;
    }
    return result;
}

View File

@@ -0,0 +1,105 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// OpenCL C source for the fine-grain synchronization test.
// find_targets: work-item i compares image[i] with `target`; on a match it reserves the
// next slot by atomically incrementing numTargetsFound and stores its index i into that
// slot of targetLocations with an atomic exchange.
const char *find_targets_kernel[] = {
"__kernel void find_targets(__global uint* image, uint target, volatile __global atomic_uint *numTargetsFound, volatile __global atomic_uint *targetLocations)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" uint index;\n"
" if(image[i] == target) {\n"
// the slot counter uses device scope, the location store uses all-SVM-devices scope
" index = atomic_fetch_add_explicit(numTargetsFound, 1, memory_order_relaxed, memory_scope_device); \n"
" atomic_exchange_explicit(&targetLocations[index], i, memory_order_relaxed, memory_scope_all_svm_devices); \n"
" }\n"
"}\n"
};
// Placeholder for the host-side work that would analyze a target found at `location`.
// It intentionally does nothing.
void spawnAnalysisTask(int location)
{
    (void)location;
    // printf("found target at location %d\n", location);
}
#define MAX_TARGETS 1024
// Goals: demonstrate use of SVM's atomics to do fine grain synchronization between the device and host.
// Concept: a device kernel is used to search an input image for regions that match a target pattern.
// The device immediately notifies the host when it finds a target (via an atomic operation that works across host and devices).
// The host is then able to spawn a task that further analyzes the target while the device continues searching for more targets.
//
// The context `c` and `queue` arguments are not used; the test creates its own objects with
// create_cl_objects(). num_elements is the number of pixels in the input image.
// Returns 0 when exactly three targets were reported (or when no device supports the
// required SVM level), and a non-zero value otherwise.
int test_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{
    clContextWrapper context = NULL;
    clProgramWrapper program = NULL;
    cl_uint num_devices = 0;
    cl_int err = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];

    err = create_cl_objects(deviceID, &find_targets_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS);
    if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
    if(err < 0) return -1; // fail test.

    clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err);
    test_error(err, "clCreateKernel failed");

    size_t num_pixels = num_elements;

    cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0);
    cl_uint *pNumTargetsFound = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_uint), 0);
    cl_int *pTargetLocations = (cl_int* ) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int) * MAX_TARGETS, 0);
    if(pInputImage == NULL || pNumTargetsFound == NULL || pTargetLocations == NULL)
    {
        log_error("clSVMAlloc failed\n");
        // clSVMFree ignores NULL pointers, so all three can be passed unconditionally.
        clSVMFree(context, pInputImage);
        clSVMFree(context, pNumTargetsFound);
        clSVMFree(context, pTargetLocations);
        return -1;
    }

    cl_uint targetDescriptor = 777;
    *pNumTargetsFound = 0;
    cl_uint i;
    for(i=0; i < MAX_TARGETS; i++) pTargetLocations[i] = -1;
    for(i=0; i < num_pixels; i++) pInputImage[i] = 0;
    // plant three targets: at the start, near the start and at the very end of the image
    pInputImage[0] = targetDescriptor;
    pInputImage[3] = targetDescriptor;
    pInputImage[num_pixels - 1] = targetDescriptor;

    err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
    err |= clSetKernelArg(kernel, 1, sizeof(cl_uint), (void*) &targetDescriptor);
    err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound);
    err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations);
    test_error(err, "clSetKernelArg failed");

    cl_event done;
    err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL, 0, NULL, &done);
    test_error(err,"clEnqueueNDRangeKernel failed");
    clFlush(queues[0]);

    i=0;
    cl_int status = CL_QUEUED;
    int result = 0;
    // check for new targets, if found spawn a task to analyze target.
    do {
        err = clGetEventInfo(done,CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, NULL);
        if(err != CL_SUCCESS)
        {
            log_error("clGetEventInfo failed (error %d)\n", err);
            result = err;
            break;
        }
        // A negative execution status means the kernel terminated abnormally; it will never
        // become CL_COMPLETE, so stop polling instead of spinning forever.
        if(status < 0)
        {
            log_error("find_targets kernel terminated abnormally (execution status %d)\n", status);
            result = -1;
            break;
        }
        // i is kept below MAX_TARGETS so the slot array is never read out of bounds.
        if( i < MAX_TARGETS && AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1) // -1 indicates slot not used yet.
        {
            spawnAnalysisTask(pTargetLocations[i]);
            i++;
        }
    } while (status != CL_COMPLETE || (i < MAX_TARGETS && AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1));

    // On the failure paths the kernel may not have finished; drain the queue before the
    // SVM buffers it uses are freed.
    if(result != 0) clFinish(queues[0]);

    clReleaseEvent(done);
    clSVMFree(context, pInputImage);
    clSVMFree(context, pNumTargetsFound);
    clSVMFree(context, pTargetLocations);

    if(result != 0) return result;
    if(i != 3) return -1;
    return 0;
}

View File

@@ -0,0 +1,330 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
#include "../../test_common/harness/mt19937.h"
// Number of cl_uint elements in each of the test's buffers; also used as the global work size.
#define GLOBAL_SIZE 65536
// OpenCL C source for the migration test.
// migrate_kernel: work-item i XORs element i of each of the three buffers with a
// different fixed constant.
static const char *sources[] = {
"__kernel void migrate_kernel(__global uint * restrict a, __global uint * restrict b, __global uint * restrict c)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" a[i] ^= 0x13579bdf;\n"
" b[i] ^= 0x2468ace0;\n"
" c[i] ^= 0x731fec8f;\n"
"}\n"
};
// Writes n pseudo-random 32-bit words, drawn from genrand_int32(seed), into p[0..n-1].
static void
fill_buffer(cl_uint* p, size_t n, MTdata seed)
{
    cl_uint* const stop = p + n;
    for (cl_uint* cursor = p; cursor != stop; ++cursor) {
        *cursor = (cl_uint)genrand_int32(seed);
    }
}
static bool
check(const char* s, cl_uint* a, cl_uint* e, size_t n)
{
bool ok = true;
for (size_t i=0; ok && i<n; ++i) {
if (a[i] != e[i]) {
log_error("ERROR: %s mismatch at word %u, *%08x vs %08x\n", s, (unsigned int)i, e[i], a[i]);
ok = false;
}
}
return ok;
}
static int
wait_and_release(const char* s, cl_event* evs, int n)
{
cl_int error = clWaitForEvents(n, evs);
if (error == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) {
for (int i=0; i<n; ++i) {
cl_int e;
error = clGetEventInfo(evs[i], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &e, NULL);
test_error(error, "clGetEventInfo failed");
if (e != CL_COMPLETE) {
log_error("ERROR: %s event %d execution status was %s\n", s, i, IGetErrorString(e));
return e;
}
}
} else
test_error(error, "clWaitForEvents failed");
for (int i=0; i<n; ++i) {
error = clReleaseEvent(evs[i]);
test_error(error, "clReleaseEvent failed");
}
return 0;
}
// Exercises clEnqueueSVMMigrateMem on three coarse-grain SVM allocations.
//
// The allocations are filled, migrated between two command queues (two
// devices when available, otherwise two queues on the same device) with
// various flags, sizes and offsets, and operated on by `migrate_kernel`
// between migrations. The kernel is launched four times in total; since each
// launch XORs every word with a fixed constant, an even number of launches
// restores the original data, which is then compared with the host copies.
//
// Returns 0 on success and a non-zero value on any failure.
int
test_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{
    cl_uint amem[GLOBAL_SIZE];
    cl_uint bmem[GLOBAL_SIZE];
    cl_uint cmem[GLOBAL_SIZE];
    cl_uint ramem[GLOBAL_SIZE];
    cl_uint rbmem[GLOBAL_SIZE];
    cl_uint rcmem[GLOBAL_SIZE];
    cl_event evs[20];

    const size_t global_size = GLOBAL_SIZE;

    RandomSeed seed(0);

    clContextWrapper context = NULL;
    clCommandQueueWrapper queues[MAXQ];
    cl_uint num_devices = 0;
    clProgramWrapper program;
    cl_int error;

    error = create_cl_objects(deviceID, &sources[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
    if (error)
        return -1;

    cl_command_queue queue0 = queues[0];
    clCommandQueueWrapper queue1;
    if (num_devices > 1) {
        log_info("  Running on two devices.\n");
        queue1 = queues[1];
    } else {
        // Ensure we have two distinct queues
        cl_device_id did;
        error = clGetCommandQueueInfo(queue0, CL_QUEUE_DEVICE, sizeof(did), (void *)&did, NULL);
        test_error(error, "clGetCommandQueueInfo failed");

        cl_command_queue_properties cqp;
        error = clGetCommandQueueInfo(queue0, CL_QUEUE_PROPERTIES, sizeof(cqp), &cqp, NULL);
        test_error(error, "clGetCommandQueueInfo failed");

        cl_queue_properties qp[3] = { CL_QUEUE_PROPERTIES, cqp, 0 };
        queue1 = clCreateCommandQueueWithProperties(context, did, qp, &error);
        test_error(error, "clCreateCommandQueueWithProperties failed");
    }

    clKernelWrapper kernel = clCreateKernel(program, "migrate_kernel", &error);
    test_error(error, "clCreateKernel failed");

    char* asvm = (char*)clSVMAlloc(context, CL_MEM_READ_WRITE, global_size*sizeof(cl_uint), 16);
    if (asvm == NULL) {
        log_error("ERROR: clSVMAlloc returned NULL at %s:%d\n", __FILE__, __LINE__);
        return -1;
    }

    char* bsvm = (char *)clSVMAlloc(context, CL_MEM_READ_WRITE, global_size*sizeof(cl_uint), 16);
    if (bsvm == NULL) {
        log_error("ERROR: clSVMAlloc returned NULL at %s:%d\n", __FILE__, __LINE__);
        clSVMFree(context, asvm);
        return -1;
    }

    char* csvm = (char *)clSVMAlloc(context, CL_MEM_READ_WRITE, global_size*sizeof(cl_uint), 16);
    if (csvm == NULL) {
        log_error("ERROR: clSVMAlloc returned NULL at %s:%d\n", __FILE__, __LINE__);
        clSVMFree(context, bsvm);
        clSVMFree(context, asvm);
        return -1;
    }

    error = clSetKernelArgSVMPointer(kernel, 0, (void*)asvm);
    test_error(error, "clSetKernelArgSVMPointer failed");

    error = clSetKernelArgSVMPointer(kernel, 1, (void*)bsvm);
    test_error(error, "clSetKernelArgSVMPointer failed");

    error = clSetKernelArgSVMPointer(kernel, 2, (void*)csvm);
    test_error(error, "clSetKernelArgSVMPointer failed");

    // Initialize host copy of data (and result)
    fill_buffer(amem, global_size, seed);
    fill_buffer(bmem, global_size, seed);
    fill_buffer(cmem, global_size, seed);

    // Now we're ready to start
    {
        // First, fill in the data on device0
        cl_uint patt[] = { 0, 0, 0, 0};
        error = clEnqueueSVMMemFill(queue0, (void *)asvm, patt, sizeof(patt), global_size*sizeof(cl_uint), 0, NULL, &evs[0]);
        test_error(error, "clEnqueueSVMMemFill failed");

        error = clEnqueueSVMMemFill(queue0, (void *)bsvm, patt, sizeof(patt), global_size*sizeof(cl_uint), 0, NULL, &evs[1]);
        test_error(error, "clEnqueueSVMMemFill failed");

        error = clEnqueueSVMMemFill(queue0, (void *)csvm, patt, sizeof(patt), global_size*sizeof(cl_uint), 0, NULL, &evs[2]);
        test_error(error, "clEnqueueSVMMemFill failed");
    }

    {
        // Now migrate fully to device 1 and discard the data
        char* ptrs[] = { asvm, bsvm, csvm };
        error = clEnqueueSVMMigrateMem(queue1, 3, (const void**)ptrs, NULL, CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED, 1, &evs[2], &evs[3]);
        test_error(error, "clEnqueueSVMMigrateMem failed");
    }

    {
        // Test host flag
        char *ptrs[] = { asvm+1, bsvm+3, csvm+5 };
        const size_t szs[] = { 1, 1, 0 };
        error = clEnqueueSVMMigrateMem(queue0, 3, (const void**)ptrs, szs, CL_MIGRATE_MEM_OBJECT_HOST, 1, &evs[3], &evs[4]);
        test_error(error, "clEnqueueSVMMigrateMem failed");
    }

    {
        // Next fill with known data
        error = clEnqueueSVMMap(queue1, CL_FALSE, CL_MAP_WRITE, (void*)asvm, global_size*sizeof(cl_uint), 1, &evs[4], &evs[5]);
        test_error(error, "clEnqueueSVMMap failed");

        error = clEnqueueSVMMap(queue1, CL_FALSE, CL_MAP_WRITE, (void*)bsvm, global_size*sizeof(cl_uint), 0, NULL, &evs[6]);
        test_error(error, "clEnqueueSVMMap failed");

        error = clEnqueueSVMMap(queue1, CL_FALSE, CL_MAP_WRITE, (void*)csvm, global_size*sizeof(cl_uint), 0, NULL, &evs[7]);
        test_error(error, "clEnqueueSVMMap failed");
    }

    error = clFlush(queue0);
    test_error(error, "clFlush failed");

    error = clFlush(queue1);
    test_error(error, "clFlush failed");

    error = wait_and_release("first batch", evs, 8);
    if (error)
        return -1;

    // The regions are mapped for writing now; copy the host data in.
    memcpy((void *)asvm, (void *)amem, global_size*sizeof(cl_uint));
    memcpy((void *)bsvm, (void *)bmem, global_size*sizeof(cl_uint));
    memcpy((void *)csvm, (void *)cmem, global_size*sizeof(cl_uint));

    {
        error = clEnqueueSVMUnmap(queue1, (void *)asvm, 0, NULL, &evs[0]);
        test_error(error, "clEnqueueSVMUnmap failed");

        error = clEnqueueSVMUnmap(queue1, (void *)bsvm, 0, NULL, &evs[1]);
        test_error(error, "clEnqueueSVMUnmap failed");

        error = clEnqueueSVMUnmap(queue1, (void *)csvm, 0, NULL, &evs[2]);
        test_error(error, "clEnqueueSVMUnmap failed");
    }

    {
        // Now try some overlapping regions, and operate on the result.
        // All six entries are submitted so that the overlapping pairs
        // (entries 0/3, 1/4 and 2/5) are part of the same migration.
        char *ptrs[] = { asvm+100, bsvm+17, csvm+1000, asvm+101, bsvm+19, csvm+1017 };
        const size_t szs[] = { 13, 23, 43, 3, 7, 11 };

        error = clEnqueueSVMMigrateMem(queue0, 6, (const void**)ptrs, szs, 0, 1, &evs[2], &evs[3]);
        test_error(error, "clEnqueueSVMMigrateMem failed");

        error = clEnqueueNDRangeKernel(queue0, kernel, 1, NULL, &global_size, NULL, 0, NULL, &evs[4]);
        test_error(error, "clEnqueueNDRangeKernel failed");
    }

    {
        // Now another pair
        char *ptrs[] = { asvm+8, bsvm+17, csvm+31, csvm+83 };
        const size_t szs[] = { 0, 1, 3, 7 };

        error = clEnqueueSVMMigrateMem(queue1, 4, (const void**)ptrs, szs, 0, 1, &evs[4], &evs[5]);
        test_error(error, "clEnqueueSVMMigrateMem failed");

        error = clEnqueueNDRangeKernel(queue1, kernel, 1, NULL, &global_size, NULL, 0, NULL, &evs[6]);
        test_error(error, "clEnqueueNDRangeKernel failed");
    }

    {
        // Another pair
        char *ptrs[] = { asvm+64, asvm+128, bsvm+64, bsvm+128, csvm, csvm+64 };
        const size_t szs[] = { 64, 64, 64, 64, 64, 64 };

        error = clEnqueueSVMMigrateMem(queue0, 6, (const void**)ptrs, szs, 0, 1, &evs[6], &evs[7]);
        test_error(error, "clEnqueueSVMMigrateMem failed");

        error = clEnqueueNDRangeKernel(queue0, kernel, 1, NULL, &global_size, NULL, 0, NULL, &evs[8]);
        test_error(error, "clEnqueueNDRangeKernel failed");
    }

    {
        // Final pair
        char *ptrs[] = { asvm, asvm, bsvm, csvm, csvm };
        const size_t szs[] = { 0, 1, 0, 1, 0 };

        error = clEnqueueSVMMigrateMem(queue1, 5, (const void**)ptrs, szs, 0, 1, &evs[8], &evs[9]);
        test_error(error, "clEnqueueSVMMigrateMem failed");

        error = clEnqueueNDRangeKernel(queue1, kernel, 1, NULL, &global_size, NULL, 0, NULL, &evs[10]);
        test_error(error, "clEnqueueNDRangeKernel failed");
    }

    {
        // Map everything for reading so the host can inspect the results.
        error = clEnqueueSVMMap(queue1, CL_FALSE, CL_MAP_READ, (void*)asvm, global_size*sizeof(cl_uint), 0, NULL, &evs[11]);
        test_error(error, "clEnqueueSVMMap failed");

        error = clEnqueueSVMMap(queue1, CL_FALSE, CL_MAP_READ, (void*)bsvm, global_size*sizeof(cl_uint), 0, NULL, &evs[12]);
        test_error(error, "clEnqueueSVMMap failed");

        error = clEnqueueSVMMap(queue1, CL_FALSE, CL_MAP_READ, (void*)csvm, global_size*sizeof(cl_uint), 0, NULL, &evs[13]);
        test_error(error, "clEnqueueSVMMap failed");
    }

    error = clFlush(queue0);
    test_error(error, "clFlush failed");

    error = clFlush(queue1);
    test_error(error, "clFlush failed");

    error = wait_and_release("batch 2", evs, 14);
    if (error)
        return -1;

    // Check kernel results
    bool ok = check("memory a", (cl_uint *)asvm, amem, global_size);
    ok &= check("memory b", (cl_uint *)bsvm, bmem, global_size);
    ok &= check("memory c", (cl_uint *)csvm, cmem, global_size);

    {
        void *ptrs[] = { asvm, bsvm, csvm };

        error = clEnqueueSVMUnmap(queue1, (void *)asvm, 0, NULL, &evs[0]);
        test_error(error, "clEnqueueSVMUnmap failed");

        error = clEnqueueSVMUnmap(queue1, (void *)bsvm, 0, NULL, &evs[1]);
        test_error(error, "clEnqueueSVMUnmap failed");

        error = clEnqueueSVMUnmap(queue1, (void *)csvm, 0, NULL, &evs[2]);
        test_error(error, "clEnqueueSVMUnmap failed");

        // With a NULL callback the implementation frees the three SVM
        // allocations itself once this command executes.
        error = clEnqueueSVMFree(queue1, 3, ptrs, NULL, NULL, 0, NULL, &evs[3]);
        test_error(error, "clEnqueueSVMFree failed");
    }

    error = clFlush(queue1);
    test_error(error, "clFlush failed");

    error = wait_and_release("batch 3", evs, 4);
    if (error)
        return -1;

    // asvm/bsvm/csvm were released by clEnqueueSVMFree above; calling
    // clSVMFree on them again would be a double free.
    // The wrappers will clean up the rest
    return ok ? 0 : -1;
}

View File

@@ -0,0 +1,115 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// OpenCL C source for the `verify_char` kernel.
// Work-item 0 writes 1 to *num_correct when the byte at pChar equals
// `expected`, and 0 otherwise; all other work-items do nothing.
const char *SVMPointerPassing_test_kernel[] = {
"__kernel void verify_char(__global uchar* pChar, volatile __global uint* num_correct, uchar expected)\n"
"{\n"
" if(0 == get_global_id(0))\n"
" {\n"
" *num_correct = 0;\n"
" if(*pChar == expected)\n"
" {\n"
" *num_correct=1;\n"
" }\n"
" }\n"
"}\n"
};
// Test that arbitrarily aligned char pointers into shared buffers can be passed directly to a kernel.
// This iterates through a buffer passing a pointer to each location to the kernel.
// The buffer is initialized to known values at each location.
// The kernel checks that it finds the expected value at each location.
// TODO: possibly make this work across all base types (including typeN?), also check ptr arithmetic ++,--.
//
// Returns 0 on success and a non-zero value on failure. The deviceID is
// forwarded to create_cl_objects; context2, queue and num_elements are unused.
int test_svm_pointer_passing(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    clContextWrapper context = NULL;
    clProgramWrapper program = NULL;
    cl_uint num_devices = 0;
    cl_int error = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];

    error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
    if(error) return -1;

    clKernelWrapper kernel_verify_char = clCreateKernel(program, "verify_char", &error);
    test_error(error,"clCreateKernel failed");

    size_t bufSize = 256;
    char *pbuf = (char*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_uchar)*bufSize, 0);
    if (pbuf == NULL)
    {
        log_error("ERROR: clSVMAlloc returned NULL at %s:%d\n", __FILE__, __LINE__);
        return -1;
    }

    cl_int *pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0);
    if (pNumCorrect == NULL)
    {
        log_error("ERROR: clSVMAlloc returned NULL at %s:%d\n", __FILE__, __LINE__);
        clSVMFree(context, pbuf);
        return -1;
    }

    bool failed = false;
    {
        // The buffer wrappers live in this scope so that they are released
        // before the SVM memory backing them is freed below.
        clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar)*bufSize, pbuf, &error);
        test_error(error, "clCreateBuffer failed.");

        clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_int), pNumCorrect, &error);
        test_error(error, "clCreateBuffer failed.");

        error = clSetKernelArg(kernel_verify_char, 1, sizeof(void*), (void *) &num_correct);
        test_error(error, "clSetKernelArg failed");

        // put values into buf so that we can expect to see these values in the kernel when we pass a pointer to them.
        cl_command_queue cmdq = queues[0];
        cl_uchar* pBuf = (cl_uchar*) clEnqueueMapBuffer(cmdq, buf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uchar)*bufSize, 0, NULL,NULL, &error);
        test_error2(error, pBuf, "clEnqueueMapBuffer failed");
        for(int i = 0; i<(int)bufSize; i++)
        {
            pBuf[i]= (cl_uchar)i;
        }
        error = clEnqueueUnmapMemObject(cmdq, buf, pBuf, 0,NULL,NULL);
        test_error(error, "clEnqueueUnmapMemObject failed.");

        for (cl_uint ii = 0; !failed && ii<num_devices; ++ii) // iterate over all devices in the platform.
        {
            cmdq = queues[ii];
            for(int i = 0; i<(int)bufSize; i++)
            {
                cl_uchar* pChar = &pBuf[i];
                // Pass a real cl_uchar: taking the first byte of the int
                // loop counter would only be correct on little-endian hosts.
                cl_uchar expected = (cl_uchar)i;

                error = clSetKernelArgSVMPointer(kernel_verify_char, 0, pChar); // pass a pointer to a location within the buffer
                test_error(error, "clSetKernelArg failed");

                error = clSetKernelArg(kernel_verify_char, 2, sizeof(cl_uchar), (void *) &expected ); // pass the expected value at the above location.
                test_error(error, "clSetKernelArg failed");

                error = clEnqueueNDRangeKernel(cmdq, kernel_verify_char, 1, NULL, &bufSize, NULL, 0, NULL, NULL);
                test_error(error,"clEnqueueNDRangeKernel failed");

                // Keep the mapped pointer separate so pNumCorrect always
                // refers to the allocation that is freed at the end.
                cl_int *pMappedCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
                test_error2(error, pMappedCorrect, "clEnqueueMapBuffer failed");
                cl_int correct_count = *pMappedCorrect;
                error = clEnqueueUnmapMemObject(cmdq, num_correct, pMappedCorrect, 0,NULL,NULL);
                test_error(error, "clEnqueueUnmapMemObject failed.");

                if(correct_count != 1)
                {
                    log_error("Passing pointer directly to kernel for byte #%d failed on device %d\n", i, ii);
                    failed = true;
                    break;
                }
            }
        }

        error = clFinish(cmdq);
        test_error(error, "clFinish failed");
    }

    clSVMFree(context, pbuf);
    clSVMFree(context, pNumCorrect);

    return failed ? -1 : 0;
}

View File

@@ -0,0 +1,153 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// Host-side counterpart of the BufPtrs struct declared in the kernel source
// in this file: three pointers to the integer arrays the kernel increments.
typedef struct {
cl_int *pA;
cl_int *pB;
cl_int *pC;
} BufPtrs;
// OpenCL C source for the `set_kernel_exec_info_test` kernel.
// The kernel receives a single pointer to a BufPtrs struct and each work-item
// increments element get_global_id(0) of the three arrays reached through it,
// i.e. the arrays are only referenced indirectly, not as kernel arguments.
const char *set_kernel_exec_info_svm_ptrs_kernel[] = {
"struct BufPtrs;\n"
"\n"
"typedef struct {\n"
" __global int *pA;\n"
" __global int *pB;\n"
" __global int *pC;\n"
"} BufPtrs;\n"
"\n"
"__kernel void set_kernel_exec_info_test(__global BufPtrs* pBufs)\n"
"{\n"
" size_t i;\n"
" i = get_global_id(0);\n"
" pBufs->pA[i]++;\n"
" pBufs->pB[i]++;\n"
" pBufs->pC[i]++;\n"
"}\n"
};
// Test that clSetKernelExecInfo works correctly with CL_KERNEL_EXEC_INFO_SVM_PTRS flag.
//
// Three SVM integer arrays are zeroed and reached by the kernel only through
// pointers stored in a fourth SVM allocation (a BufPtrs struct). The arrays
// are declared to the runtime with clSetKernelExecInfo, the kernel increments
// every element once, and the host then checks that every element is 1.
//
// Returns 0 on success (or when create_cl_objects reports 1, meaning no
// capable device) and -1 / an error code on failure.
int test_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    clContextWrapper c = NULL;
    clProgramWrapper program = NULL;
    cl_uint num_devices = 0;
    cl_int error = CL_SUCCESS;
    clCommandQueueWrapper q;

    error = create_cl_objects(deviceID, &set_kernel_exec_info_svm_ptrs_kernel[0], &c, &program, &q, &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
    if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
    if(error < 0) return -1; // fail test.

    clKernelWrapper k = clCreateKernel(program, "set_kernel_exec_info_test", &error);
    test_error(error, "clCreateKernel failed");

    size_t size = num_elements*sizeof(int);
    int* pA = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
    int* pB = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
    int* pC = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
    BufPtrs* pBuf = (BufPtrs*) clSVMAlloc(c, CL_MEM_READ_WRITE, sizeof(BufPtrs), 0);
    if (pA == NULL || pB == NULL || pC == NULL || pBuf == NULL)
    {
        log_error("ERROR: clSVMAlloc returned NULL at %s:%d\n", __FILE__, __LINE__);
        // clSVMFree is a no-op for NULL, so the partial set can be freed blindly.
        clSVMFree(c, pA);
        clSVMFree(c, pB);
        clSVMFree(c, pC);
        clSVMFree(c, pBuf);
        return -1;
    }

    bool failed = false;
    {
        // The buffer wrappers live in this scope so that they are released
        // before the SVM memory backing them is freed below.
        clMemWrapper ba,bb,bc,bBuf;
        ba = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pA, &error);
        test_error(error, "clCreateBuffer failed");
        bb = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pB, &error);
        test_error(error, "clCreateBuffer failed");
        bc = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pC, &error);
        test_error(error, "clCreateBuffer failed");
        bBuf = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, sizeof(BufPtrs), pBuf, &error);
        test_error(error, "clCreateBuffer failed");

        // Map everything so the host may initialize it.
        clEnqueueMapBuffer(q, ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
        test_error(error, "clEnqueueMapBuffer failed");
        clEnqueueMapBuffer(q, bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
        test_error(error, "clEnqueueMapBuffer failed");
        clEnqueueMapBuffer(q, bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
        test_error(error, "clEnqueueMapBuffer failed");
        clEnqueueMapBuffer(q, bBuf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(BufPtrs), 0, NULL, NULL, &error);
        test_error(error, "clEnqueueMapBuffer failed");

        for(int i = 0; i < num_elements; i++) pA[i] = pB[i] = pC[i] = 0;

        pBuf->pA = pA;
        pBuf->pB = pB;
        pBuf->pC = pC;

        error = clEnqueueUnmapMemObject(q, ba, pA, 0,NULL,NULL);
        test_error(error, " clEnqueueUnmapMemObject failed.");
        error = clEnqueueUnmapMemObject(q, bb, pB, 0,NULL,NULL);
        test_error(error, " clEnqueueUnmapMemObject failed.");
        error = clEnqueueUnmapMemObject(q, bc, pC, 0,NULL,NULL);
        test_error(error, " clEnqueueUnmapMemObject failed.");
        error = clEnqueueUnmapMemObject(q, bBuf, pBuf, 0,NULL,NULL);
        test_error(error, " clEnqueueUnmapMemObject failed.");

        error = clSetKernelArgSVMPointer(k, 0, pBuf);
        test_error(error, "clSetKernelArg failed");

        // Hand the runtime the pointer list from ordinary host memory: pBuf
        // is a coarse-grain SVM allocation that is unmapped at this point,
        // so it must not be read on the host side.
        void* svmPtrs[] = { pA, pB, pC };
        error = clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS, sizeof(svmPtrs), svmPtrs);
        test_error(error, "clSetKernelExecInfo failed");

        size_t range = num_elements;
        error = clEnqueueNDRangeKernel(q, k, 1, NULL, &range, NULL, 0, NULL, NULL);
        test_error(error,"clEnqueueNDRangeKernel failed");

        error = clFinish(q);
        test_error(error, "clFinish failed.");

        // Map the arrays again to read back the results.
        clEnqueueMapBuffer(q, ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
        test_error(error, "clEnqueueMapBuffer failed");
        clEnqueueMapBuffer(q, bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
        test_error(error, "clEnqueueMapBuffer failed");
        clEnqueueMapBuffer(q, bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
        test_error(error, "clEnqueueMapBuffer failed");

        for(int i = 0; i < num_elements; i++)
        {
            // Each array must have been incremented exactly once; checking
            // only the sum would accept e.g. {3, 0, 0}.
            if(pA[i] != 1 || pB[i] != 1 || pC[i] != 1)
            {
                if(!failed)
                    log_error("ERROR: unexpected values at element %d: %d %d %d (expected 1 1 1)\n", i, pA[i], pB[i], pC[i]);
                failed = true;
            }
        }

        error = clEnqueueUnmapMemObject(q, ba, pA, 0,NULL,NULL);
        test_error(error, " clEnqueueUnmapMemObject failed.");
        error = clEnqueueUnmapMemObject(q, bb, pB, 0,NULL,NULL);
        test_error(error, " clEnqueueUnmapMemObject failed.");
        error = clEnqueueUnmapMemObject(q, bc, pC, 0,NULL,NULL);
        test_error(error, " clEnqueueUnmapMemObject failed.");
    }

    error = clFinish(q);
    test_error(error, " clFinish failed.");

    clSVMFree(c, pA);
    clSVMFree(c, pB);
    clSVMFree(c, pC);
    clSVMFree(c, pBuf);

    if(failed) return -1;

    return 0;
}

View File

@@ -0,0 +1,282 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// Builds the linked lists from host code.
//
// The node storage is first made host-accessible: through clEnqueueMapBuffer
// on `nodes` when useNewAPI is CL_FALSE, or through clEnqueueSVMMap on
// `pNodes2` otherwise. create_linked_lists() then fills it, the region is
// unmapped with the matching call, and the queue is drained with clFinish.
// Returns the status of the last call made (CL_SUCCESS when everything worked).
cl_int create_linked_lists_on_host(cl_command_queue cmdq, cl_mem nodes, Node *pNodes2, cl_int ListLength, size_t numLists, cl_bool useNewAPI )
{
    const bool viaSvmCalls = !(useNewAPI == CL_FALSE);
    const size_t nodeBytes = sizeof(Node)*ListLength*numLists;
    cl_int error = CL_SUCCESS;
    Node *pNodes;

    log_info("SVM: creating linked list on host ");

    // Make the node array visible to the host.
    if (viaSvmCalls)
    {
        pNodes = pNodes2;
        error = clEnqueueSVMMap(cmdq, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, pNodes2, nodeBytes, 0, NULL,NULL);
        test_error2(error, pNodes, "clEnqueueSVMMap failed");
    }
    else
    {
        pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, nodeBytes, 0, NULL,NULL, &error);
        test_error2(error, pNodes, "clEnqMapBuffer failed");
    }

    create_linked_lists(pNodes, numLists, ListLength);

    // Hand the memory back to the device side.
    if (viaSvmCalls)
    {
        error = clEnqueueSVMUnmap(cmdq, pNodes2, 0, NULL, NULL);
        test_error(error, "clEnqueueSVMUnmap failed.");
    }
    else
    {
        error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
        test_error(error, "clEnqueueUnmapMemObject failed.");
    }

    error = clFinish(cmdq);
    test_error(error, "clFinish failed.");
    return error;
}
// Purpose: uses host code to verify correctness of the linked list
//
// The node storage is mapped (clEnqueueMapBuffer on `nodes` when useNewAPI is
// CL_FALSE, clEnqueueSVMMap on `pNodes2` otherwise), checked with
// verify_linked_lists(), unmapped again and the queue is drained.
// The region is unmapped even when verification fails, so a failing run does
// not leave the memory mapped.
// Returns -1 when verification fails, otherwise the status of the last
// OpenCL call (CL_SUCCESS when everything worked). `ci` is unused.
cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes, Node *pNodes2, cl_int ListLength, size_t numLists, cl_bool useNewAPI )
{
    cl_int error = CL_SUCCESS;

    Node *pNodes;
    if (useNewAPI == CL_FALSE)
    {
        pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
        test_error2(error, pNodes, "clEnqueueMapBuffer failed");
    }
    else
    {
        pNodes = pNodes2;
        error = clEnqueueSVMMap(cmdq, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, pNodes2, sizeof(Node)*ListLength * numLists, 0, NULL,NULL);
        test_error2(error, pNodes, "clEnqueueSVMMap failed");
    }

    // Remember the verdict, but undo the mapping before acting on it.
    const cl_int verifyResult = verify_linked_lists(pNodes, numLists, ListLength);

    if (useNewAPI == CL_FALSE)
    {
        error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
        test_error(error, "clEnqueueUnmapMemObject failed.");
    }
    else
    {
        error = clEnqueueSVMUnmap(cmdq, pNodes2, 0,NULL,NULL);
        test_error(error, "clEnqueueSVMUnmap failed.");
    }

    error = clFinish(cmdq);
    test_error(error, "clFinish failed.");

    if(verifyResult) return -1;
    return error;
}
// Builds the linked lists on a device.
//
// The allocator buffer (mapped here as a single cl_int) is reset to numLists,
// then `kernel_create_lists` is launched with numLists work-items on `cmdq`
// and the queue is drained. `ci` is only used for logging.
// Returns the status of the last OpenCL call (CL_SUCCESS when all worked).
cl_int create_linked_lists_on_device(int ci, cl_command_queue cmdq, cl_mem allocator, cl_kernel kernel_create_lists, size_t numLists )
{
    cl_int error = CL_SUCCESS;
    log_info("SVM: creating linked list on device: %d ", ci);

    // Only sizeof(cl_int) bytes are mapped, so the index must be accessed as
    // a cl_int: storing a size_t through this pointer would write past the
    // mapped region on 64-bit hosts (and land in the wrong half on
    // big-endian ones).
    cl_int *pAllocator = (cl_int*) clEnqueueMapBuffer(cmdq, allocator, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
    test_error2(error, pAllocator, "clEnqueueMapBuffer failed");
    // reset allocator index
    *pAllocator = (cl_int)numLists; // the first numLists elements of the nodes array are already allocated (they hold the head of each list).
    error = clEnqueueUnmapMemObject(cmdq, allocator, pAllocator, 0,NULL,NULL);
    test_error(error, " clEnqueueUnmapMemObject failed.");

    error = clEnqueueNDRangeKernel(cmdq, kernel_create_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL);
    test_error(error, "clEnqueueNDRange failed.");
    error = clFinish(cmdq);
    test_error(error, "clFinish failed.");

    return error;
}
// Verifies the linked lists on a device.
//
// The counter in `num_correct` is reset to zero, `kernel_verify_lists` is
// launched with numLists work-items on `cmdq`, and the counter is read back.
// The run passes when the counter equals ListLength * numLists.
// `vi` is only used for logging.
// Returns CL_SUCCESS on a pass, -1 on a count mismatch, or the failing
// OpenCL call's error code.
cl_int verify_linked_lists_on_device(int vi, cl_command_queue cmdq,cl_mem num_correct, cl_kernel kernel_verify_lists, cl_int ListLength, size_t numLists )
{
    cl_int error = CL_SUCCESS;

    log_info(" and verifying on device: %d ", vi);

    cl_int *pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
    test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");

    *pNumCorrect = 0; // reset numCorrect to zero

    error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed.");

    error = clEnqueueNDRangeKernel(cmdq, kernel_verify_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL);
    test_error(error,"clEnqueueNDRangeKernel failed");

    // Blocking map: returns once the kernel's result is visible to the host.
    pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
    test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");
    cl_int correct_count = *pNumCorrect;
    error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");

    // Capture the status so the check below tests clFinish itself rather
    // than the stale result of the preceding unmap.
    error = clFinish(cmdq);
    test_error(error,"clFinish failed");

    if(correct_count != ListLength * (cl_uint)numLists)
    {
        error = -1;
        log_info("Failed\n");
    }
    else
        log_info("Passed\n");

    return error;
}
// This tests that all devices and the host share a common address space; using only the coarse-grain features.
// This is done by creating a linked list on a device and then verifying the correctness of the list
// on another device or the host.  This basic test is performed for all combinations of devices and the host that exist within
// the platform.  The test passes only if every combination passes.
//
// useNewAPI selects how the node storage is accessed: CL_FALSE wraps the SVM
// allocation in a cl_mem buffer (and checks CL_MEM_USES_SVM_POINTER on it),
// CL_TRUE uses the SVM pointer directly.
// Returns 0 on success and a non-zero value on failure.
int shared_address_space_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements, cl_bool useNewAPI)
{
    clContextWrapper context = NULL;
    clProgramWrapper program = NULL;
    cl_uint num_devices = 0;
    cl_int error = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];

    error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
    if(error) return -1;

    size_t numLists = num_elements;
    cl_int ListLength = 32;

    clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
    test_error(error, "clCreateKernel failed");

    clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
    test_error(error, "clCreateKernel failed");

    // this buffer holds the linked list nodes.
    Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);
    if (pNodes == NULL)
    {
        // Without this check a failed allocation would be handed to the
        // kernels as a NULL node array in the new-API path.
        log_error("ERROR: clSVMAlloc returned NULL at %s:%d\n", __FILE__, __LINE__);
        return -1;
    }

    {
        // The buffer wrappers live in this scope so that they are released
        // before the SVM memory is freed below.
        cl_bool usesSVMpointer = CL_FALSE;
        clMemWrapper nodes;
        if (useNewAPI == CL_FALSE)
        {
            nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error);
            test_error(error, "clCreateBuffer failed.");

            // verify if buffer uses SVM pointer
            size_t paramSize = 0;
            error = clGetMemObjectInfo(nodes, CL_MEM_USES_SVM_POINTER, 0, 0, &paramSize);
            test_error(error, "clGetMemObjectInfo failed.");

            if (paramSize != sizeof(cl_bool))
            {
                log_error("clGetMemObjectInfo(CL_MEM_USES_SVM_POINTER) returned wrong size.");
                return -1;
            }

            error = clGetMemObjectInfo(nodes, CL_MEM_USES_SVM_POINTER, sizeof(cl_bool), &usesSVMpointer, 0);
            test_error(error, "clGetMemObjectInfo failed.");

            if (usesSVMpointer != CL_TRUE)
            {
                log_error("clGetMemObjectInfo(CL_MEM_USES_SVM_POINTER) returned CL_FALSE for buffer created from SVM pointer.");
                return -1;
            }
        }

        // this buffer holds an index into the nodes buffer, it is used for node allocation
        clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
        test_error(error, "clCreateBuffer failed.");

        error = clGetMemObjectInfo(allocator, CL_MEM_USES_SVM_POINTER, sizeof(cl_bool), &usesSVMpointer, 0);
        test_error(error, "clGetMemObjectInfo failed.");

        if (usesSVMpointer != CL_FALSE)
        {
            log_error("clGetMemObjectInfo(CL_MEM_USES_SVM_POINTER) returned CL_TRUE for non-SVM buffer.");
            return -1;
        }

        // this buffer holds the count of correct nodes, which is computed by the verify kernel.
        clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
        test_error(error, "clCreateBuffer failed.");

        // Any failure leaves `error` non-zero; the individual codes are not
        // distinguished.
        if (useNewAPI == CL_TRUE)
            error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
        else
            error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes);

        error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &allocator);
        error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int), (void *) &ListLength);

        error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
        error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &num_correct);
        error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
        test_error(error, "clSetKernelArg failed");

        // Create linked list on one device and verify on another device (or the host).
        // Do this for all possible combinations of devices and host within the platform.
        for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
        {
            for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
            {
                if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
                {
                    error = create_linked_lists_on_host(queues[0], nodes, pNodes, ListLength, numLists, useNewAPI);
                    if(error) return -1;
                }
                else
                {
                    error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
                    if(error) return -1;
                }

                if(vi == num_devices)
                {
                    error = verify_linked_lists_on_host(vi, queues[0], nodes, pNodes, ListLength, numLists, useNewAPI);
                    if(error) return -1;
                }
                else
                {
                    error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
                    if(error) return -1;
                }
            }
        }
    }

    clSVMFree(context, pNodes);

    return 0;
}
// Test entry point: runs shared_address_space_coarse_grain() with
// useNewAPI == CL_FALSE and returns its result.
int test_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    const cl_bool useNewAPI = CL_FALSE;
    return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, useNewAPI);
}
// Test entry point: runs shared_address_space_coarse_grain() with
// useNewAPI == CL_TRUE and returns its result.
int test_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    const cl_bool useNewAPI = CL_TRUE;
    return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, useNewAPI);
}

View File

@@ -0,0 +1,101 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// This tests that all devices and the host share a common address space using fine-grain mode with no buffers.
// This is done by creating a linked list on a device and then verifying the correctness of the list
// on another device or the host.  This basic test is performed for all combinations of devices and the host that exist within
// the platform.  The test passes only if every combination passes.
//
// Returns 0 on success (or when create_cl_objects reports 1, meaning no
// capable device) and a non-zero value on failure.
int test_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    clContextWrapper context = NULL;
    clProgramWrapper program = NULL;
    cl_uint num_devices = 0;
    cl_int error = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];

    error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM);
    if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
    if(error < 0) return -1; // fail test.

    size_t numLists = num_elements;
    cl_int ListLength = 32;

    clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
    test_error(error, "clCreateKernel failed");

    clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
    test_error(error, "clCreateKernel failed");

    // this allocation holds the linked list nodes.
    // FIXME: remove the alignment once prototype can handle it
    Node* pNodes = (Node*) align_malloc(numLists*ListLength*sizeof(Node),128);
    test_error2(error, pNodes, "malloc failed");

    // this allocation holds an index into the nodes buffer, it is used for node allocation
    // It is accessed through a size_t*, so it must be at least sizeof(size_t)
    // bytes; sizeof(cl_int) would be too small on 64-bit hosts.
    size_t* pAllocator = (size_t*) align_malloc(sizeof(size_t), 128);
    test_error2(error, pAllocator, "malloc failed");

    // this allocation holds the count of correct nodes, which is computed by the verify kernel.
    cl_int* pNum_correct = (cl_int*) align_malloc(sizeof(cl_int), 128);
    test_error2(error, pNum_correct, "malloc failed");

    // Any failure leaves `error` non-zero; the individual codes are not
    // distinguished.
    error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
    error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
    error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),(void *) &ListLength);

    error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
    error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNum_correct);
    error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
    test_error(error, "clSetKernelArg failed");

    // Create linked list on one device and verify on another device (or the host).
    // Do this for all possible combinations of devices and host within the platform.
    for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
    {
        for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
        {
            if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
            {
                log_info("creating linked list on host ");
                create_linked_lists(pNodes, numLists, ListLength);
            }
            else
            {
                error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists);
                if(error) return -1;
            }

            if(vi == num_devices)
            {
                error = verify_linked_lists(pNodes, numLists, ListLength);
                if(error) return -1;
            }
            else
            {
                error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNum_correct, kernel_verify_lists, ListLength, numLists);
                if(error) return -1;
            }
        }
    }

    align_free(pNodes);
    align_free(pAllocator);
    align_free(pNum_correct);
    return 0;
}

View File

@@ -0,0 +1,138 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// Builds the linked lists on a device by launching the creation kernel, with
// the node allocator accessed directly through its SVM pointer (no map/unmap).
//
//   ci                  - index of the creating device, used only for logging
//   cmdq                - command queue of that device
//   pAllocator          - host-visible pointer to the shared allocation index
//   kernel_create_lists - kernel whose arguments have already been set
//   numLists            - number of lists; also the 1-D global work size
//
// Returns CL_SUCCESS, or the failing OpenCL status code.
cl_int create_linked_lists_on_device_no_map(int ci, cl_command_queue cmdq, size_t* pAllocator, cl_kernel kernel_create_lists, size_t numLists )
{
    log_info("SVM: creating linked list on device: %d ", ci);

    // Slots [0, numLists) of the node array hold the list heads, so the
    // first slot available for allocation is numLists.
    pAllocator[0] = numLists;

    const size_t globalWorkSize[1] = { numLists };
    cl_int status = clEnqueueNDRangeKernel(cmdq, kernel_create_lists, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL);
    test_error(status, "clEnqueueNDRange failed.");

    // Block until the kernel has completed before the caller verifies.
    status = clFinish(cmdq);
    test_error(status, "clFinish failed.");

    return status;
}
// Verifies the linked lists on a device by launching the verification kernel,
// reading the result counter directly through its SVM pointer (no map/unmap).
//
//   vi                  - index of the verifying device, used only for logging
//   cmdq                - command queue of that device
//   pNumCorrect         - host-visible pointer to the counter of correct nodes
//   kernel_verify_lists - kernel whose arguments have already been set
//   ListLength          - expected number of nodes per list
//   numLists            - number of lists; also the 1-D global work size
//
// Returns CL_SUCCESS when every node was counted as correct, -1 when the count
// does not match, or the failing OpenCL status code.
cl_int verify_linked_lists_on_device_no_map(int vi, cl_command_queue cmdq,cl_int* pNumCorrect, cl_kernel kernel_verify_lists, cl_int ListLength, size_t numLists )
{
    cl_int error = CL_SUCCESS;

    log_info(" and verifying on device: %d ", vi);

    *pNumCorrect = 0; // reset numCorrect to zero

    error = clEnqueueNDRangeKernel(cmdq, kernel_verify_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL);
    test_error(error,"clEnqueueNDRangeKernel failed");

    // Capture the clFinish status; previously it was discarded and the stale
    // enqueue status was checked instead, hiding execution failures.
    error = clFinish(cmdq);
    test_error(error,"clFinish failed");

    // Same unsigned comparison the implicit conversions performed before,
    // spelled out explicitly.
    cl_int correct_count = *pNumCorrect;
    if((cl_uint)correct_count != (cl_uint)ListLength * (cl_uint)numLists)
    {
        error = -1;
        log_info("Failed\n");
    }
    else
        log_info("Passed\n");

    return error;
}
// This tests that all devices and the host share a common address space, using only the fine-grain buffer SVM mode.
// A linked list is created on one device (or the host) and its correctness is then verified on another device
// (or the host). This is repeated for every combination of devices and the host within the platform.
// The test passes only if every combination passes.
//
// Returns 0 on success (or when no device supports fine-grain buffer SVM) and non-zero on failure.
int test_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    // Owns one SVM allocation and frees it on scope exit, so that every early
    // return below (including those hidden in the test_error macros) releases
    // the memory. clSVMFree is skipped for a failed (NULL) allocation.
    struct SvmAllocation
    {
        cl_context ctx;
        void *ptr;
        SvmAllocation(cl_context c, void *p) : ctx(c), ptr(p) {}
        ~SvmAllocation() { if (ptr != NULL) clSVMFree(ctx, ptr); }
    };

    clContextWrapper context = NULL;
    clProgramWrapper program = NULL;
    cl_uint num_devices = 0;
    cl_int error = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];

    error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
    if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
    if(error < 0) return -1; // fail test.

    size_t numLists = num_elements;
    cl_int ListLength = 32;

    clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
    test_error(error, "clCreateKernel failed");

    clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
    test_error(error, "clCreateKernel failed");

    // The guards are declared after the context and kernels, so they are
    // destroyed (and the SVM memory freed) before those objects are released.

    // this buffer holds the linked list nodes.
    SvmAllocation nodesMem(context, clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(Node)*ListLength*numLists, 0));
    // this buffer holds an index into the nodes buffer, it is used for node allocation
    SvmAllocation allocatorMem(context, clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(size_t), 0));
    // this buffer holds the count of correct nodes, which is computed by the verify kernel.
    SvmAllocation numCorrectMem(context, clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_int), 0));

    Node* pNodes = (Node*) nodesMem.ptr;
    size_t *pAllocator = (size_t*) allocatorMem.ptr;
    cl_int *pNumCorrect = (cl_int*) numCorrectMem.ptr;

    // clSVMAlloc reports failure by returning NULL; without this check the
    // host-side code below would dereference a null pointer.
    if(pNodes == NULL || pAllocator == NULL || pNumCorrect == NULL)
    {
        log_error("clSVMAlloc failed\n");
        return -1;
    }

    error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
    error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
    error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int), (void *) &ListLength);

    error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
    error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNumCorrect);
    error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
    test_error(error, "clSetKernelArg failed");

    // Index num_devices (one past the last device queue) stands for the host.
    const int hostIndex = (int)num_devices;

    // Create linked list on one device and verify on another device (or the host).
    // Do this for all possible combinations of devices and host within the platform.
    for (int ci=0; ci<=hostIndex; ci++)  // ci is CreationIndex, index of device/q to create linked list on
    {
        for (int vi=0; vi<=hostIndex; vi++)  // vi is VerificationIndex, index of device/q to verify linked list on
        {
            if(ci == hostIndex)
            {
                log_info("SVM: creating linked list on host ");
                create_linked_lists(pNodes, numLists, ListLength);
            }
            else
            {
                error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists);
                if(error) return -1;
            }

            if(vi == hostIndex)
            {
                error = verify_linked_lists(pNodes, numLists, ListLength);
                if(error) return -1;
            }
            else
            {
                error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNumCorrect, kernel_verify_lists, ListLength, numLists);
                if(error) return -1;
            }
        }
    }

    // The SVM allocations are released by the guards' destructors.
    return 0;
}

View File

@@ -0,0 +1,241 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// OpenCL C source for the shared sub-buffers test.
// The adjacent string literals below are not separated by commas, so they
// concatenate into a single program string and the array has one element.
// The program defines the Node type, a node allocation helper and the two
// kernels ("create_linked_lists" and "verify_linked_lists") that the host
// code in this file looks up by name.
const char *shared_sub_buffers_test_kernel[] = {
"typedef struct Node {\n"
"    int global_id;\n"
"    int position_in_list;\n"
"    __global struct Node* pNext;\n"
"} Node;\n"
// create linked lists that use nodes from 2 different buffers
// Both buffers are indexed with the same shared, atomically incremented counter.
"__global Node* allocate_node(__global Node* pNodes1, __global Node* pNodes2, volatile __global int* allocation_index, size_t i)\n"
"{\n"
// mix things up, adjacent work items will allocate from different buffers
"    if(i & 0x1)\n"
"        return &pNodes1[atomic_inc(allocation_index)];\n"
"    else\n"
"        return &pNodes2[atomic_inc(allocation_index)];\n"
"}\n"
// The allocation_index parameter must be initialized on the host to N work-items
// The first N nodes in pNodes will be the heads of the lists.
// This tests passing 4 different sub-buffers that come from two parent buffers.
// Note that we have arguments that appear to be unused, but they are required so that system knows to get all the sub-buffers on to the device
// Each work-item i starts at head node pNodes_sub1[i] and appends list_length-1 newly allocated nodes.
"__kernel void create_linked_lists(__global Node* pNodes_sub1, __global Node* pNodes2_sub1, __global Node* pNodes_sub2, __global Node* pNodes2_sub2, volatile __global int* allocation_index, int list_length) \n"
"{\n"
"    size_t i = get_global_id(0);\n"
"    __global Node *pNode = &pNodes_sub1[i];\n"
"    pNode->global_id = i;\n"
"    pNode->position_in_list = 0;\n"
"    __global Node *pNew;\n"
"    for(int j=1; j < list_length; j++) {\n"
"        pNew = allocate_node(pNodes_sub1, pNodes2_sub1, allocation_index, i);\n"
"        pNew->global_id = i;\n"
"        pNew->position_in_list = j;\n"
"        pNode->pNext = pNew;  // link new node onto end of list\n"
"        pNode = pNew;   // move to end of list\n"
"    }\n"
"}\n"
// Note that we have arguments that appear to be unused, but they are required so that system knows to get all the sub-buffers on to the device
// Each work-item walks its own list from the head and increments num_correct once per node
// whose global_id and position_in_list match; it stops at the first mismatch.
"__kernel void verify_linked_lists(__global Node* pNodes_sub1, __global Node* pNodes2_sub1, __global Node* pNodes_sub2, __global Node* pNodes2_sub2, volatile __global uint* num_correct, int list_length)\n"
"{\n"
"    size_t i = get_global_id(0);\n"
"    __global Node *pNode = &pNodes_sub1[i];\n"
"    for(int j=0; j < list_length; j++) {\n"
"        if( pNode->global_id == i && pNode->position_in_list == j)\n"
"            atomic_inc(num_correct);\n"
"        else \n"
"            break;\n"
"        pNode = pNode->pNext;\n"
"    }\n"
"}\n"
};
// Builds the linked lists with host code.
// Both parent buffers are mapped for reading and writing, the lists are
// written through the mapping of `nodes`, and both buffers are unmapped again
// before the queue is drained.
// Returns CL_SUCCESS, or a non-zero value when mapping, unmapping or clFinish fails.
cl_int create_linked_lists_on_host_sb(cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
{
    cl_int error = CL_SUCCESS;
    const size_t mapBytes = sizeof(Node)*ListLength*numLists;
    const cl_map_flags mapFlags = CL_MAP_READ | CL_MAP_WRITE;

    log_info("SVM: creating linked list on host ");

    Node *hostNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, mapFlags, 0, mapBytes, 0, NULL, NULL, &error);
    test_error2(error, hostNodes, "clEnqueueMapBuffer failed");

    Node *hostNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, mapFlags, 0, mapBytes, 0, NULL, NULL, &error);
    test_error2(error, hostNodes2, "clEnqueueMapBuffer failed");

    // Only the first mapping is handed to the host-side list builder.
    create_linked_lists(hostNodes, numLists, ListLength);

    error = clEnqueueUnmapMemObject(cmdq, nodes, hostNodes, 0, NULL, NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");

    error = clEnqueueUnmapMemObject(cmdq, nodes2, hostNodes2, 0, NULL, NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");

    error = clFinish(cmdq);
    test_error(error, "clFinish failed");

    return error;
}
// Verify correctness of the linked list using host code.
// Both parent buffers are mapped, the lists are checked through the mapping
// of `nodes`, and both buffers are always unmapped again before returning the
// verification result.
//
//   ci         - unused by this function
//   cmdq       - queue used for mapping and unmapping
//   nodes      - first parent buffer (holds the list heads)
//   nodes2     - second parent buffer
//   ListLength - expected number of nodes per list
//   numLists   - number of lists
//
// Returns CL_SUCCESS when the lists verify, -1 when verification fails, or a
// non-zero value when mapping, unmapping or clFinish fails.
cl_int verify_linked_lists_on_host_sb(int ci, cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
{
    cl_int error = CL_SUCCESS;

    //log_info(" and verifying on host ");

    Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
    test_error2(error, pNodes, "clEnqueueMapBuffer failed");

    Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
    // Validate the pointer that was just mapped (previously pNodes was
    // re-checked here, so a failed second mapping went unnoticed).
    test_error2(error, pNodes2, "clEnqueueMapBuffer failed");

    // Remember the verdict but unmap before acting on it, so that a failed
    // verification does not leave both buffers mapped.
    cl_int verifyResult = verify_linked_lists(pNodes, numLists, ListLength);

    error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");

    error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");

    error = clFinish(cmdq);
    test_error(error, "clFinish failed");

    if(verifyResult) return -1;
    return error;
}
// This tests that shared sub-buffers can be created and that they inherit the flags from the parent buffer when no flags are specified.
// This tests that passing only the sub-buffers to a kernel works.
// The test is derived from the cross-buffer pointers test, which
// tests that shared buffers are able to contain pointers that point to other shared buffers.
// This tests that all devices and the host share a common address space, using only the coarse-grain features.
// This is done by creating a linked list on a device and then verifying the correctness of the list
// on another device or the host.
// The linked list nodes are allocated from two different buffers; this is done to ensure that cross-buffer pointers work correctly.
// This basic test is performed for all combinations of devices and the host.
//
// Returns 0 on success and non-zero on failure.
int test_shared_sub_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    // Owns one SVM allocation and frees it on scope exit, so that every early
    // return below (including those hidden in the test_error macros) releases
    // the memory. clSVMFree is skipped for a failed (NULL) allocation.
    struct SvmAllocation
    {
        cl_context ctx;
        void *ptr;
        SvmAllocation(cl_context c, void *p) : ctx(c), ptr(p) {}
        ~SvmAllocation() { if (ptr != NULL) clSVMFree(ctx, ptr); }
    };

    clContextWrapper context = NULL;
    clProgramWrapper program = NULL;
    cl_uint num_devices = 0;
    cl_int error = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];

    // NOTE(review): any non-zero result fails the test here, whereas the
    // fine-grain tests treat a result of 1 as "unsupported, pass" - confirm
    // this difference is intended for the coarse-grain level.
    error = create_cl_objects(deviceID, &shared_sub_buffers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
    if(error) return -1;

    size_t numLists = num_elements;
    if(numLists & 0x1) numLists++; // force even size, so we can easily create two sub-buffers of same size.

    cl_int ListLength = 32;

    clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
    test_error(error, "clCreateKernel failed");

    clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
    test_error(error, "clCreateKernel failed");

    size_t nodes_bufsize = sizeof(Node)*ListLength*numLists;

    // Declared before the inner scope: the buffers that wrap this memory are
    // released first, then the guards free pNodes2 followed by pNodes (the
    // same order as the explicit clSVMFree calls they replace).
    SvmAllocation nodesMem(context, clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0));
    SvmAllocation nodes2Mem(context, clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0));
    Node* pNodes = (Node*) nodesMem.ptr;
    Node* pNodes2 = (Node*) nodes2Mem.ptr;

    // clSVMAlloc reports failure by returning NULL.
    if(pNodes == NULL || pNodes2 == NULL)
    {
        log_error("clSVMAlloc failed\n");
        return -1;
    }

    {
        // this buffer holds some of the linked list nodes.
        clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, nodes_bufsize, pNodes, &error);
        test_error(error, "clCreateBuffer failed.");

        cl_buffer_region r;
        r.origin = 0;
        r.size = nodes_bufsize / 2;
        // this should inherit the flag settings from nodes buffer
        clMemWrapper nodes_sb1 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
        test_error(error, "clCreateSubBuffer");
        r.origin = nodes_bufsize / 2;
        clMemWrapper nodes_sb2 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
        test_error(error, "clCreateSubBuffer");

        // this buffer holds the rest of the linked list nodes.
        clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, nodes_bufsize, pNodes2, &error);
        test_error(error, "clCreateBuffer failed.");
        r.origin = 0;
        r.size = nodes_bufsize / 2;
        // this should inherit the flag settings from nodes2 buffer
        clMemWrapper nodes2_sb1 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
        test_error(error, "clCreateSubBuffer");
        r.origin = nodes_bufsize / 2;
        clMemWrapper nodes2_sb2 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION,(void*)&r, &error);
        test_error(error, "clCreateSubBuffer");

        // this buffer holds the index into the nodes buffer that is used for node allocation
        clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
        test_error(error, "clCreateBuffer failed.");

        // this buffer holds the count of correct nodes which is computed by the verify kernel.
        clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
        test_error(error, "clCreateBuffer failed.");

        error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes_sb1);
        error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
        error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &nodes_sb2);
        error |= clSetKernelArg(kernel_create_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
        error |= clSetKernelArg(kernel_create_lists, 4, sizeof(void*), (void *) &allocator);
        error |= clSetKernelArg(kernel_create_lists, 5, sizeof(cl_int),(void *) &ListLength);

        error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes_sb1);
        error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
        error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &nodes_sb2);
        error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
        error |= clSetKernelArg(kernel_verify_lists, 4, sizeof(void*), (void *) &num_correct);
        error |= clSetKernelArg(kernel_verify_lists, 5, sizeof(cl_int),(void *) &ListLength);
        test_error(error, "clSetKernelArg failed");

        // Index num_devices (one past the last device queue) stands for the host.
        const int hostIndex = (int)num_devices;

        // Create linked list on one device and verify on another device (or the host).
        // Do this for all possible combinations of devices and host within the platform.
        for (int ci=0; ci<=hostIndex; ci++)  // ci is CreationIndex, index of device/q to create linked list on
        {
            for (int vi=0; vi<=hostIndex; vi++)  // vi is VerificationIndex, index of device/q to verify linked list on
            {
                if(ci == hostIndex)
                {
                    error = create_linked_lists_on_host_sb(queues[0], nodes, nodes2, ListLength, numLists);
                    if(error) return -1;
                }
                else
                {
                    error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
                    if(error) return -1;
                }

                if(vi == hostIndex)
                {
                    error = verify_linked_lists_on_host_sb(vi, queues[0], nodes, nodes2, ListLength, numLists);
                    if(error) return -1;
                }
                else
                {
                    error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
                    if(error) return -1;
                }
            } // inner loop, vi
        } // outer loop, ci
    }

    // The SVM allocations are released by the guards' destructors.
    return 0;
}

View File

@@ -0,0 +1,19 @@
# Build description for the "allocations" conformance test.
# MODULE_NAME selects the prefix of the <MODULE_NAME>_SOURCES variable below;
# both are presumably consumed by the shared fragment included at the end,
# which is not shown here - confirm against ../CMakeCommon.txt.
set(MODULE_NAME ALLOCATIONS)
# Test sources followed by the shared harness sources, which are listed by
# relative path rather than linked as a library.
set(${MODULE_NAME}_SOURCES
main.cpp
allocation_execute.cpp
allocation_fill.cpp
allocation_functions.cpp
allocation_utils.cpp
../../test_common/harness/errorHelpers.c
../../test_common/harness/threadTesting.c
../../test_common/harness/kernelHelpers.c
../../test_common/harness/testHarness.c
../../test_common/harness/typeWrappers.cpp
../../test_common/harness/mt19937.c
../../test_common/harness/msvc9.c
../../test_common/harness/parseParameters.cpp
)
# Shared rules for conformance test modules; must come after the variables above are set.
include(../CMakeCommon.txt)

View File

@@ -0,0 +1,19 @@
# Boost.Build description for the "allocations" conformance test.
# The project declares no active requirements; the two toolset-specific
# flags below are commented out.
project
: requirements
# <toolset>gcc:<cflags>-xc++
# <toolset>msvc:<cflags>"/TP"
;
# Executable built from the test's own sources only; no harness sources are
# listed in this file.
exe test_allocations
: allocation_execute.cpp
allocation_fill.cpp
allocation_functions.cpp
allocation_utils.cpp
main.cpp
;
# Installs the executable into a per-variant location under $(DIST).
install dist
: test_allocations
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/allocations
<variant>release:<location>$(DIST)/release/tests/test_conformance/allocations
;

View File

@@ -0,0 +1,46 @@
# Makefile for the "allocations" conformance test; the framework flags and
# library paths below target the Apple OpenCL framework.

# Optional ATF support: adds the framework at link time and -DUSE_ATF at compile time.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif
# Test sources followed by the shared harness sources.
# NOTE(review): the CMake list for this test also names harness/msvc9.c and
# harness/parseParameters.cpp, which are absent here - confirm whether they
# are needed for this build.
SRCS = main.cpp \
allocation_functions.cpp \
allocation_fill.cpp \
allocation_utils.cpp \
allocation_execute.cpp \
../../test_common/harness/errorHelpers.c \
../../test_common/harness/threadTesting.c \
../../test_common/harness/kernelHelpers.c \
../../test_common/harness/testHarness.c \
../../test_common/harness/mt19937.c \
../../test_common/harness/typeWrappers.cpp
# Each word in DEFINES is turned into a -D flag by the $(DEFINES:%=-D%) substitution below.
DEFINES = DONT_TEST_GARBAGE_POINTERS
SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
FRAMEWORK = $(SOURCES)
HEADERS =
TARGET = test_allocations
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os
# The C++ driver is used for every source and for the final link.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
# Object names are derived from the sources by replacing the .c and then the .cpp suffix with .o.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}
TARGETOBJECT =
all: $(TARGET)
# Link step. No explicit compile rules appear in this file; the objects are
# presumably produced by make's built-in rules using CFLAGS/CXXFLAGS.
$(TARGET): $(OBJECTS)
$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
clean:
rm -f $(TARGET) $(OBJECTS)
# Fallback for any requested target that has no rule.
.DEFAULT:
@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,333 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "allocation_execute.h"
#include "allocation_functions.h"
// printf-style templates from which execute_kernel() assembles the OpenCL C
// source of the "sample_test" kernel.

// Buffer kernel. First %s: the generated buffer argument list; second %s: the
// generated per-buffer accumulation statements. Each work-item sums the
// elements in its own [tid*per_item, (tid+1)*per_item) range into result[tid].
const char *buffer_kernel_pattern = {
"__kernel void sample_test(%s __global uint *result, __global uint *array_sizes, uint per_item)\n"
"{\n"
"\tint tid = get_global_id(0);\n"
"\tuint r = 0;\n"
"\tuint i;\n"
"\tfor(i=tid*per_item; i<(1+tid)*per_item; i++) {\n"
"%s"
"\t}\n"
"\tresult[tid] = r;\n"
"}\n" };
// Image kernel. First %s: the generated image argument list; second %s: a
// declaration line (sampler_pattern or offset_pattern); third %s: the
// generated per-image read or write loops.
const char *image_kernel_pattern = {
"__kernel void sample_test(%s __global uint *result)\n"
"{\n"
"\tuint4 color;\n"
"\tcolor = (uint4)(0);\n"
"%s"
"\tint x, y;\n"
"%s"
"\tresult[get_global_id(0)] += color.x + color.y + color.z + color.w;\n"
"}\n" };
// Per-image read loop. Arguments in order: image index, the literal "%"
// (modulo operator, passed as a string so it survives formatting), image
// index, image index. Rows are distributed across work-items by row number.
const char *read_pattern = {
"\tfor(y=0; y<get_image_height(image%d); y++)\n"
"\t\tif (y %s get_global_size(0) == get_global_id(0))\n"
"\t\t\tfor (x=0; x<get_image_width(image%d); x++) {\n"
"\t\t\t\tcolor += read_imageui(image%d, sampler, (int2)(x,y));\n"
"\t\t\t}\n"
};
// Declaration inserted into the image kernel for write tests: the
// per-component offset added to x*y when generating pixel values.
const char *offset_pattern =
"\tconst uint4 offset = (uint4)(0,1,2,3);\n";
// Declaration inserted into the image kernel for read tests.
const char *sampler_pattern =
"\tconst sampler_t sampler = CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n";
// Per-image write loop; takes the same four arguments as read_pattern.
// Writes x*y+offset to every pixel, which is the value check_image() expects.
// Note that this pattern is longer than read_pattern.
const char *write_pattern = {
"\tfor(y=0; y<get_image_height(image%d); y++)\n"
"\t\tif (y %s get_global_size(0) == get_global_id(0))\n"
"\t\t\tfor (x=0; x<get_image_width(image%d); x++) {\n"
"\t\t\t\tcolor = (uint4)x*(uint4)y+offset;\n"
"\t\t\t\twrite_imageui(image%d, (int2)(x,y), color);\n"
"\t\t\t}\n"
"\tbarrier(CLK_LOCAL_MEM_FENCE);\n"
};
// Reads a 2D image back row by row and checks that every pixel component
// holds the value written by the image-write kernel: x*y + component index.
//
//   queue - command queue used for the blocking reads
//   mem   - image object to verify; must be a 2D image
//
// Returns 0 when all pixels match, -1 when the object is not a 2D image, a
// query fails or a pixel mismatches, FAILED_ABORT when the host buffer cannot
// be allocated, or the OpenCL status code when reading the image fails.
int check_image(cl_command_queue queue, cl_mem mem) {
    int error;
    cl_mem_object_type type;
    size_t width = 0, height = 0;
    size_t origin[3], region[3], x, j;
    cl_uint *data;

    error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
    if (error) {
        print_error(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
        return -1;
    }

    if (type == CL_MEM_OBJECT_BUFFER) {
        log_error("Expected image object, not buffer.\n");
        return -1;
    }
    if (type != CL_MEM_OBJECT_IMAGE2D) {
        // Any other object type used to fall through with width and height
        // left uninitialized.
        log_error("Expected a 2D image object.\n");
        return -1;
    }

    error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
    if (error) {
        print_error(error, "clGetImageInfo failed for CL_IMAGE_WIDTH.");
        return -1;
    }
    error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
    if (error) {
        print_error(error, "clGetImageInfo failed for CL_IMAGE_HEIGHT.");
        return -1;
    }

    // One row of pixels, four cl_uint components each.
    data = (cl_uint*)malloc(width*4*sizeof(cl_uint));
    if (data == NULL) {
        log_error("Failed to malloc host buffer for reading from image.\n");
        return FAILED_ABORT;
    }

    origin[0] = 0;
    origin[1] = 0;
    origin[2] = 0;
    region[0] = width;
    region[1] = 1;
    region[2] = 1;
    for (origin[1] = 0; origin[1] < height; origin[1]++) {
        error = clEnqueueReadImage(queue, mem, CL_TRUE, origin, region, 0, 0, data, 0, NULL, NULL);
        if (error) {
            print_error(error, "clEnqueueReadImage failed");
            free(data);
            return error;
        }

        for (x=0; x<width; x++) {
            for (j=0; j<4; j++) {
                if (data[x*4+j] != (cl_uint)(x*origin[1]+j)) {
                    log_error("Pixel %d, %d, component %d, expected %u, got %u.\n",
                              (int)x, (int)origin[1], (int)j, (cl_uint)(x*origin[1]+j), data[x*4+j]);
                    free(data); // previously leaked on this path
                    return -1;
                }
            }
        }
    }
    free(data);
    return 0;
}
// Number of work-items launched by execute_kernel(), and the element count of
// its result buffer. Parenthesized so the macro expands as one value inside
// any surrounding expression (e.g. division or modulo).
#define NUM_OF_WORK_ITEMS (8192*2)
// Generates, builds and runs the "sample_test" kernel over a set of memory
// objects, then optionally verifies the result.
//
//   context             - context the objects belong to
//   queue               - pointer to the command queue to execute on
//   device_id           - device, passed on to check_allocation_error()
//   test                - BUFFER, IMAGE_READ or IMAGE_WRITE (or the matching
//                         *_NON_BLOCKING value); selects the generated kernel
//   mems                - the buffers or images passed as the first arguments
//   number_of_mems_used - number of entries of mems to use
//   verify_checksum     - when zero, the kernel is run but results are not checked
//
// For buffer and image-read tests the per-work-item sums are added up and
// compared with the global `checksum`; for image-write tests every image is
// verified with check_image().
//
// Returns SUCCEEDED, the non-SUCCEEDED classification produced by
// check_allocation_error(), FAILED_ABORT on verification or host allocation
// failure, or whatever the test_error/test_error_abort macros return.
int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id device_id, int test, cl_mem mems[], int number_of_mems_used, int verify_checksum) {

    char *argument_string;
    char *access_string;
    char *kernel_string;
    int i, error, result;
    clKernelWrapper kernel;
    clProgramWrapper program;
    clMemWrapper result_mem;
    char *ptr;
    size_t global_dims[3];
    cl_uint per_item;
    cl_uint returned_results[NUM_OF_WORK_ITEMS], final_result;
    clEventWrapper event;
    cl_int event_status;

    // Size the generated-source buffers by the longer of the two image access
    // patterns. write_pattern is longer than read_pattern, so sizing by
    // read_pattern alone could overflow access_string for image-write tests.
    size_t access_pattern_len = strlen(read_pattern);
    if (strlen(write_pattern) > access_pattern_len)
        access_pattern_len = strlen(write_pattern);

    // Allocate memory for the kernel source
    argument_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*64);
    access_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*(access_pattern_len+10));
    kernel_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*(access_pattern_len+10+64)+1024);
    if (argument_string == NULL || access_string == NULL || kernel_string == NULL) {
        log_error("Failed to malloc host buffers for the kernel source.\n");
        free(argument_string);
        free(access_string);
        free(kernel_string);
        return FAILED_ABORT;
    }
    argument_string[0] = '\0';
    access_string[0] = '\0';
    kernel_string[0] = '\0';

    // Zero the results.
    for (i=0; i<NUM_OF_WORK_ITEMS; i++)
        returned_results[i] = 0;

    // Build the kernel source
    if (test == BUFFER || test == BUFFER_NON_BLOCKING) {
        for(i=0; i<number_of_mems_used; i++) {
            sprintf(argument_string + strlen(argument_string), " __global uint *buffer%d, ", i);
            sprintf(access_string + strlen( access_string), "\t\tif (i<array_sizes[%d]) r += buffer%d[i];\n", i, i);
        }
        sprintf(kernel_string, buffer_kernel_pattern, argument_string, access_string);
    }
    else if (test == IMAGE_READ || test == IMAGE_READ_NON_BLOCKING) {
        for(i=0; i<number_of_mems_used; i++) {
            sprintf(argument_string + strlen(argument_string), " read_only image2d_t image%d, ", i);
            // "%" is passed as an argument so the modulo operator ends up in the kernel source.
            sprintf(access_string + strlen(access_string), read_pattern, i, "%", i, i);
        }
        sprintf(kernel_string, image_kernel_pattern, argument_string, sampler_pattern, access_string);
    }
    else if (test == IMAGE_WRITE || test == IMAGE_WRITE_NON_BLOCKING) {
        for(i=0; i<number_of_mems_used; i++) {
            sprintf(argument_string + strlen(argument_string), " write_only image2d_t image%d, ", i);
            sprintf(access_string + strlen( access_string), write_pattern, i, "%", i, i);
        }
        sprintf(kernel_string, image_kernel_pattern, argument_string, offset_pattern, access_string);
    }
    ptr = kernel_string;

    // Create the kernel
    error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&ptr, "sample_test" );

    free(argument_string);
    free(access_string);
    free(kernel_string);

    result = check_allocation_error(context, device_id, error, queue);
    if (result != SUCCEEDED) {
        if (result == FAILED_TOO_BIG)
            log_info("\t\tCreate kernel failed: %s.\n", IGetErrorString(error));
        else
            print_error(error, "Create kernel and program failed");
        return result;
    }

    // Set the arguments
    for (i=0; i<number_of_mems_used; i++) {
        error = clSetKernelArg(kernel, i, sizeof(cl_mem), &mems[i]);
        test_error(error, "clSetKernelArg failed");
    }

    // Set the result; it is the argument that follows the memory objects.
    result_mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_uint)*NUM_OF_WORK_ITEMS, &returned_results, &error);
    test_error(error, "clCreateBuffer failed");
    error = clSetKernelArg(kernel, number_of_mems_used, sizeof(result_mem), &result_mem);
    test_error(error, "clSetKernelArg failed");

    // Thread dimensions for execution
    global_dims[0] = NUM_OF_WORK_ITEMS; global_dims[1] = 1; global_dims[2] = 1;

    // We have extra arguments for the buffer kernel because we need to pass in the buffer sizes
    cl_uint max_size = 0;
    clMemWrapper buffer_sizes;
    if (test == BUFFER || test == BUFFER_NON_BLOCKING) {
        // The size table is only needed for buffer tests; it is allocated here
        // and released on every path out of this branch.
        cl_uint *sizes = (cl_uint*)malloc(sizeof(cl_uint)*number_of_mems_used);
        if (sizes == NULL && number_of_mems_used > 0) {
            log_error("Failed to malloc host buffer for the buffer sizes.\n");
            return FAILED_ABORT;
        }
        for (i=0; i<number_of_mems_used; i++) {
            size_t size;
            error = clGetMemObjectInfo(mems[i], CL_MEM_SIZE, sizeof(size), &size, NULL);
            if (error) {
                free(sizes);
                test_error_abort(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
            }
            size_t element_count = size/sizeof(cl_uint);
            // Warn where the narrowing really happens; the old check compared
            // a cl_uint against CL_UINT_MAX and could never fire.
            if (element_count > CL_UINT_MAX)
                log_error("Size is too large for a uint parameter to the kernel. Expect invalid results.\n");
            sizes[i] = (cl_uint)element_count;
            if (element_count > max_size)
                max_size = (cl_uint)element_count;
        }
        buffer_sizes = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_uint)*number_of_mems_used, sizes, &error);
        // CL_MEM_COPY_HOST_PTR copies the data during creation, so the host
        // table is no longer needed whether or not creation succeeded.
        free(sizes);
        test_error_abort(error, "clCreateBuffer failed");
        error = clSetKernelArg(kernel, number_of_mems_used+1, sizeof(cl_mem), &buffer_sizes);
        test_error(error, "clSetKernelArg failed");

        // Elements each work-item has to cover so that the largest buffer is fully summed.
        per_item = (cl_uint)ceil((double)max_size/global_dims[0]);
        error = clSetKernelArg(kernel, number_of_mems_used+2, sizeof(per_item), &per_item);
        test_error(error, "clSetKernelArg failed");
    }

    size_t local_dims[3] = {1,1,1};
    error = get_max_common_work_group_size(context, kernel, global_dims[0], &local_dims[0]);
    test_error(error, "get_max_common_work_group_size failed");

    // Execute the kernel
    error = clEnqueueNDRangeKernel(*queue, kernel, 1, NULL, global_dims, local_dims, 0, NULL, &event);
    result = check_allocation_error(context, device_id, error, queue);
    if (result != SUCCEEDED) {
        if (result == FAILED_TOO_BIG)
            // size_t values are cast to match the %ld conversions.
            log_info("\t\tExecute kernel failed: %s (global dim: %ld, local dim: %ld)\n", IGetErrorString(error), (long)global_dims[0], (long)local_dims[0]);
        else
            print_error(error, "clEnqueueNDRangeKernel failed");
        return result;
    }

    // Finish the test
    error = clFinish(*queue);

    result = check_allocation_error(context, device_id, error, queue);
    if (result != SUCCEEDED) {
        if (result == FAILED_TOO_BIG)
            log_info("\t\tclFinish failed: %s.\n", IGetErrorString(error));
        else
            print_error(error, "clFinish failed");
        return result;
    }

    // Verify that the event from the execution did not have an error
    error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
    test_error_abort(error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
    if (event_status < 0) {
        result = check_allocation_error(context, device_id, event_status, queue);
        if (result != SUCCEEDED) {
            if (result == FAILED_TOO_BIG)
                log_info("\t\tEvent returned from kernel execution indicates failure: %s.\n", IGetErrorString(event_status));
            else
                print_error(event_status, "clEnqueueNDRangeKernel failed");
            return result;
        }
    }

    // If we are not verifying the checksum return here
    if (!verify_checksum) {
        log_info("Note: Allocations were not initialized so kernel execution can not verify correct results.\n");
        return SUCCEEDED;
    }

    // Verify the checksum.
    // Read back the result
    error = clEnqueueReadBuffer(*queue, result_mem, CL_TRUE, 0, sizeof(cl_uint)*NUM_OF_WORK_ITEMS, &returned_results, 0, NULL, NULL);
    test_error_abort(error, "clEnqueueReadBuffer failed");
    final_result = 0;
    if (test == BUFFER || test == IMAGE_READ || test == BUFFER_NON_BLOCKING || test == IMAGE_READ_NON_BLOCKING) {
        // For buffers or read images we are just looking at the sum of what each thread summed up
        for (i=0; i<NUM_OF_WORK_ITEMS; i++) {
            final_result += returned_results[i];
        }
        if (final_result != checksum) {
            log_error("\t\tChecksum failed to verify. Expected %u got %u.\n", checksum, final_result);
            return FAILED_ABORT;
        }
        log_info("\t\tChecksum verified (%u == %u).\n", checksum, final_result);
    } else {
        // For write images we need to verify the values
        for (i=0; i<number_of_mems_used; i++) {
            if (check_image(*queue, mems[i])) {
                log_error("\t\tImage contents failed to verify for image %d.\n", (int)i);
                return FAILED_ABORT;
            }
        }
        log_info("\t\tImage contents verified.\n");
    }

    // Finish the test
    error = clFinish(*queue);
    result = check_allocation_error(context, device_id, error, queue);
    if (result != SUCCEEDED) {
        if (result == FAILED_TOO_BIG)
            log_info("\t\tclFinish failed: %s.\n", IGetErrorString(error));
        else
            print_error(error, "clFinish failed");
        return result;
    }

    return SUCCEEDED;
}

View File

@@ -0,0 +1,22 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "allocation_utils.h"
// Generates and runs the "sample_test" kernel over the first
// number_of_mems_used entries of mems. `test` selects the buffer, image-read
// or image-write kernel variant; when verify_checksum is non-zero the results
// are also verified. Returns SUCCEEDED on success and a failure code otherwise.
int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id device_id, int test, cl_mem mems[], int number_of_mems_used, int verify_checksum);

View File

@@ -0,0 +1,338 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "allocation_fill.h"
#define BUFFER_CHUNK_SIZE 8*1024*1024
#define IMAGE_LINES 8
#include "../../test_common/harness/compat.h"
int fill_buffer_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, size_t size, MTdata d, cl_bool blocking_write) {
size_t i, j;
cl_uint *data;
int error, result;
cl_uint checksum_delta = 0;
cl_event event;
size_t size_to_use = BUFFER_CHUNK_SIZE;
if (size_to_use > size)
size_to_use = size;
data = (cl_uint*)malloc(size_to_use);
if (data == NULL) {
log_error("Failed to malloc host buffer for writing into buffer.\n");
return FAILED_ABORT;
}
for (i=0; i<size-size_to_use; i+=size_to_use) {
// Put values in the data, and keep a checksum as we go along.
for (j=0; j<size_to_use/sizeof(cl_uint); j++) {
data[j] = genrand_int32(d);
checksum_delta += data[j];
}
if (blocking_write) {
error = clEnqueueWriteBuffer(*queue, mem, CL_TRUE, i, size_to_use, data, 0, NULL, NULL);
result = check_allocation_error(context, device_id, error, queue);
if (result == FAILED_ABORT) {
print_error(error, "clEnqueueWriteBuffer failed.");
}
if (result != SUCCEEDED) {
clFinish(*queue);
free(data);
clReleaseMemObject(mem);
return result;
}
} else {
error = clEnqueueWriteBuffer(*queue, mem, CL_FALSE, i, size_to_use, data, 0, NULL, &event);
result = check_allocation_error(context, device_id, error, queue);
if (result == FAILED_ABORT) {
print_error(error, "clEnqueueWriteBuffer failed.");
}
if (result != SUCCEEDED) {
clFinish(*queue);
free(data);
clReleaseMemObject(mem);
return result;
}
error = clWaitForEvents(1, &event);
result = check_allocation_error(context, device_id, error, queue);
if (result == FAILED_ABORT) {
print_error(error, "clWaitForEvents failed.");
}
if (result != SUCCEEDED) {
clFinish(*queue);
clReleaseEvent(event);
free(data);
clReleaseMemObject(mem);
return result;
}
clReleaseEvent(event);
}
}
// Deal with any leftover bits
if (i < size) {
// Put values in the data, and keep a checksum as we go along.
for (j=0; j<(size-i)/sizeof(cl_uint); j++) {
data[j] = (cl_uint)genrand_int32(d);
checksum_delta += data[j];
}
if (blocking_write) {
error = clEnqueueWriteBuffer(*queue, mem, CL_TRUE, i, size-i, data, 0, NULL, NULL);
result = check_allocation_error(context, device_id, error, queue);
if (result == FAILED_ABORT) {
print_error(error, "clEnqueueWriteBuffer failed.");
}
if (result != SUCCEEDED) {
clFinish(*queue);
clReleaseMemObject(mem);
free(data);
return result;
}
} else {
error = clEnqueueWriteBuffer(*queue, mem, CL_FALSE, i, size-i, data, 0, NULL, &event);
result = check_allocation_error(context, device_id, error, queue);
if (result == FAILED_ABORT) {
print_error(error, "clEnqueueWriteBuffer failed.");
}
if (result != SUCCEEDED) {
clFinish(*queue);
clReleaseMemObject(mem);
free(data);
return result;
}
error = clWaitForEvents(1, &event);
result = check_allocation_error(context, device_id, error, queue);
if (result == FAILED_ABORT) {
print_error(error, "clWaitForEvents failed.");
}
if (result != SUCCEEDED) {
clFinish(*queue);
clReleaseEvent(event);
free(data);
clReleaseMemObject(mem);
return result;
}
clReleaseEvent(event);
}
}
free(data);
// Only update the checksum if this succeeded.
checksum += checksum_delta;
return SUCCEEDED;
}
// Fills the 2D image `mem` (width x height pixels, 4 x cl_uint per pixel)
// with random data drawn from `d`, writing up to IMAGE_LINES rows at a time.
//
// Parameters:
//   context, device_id, queue - forwarded to check_allocation_error(), which
//                               may replace *queue.
//   mem, width, height        - destination image and its dimensions.
//   d                         - random number generator state.
//   blocking_write            - selects blocking writes, or non-blocking
//                               writes that are waited on via their event.
//
// Returns SUCCEEDED after the whole image was written; in that case the sum
// of all generated words is added to the global `checksum`. Returns
// FAILED_ABORT if the host staging buffer cannot be allocated. On any OpenCL
// failure returns FAILED_TOO_BIG or FAILED_ABORT; on those paths `mem` has
// been RELEASED and the checksum is left untouched.
int fill_image_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, size_t width, size_t height, MTdata d, cl_bool blocking_write) {
    size_t origin[3], region[3], j;
    int error, result;
    cl_uint *data;
    cl_uint checksum_delta = 0;
    cl_event event;
    size_t image_lines_to_use;

    image_lines_to_use = IMAGE_LINES;
    if (image_lines_to_use > height)
        image_lines_to_use = height;

    data = (cl_uint*)malloc(width*4*sizeof(cl_uint)*image_lines_to_use);
    if (data == NULL) {
        log_error("Failed to malloc host buffer for writing into image.\n");
        return FAILED_ABORT;
    }

    origin[0] = 0;
    origin[1] = 0;
    origin[2] = 0;
    region[0] = width;
    region[1] = image_lines_to_use;
    region[2] = 1;

    // Full strips of image_lines_to_use rows. The last strip (full or
    // partial) is always left to the "leftover" branch below.
    for (origin[1] = 0; origin[1] < height - image_lines_to_use; origin[1] += image_lines_to_use) {
        // Put values in the data, and keep a checksum as we go along.
        for (j=0; j<width*4*image_lines_to_use; j++) {
            data[j] = (cl_uint)genrand_int32(d);
            checksum_delta += data[j];
        }

        if (blocking_write) {
            error = clEnqueueWriteImage(*queue, mem, CL_TRUE, origin, region, 0, 0, data, 0, NULL, NULL);
            result = check_allocation_error(context, device_id, error, queue);
            if (result == FAILED_ABORT) {
                print_error(error, "clEnqueueWriteImage failed.");
            }
            if (result != SUCCEEDED) {
                clFinish(*queue);
                clReleaseMemObject(mem);
                free(data);
                return result;
            }

            // Classify the clFinish status the same way as every other CL
            // call in this function, and clean up before returning so the
            // staging buffer and the image are not leaked.
            error = clFinish(*queue);
            result = check_allocation_error(context, device_id, error, queue);
            if (result == FAILED_ABORT) {
                print_error(error, "clFinish failed after successful enquing filling buffer with data.");
            }
            if (result != SUCCEEDED) {
                clReleaseMemObject(mem);
                free(data);
                return result;
            }
        } else {
            error = clEnqueueWriteImage(*queue, mem, CL_FALSE, origin, region, 0, 0, data, 0, NULL, &event);
            result = check_allocation_error(context, device_id, error, queue);
            if (result == FAILED_ABORT) {
                print_error(error, "clEnqueueWriteImage failed.");
            }
            if (result != SUCCEEDED) {
                clFinish(*queue);
                clReleaseMemObject(mem);
                free(data);
                return result;
            }

            error = clWaitForEvents(1, &event);
            // Dig out execution error if that is the problem
            if (error == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) {
                cl_int err, exec_status;
                err = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(exec_status), &exec_status, NULL);
                if (err != CL_SUCCESS) {
                    // Explicit cleanup instead of an early-return macro, so
                    // the event, staging buffer and image are not leaked.
                    print_error(err, "clGetEventInfo failed getting CL_EVENT_COMMAND_EXECUTION_STATUS from failed event");
                    clReleaseEvent(event);
                    free(data);
                    clReleaseMemObject(mem);
                    return FAILED_ABORT;
                }
                error = exec_status;
            }

            result = check_allocation_error(context, device_id, error, queue);
            if (result == FAILED_ABORT) {
                print_error(error, "clWaitForEvents failed.");
            }
            if (result != SUCCEEDED) {
                clReleaseEvent(event);
                free(data);
                clReleaseMemObject(mem);
                return result;
            }

            clReleaseEvent(event);
        }
    }

    // Deal with any leftover bits
    if (origin[1] < height) {
        // Put values in the data, and keep a checksum as we go along.
        for (j=0; j<width*4*(height-origin[1]); j++) {
            data[j] = (cl_uint)genrand_int32(d);
            checksum_delta += data[j];
        }

        region[1] = height-origin[1];
        if (blocking_write) {
            error = clEnqueueWriteImage(*queue, mem, CL_TRUE, origin, region, 0, 0, data, 0, NULL, NULL);
            result = check_allocation_error(context, device_id, error, queue);
            if (result == FAILED_ABORT) {
                print_error(error, "clEnqueueWriteImage failed.");
            }
            if (result != SUCCEEDED) {
                clFinish(*queue);
                clReleaseMemObject(mem);
                free(data);
                return result;
            }
        } else {
            error = clEnqueueWriteImage(*queue, mem, CL_FALSE, origin, region, 0, 0, data, 0, NULL, &event);
            result = check_allocation_error(context, device_id, error, queue);
            if (result == FAILED_ABORT) {
                print_error(error, "clEnqueueWriteImage failed.");
            }
            if (result != SUCCEEDED) {
                clFinish(*queue);
                clReleaseMemObject(mem);
                free(data);
                return result;
            }

            error = clWaitForEvents(1, &event);
            result = check_allocation_error(context, device_id, error, queue);
            if (result == FAILED_ABORT) {
                print_error(error, "clWaitForEvents failed.");
            }
            if (result != SUCCEEDED) {
                clFinish(*queue);
                clReleaseEvent(event);
                free(data);
                clReleaseMemObject(mem);
                return result;
            }

            clReleaseEvent(event);
        }
    }

    free(data);
    // Only update the checksum if this succeeded.
    checksum += checksum_delta;
    return SUCCEEDED;
}
// Fills `mem` with random data by querying its object type and delegating to
// fill_buffer_with_data() (buffers) or fill_image_with_data() (2D images).
// Any other object type is reported and yields FAILED_ABORT. Failed info
// queries are handled by test_error_abort.
int fill_mem_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, MTdata d, cl_bool blocking_write) {
    cl_mem_object_type mem_type;
    int error;

    error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(mem_type), &mem_type, NULL);
    test_error_abort(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");

    switch (mem_type) {
        case CL_MEM_OBJECT_BUFFER: {
            // Buffers are filled by byte size.
            size_t byte_count;
            error = clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(byte_count), &byte_count, NULL);
            test_error_abort(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
            return fill_buffer_with_data(context, device_id, queue, mem, byte_count, d, blocking_write);
        }
        case CL_MEM_OBJECT_IMAGE2D: {
            // Images are filled by pixel dimensions.
            size_t img_width, img_height;
            error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(img_width), &img_width, NULL);
            test_error_abort(error, "clGetImageInfo failed for CL_IMAGE_WIDTH.");
            error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(img_height), &img_height, NULL);
            test_error_abort(error, "clGetImageInfo failed for CL_IMAGE_HEIGHT.");
            return fill_image_with_data(context, device_id, queue, mem, img_width, img_height, d, blocking_write);
        }
        default:
            break;
    }

    log_error("Invalid CL_MEM_TYPE: %d\n", mem_type);
    return FAILED_ABORT;
}

View File

@@ -0,0 +1,19 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "allocation_utils.h"
// Fills the buffer or 2D image `mem` with random data drawn from `d`, using
// blocking or non-blocking writes according to `blocking_write`.
// Returns SUCCEEDED, FAILED_TOO_BIG or FAILED_ABORT.
// `queue` is passed by pointer because the error-classification helper used
// during the fill may replace the queue handle.
int fill_mem_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, MTdata d, cl_bool blocking_write);

View File

@@ -0,0 +1,287 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "allocation_functions.h"
#include "allocation_fill.h"
static cl_image_format image_format = { CL_RGBA, CL_UNSIGNED_INT32 };
// Creates a read-write buffer of `size_to_allocate` bytes in `*mem` and
// returns the creation status as classified by check_allocation_error().
// `blocking_write` is not used by this function.
int allocate_buffer(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
    int create_status;

    *mem = clCreateBuffer(context, CL_MEM_READ_WRITE, size_to_allocate, NULL, &create_status);

    return check_allocation_error(context, device_id, create_status, queue);
}
// Picks close-to-square 2D image dimensions whose RGBA/UNSIGNED_INT32 storage
// (16 bytes per pixel) approximates `size_to_allocate` bytes.
//
// Parameters:
//   device_id        - device whose image support and 2D limits are queried.
//   size_to_allocate - requested size in bytes; zero is rejected.
//   width, height    - receive the chosen dimensions (written only on SUCCEEDED).
//   max_size         - optional (may be NULL). On SUCCEEDED receives the byte
//                      size of the chosen image; on FAILED_TOO_BIG receives
//                      the byte size of the largest image the device limits
//                      allow, saturated at SIZE_MAX.
//
// Returns SUCCEEDED, FAILED_TOO_BIG when the request exceeds
// max_width * max_height pixels, or FAILED_ABORT when the device has no image
// support, the request is zero, or a device query fails.
int find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t *width, size_t *height, size_t* max_size) {
    const size_t bytes_per_pixel = sizeof(cl_uint)*4;
    size_t max_width, max_height, max_pixels, num_pixels, found_width, found_height;
    int error;

    // checkForImageSupport() returns non-zero when images are unavailable.
    if (checkForImageSupport(device_id)) {
        log_info("Can not allocate an image on this device because it does not support images.");
        return FAILED_ABORT;
    }

    if (size_to_allocate == 0) {
        log_error("Trying to allcoate a zero sized image.\n");
        return FAILED_ABORT;
    }

    error = clGetDeviceInfo( device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( max_width ), &max_width, NULL );
    test_error_abort(error, "clGetDeviceInfo failed.");
    error = clGetDeviceInfo( device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( max_height ), &max_height, NULL );
    test_error_abort(error, "clGetDeviceInfo failed.");

    // Saturate instead of wrapping: with a 32-bit size_t the pixel limit (or
    // its byte size) can exceed what size_t can represent.
    if (max_height != 0 && max_width > SIZE_MAX / max_height)
        max_pixels = SIZE_MAX;
    else
        max_pixels = max_width * max_height;

    num_pixels = size_to_allocate / bytes_per_pixel;
    if (num_pixels > max_pixels) {
        if(NULL != max_size) {
            if (max_pixels > SIZE_MAX / bytes_per_pixel)
                *max_size = SIZE_MAX;
            else
                *max_size = max_pixels * bytes_per_pixel;
        }
        return FAILED_TOO_BIG;
    }

    // We want a close-to-square aspect ratio.
    found_width = (size_t)sqrt( (double) num_pixels );
    if( found_width > max_width ) {
        found_width = max_width;
    }
    if (found_width == 0)
        found_width = 1;

    found_height = num_pixels/found_width;
    if (found_height > max_height) {
        // The height limit is the tighter one: pin the height and widen the
        // image so the pixel count is kept. num_pixels <= max_width*max_height
        // guarantees the widened value fits; the clamp is purely defensive.
        found_height = max_height;
        if (found_height != 0) {
            found_width = num_pixels/found_height;
            if (found_width > max_width)
                found_width = max_width;
        }
    }
    if (found_height == 0)
        found_height = 1;

    *width = found_width;
    *height = found_height;

    if(NULL != max_size) {
        *max_size = found_width * found_height * bytes_per_pixel;
    }

    return SUCCEEDED;
}
// Creates a CL_MEM_READ_ONLY 2D image of roughly `size_to_allocate` bytes in
// `*mem`. Dimensions come from find_good_image_size(); if that fails its
// status is returned unchanged and no image is created. Otherwise the
// creation status is classified by check_allocation_error().
// `blocking_write` only affects the log message.
int allocate_image2d_read(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
    size_t img_width, img_height;
    int status = find_good_image_size(device_id, size_to_allocate, &img_width, &img_height, NULL);

    if (status == SUCCEEDED) {
        log_info("\t\tAttempting to allocate a %gMB read-only image (%d x %d) and fill with %s writes.\n",
                 (size_to_allocate/(1024.0*1024.0)), (int)img_width, (int)img_height, (blocking_write ? "blocking" : "non-blocking"));

        *mem = create_image_2d(context, CL_MEM_READ_ONLY, &image_format, img_width, img_height, 0, NULL, &status);
        status = check_allocation_error(context, device_id, status, queue);
    }

    return status;
}
// Creates a CL_MEM_WRITE_ONLY 2D image of roughly `size_to_allocate` bytes in
// `*mem`. Dimensions come from find_good_image_size(); if that fails its
// status is returned unchanged and no image is created. Otherwise the
// creation status is classified by check_allocation_error().
// `blocking_write` is not used by this function.
int allocate_image2d_write(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
    size_t img_width, img_height;
    int status = find_good_image_size(device_id, size_to_allocate, &img_width, &img_height, NULL);

    if (status == SUCCEEDED) {
        *mem = create_image_2d(context, CL_MEM_WRITE_ONLY, &image_format, img_width, img_height, 0, NULL, &status);
        status = check_allocation_error(context, device_id, status, queue);
    }

    return status;
}
// Creates one memory object of the kind selected by `type` in `*mem`.
// The three blocking kinds pass `true` and the three *_NON_BLOCKING kinds
// pass `false` as the blocking flag of the matching allocate_* helper.
// An unknown `type` is logged and yields FAILED_ABORT.
int do_allocation(cl_context context, cl_command_queue *queue, cl_device_id device_id, size_t size_to_allocate, int type, cl_mem *mem) {
    const cl_bool blocking = (type == BUFFER || type == IMAGE_READ || type == IMAGE_WRITE);

    if (type == BUFFER || type == BUFFER_NON_BLOCKING) {
        return allocate_buffer(context, queue, device_id, mem, size_to_allocate, blocking);
    }
    if (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING) {
        return allocate_image2d_read(context, queue, device_id, mem, size_to_allocate, blocking);
    }
    if (type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) {
        return allocate_image2d_write(context, queue, device_id, mem, size_to_allocate, blocking);
    }

    log_error("Invalid allocation type: %d\n", type);
    return FAILED_ABORT;
}
// Tries to allocate `size_to_allocate` bytes of device memory as one or more
// memory objects of the kind selected by `type`, optionally filling each
// object with random data.
//
// Parameters:
//   context, queue, device_id - OpenCL objects to allocate with; *queue may be
//                               replaced by the error-classification helper.
//   multiple_allocations      - non-zero allows up to MAX_NUMBER_TO_ALLOCATE
//                               objects (further limited for images by the
//                               device's image-argument limit); zero allows 1.
//   size_to_allocate          - total number of bytes requested.
//   type                      - BUFFER / IMAGE_READ / IMAGE_WRITE or one of
//                               their *_NON_BLOCKING variants.
//   mems                      - receives the created objects.
//   number_of_mems            - receives how many entries of `mems` are live.
//   final_size                - receives the total number of bytes allocated;
//                               zero if nothing was allocated.
//   force_fill                - non-zero fills every object via
//                               fill_mem_with_data().
//   d                         - random number generator state for the fill.
//
// Returns SUCCEEDED, FAILED_TOO_BIG when an object could not be created even
// after shrinking the request to nothing, or FAILED_ABORT on hard errors.
int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id device_id, int multiple_allocations, size_t size_to_allocate,
                  int type, cl_mem mems[], int *number_of_mems, size_t *final_size, int force_fill, MTdata d) {
    cl_ulong max_individual_allocation_size, global_mem_size;
    int error, result;
    size_t amount_allocated;
    size_t reduction_amount;
    int current_allocation;
    size_t allocation_this_time, actual_allocation;

    // Start from well-defined outputs so that a failure before the first
    // successful allocation (or a loop that never runs) does not leave the
    // caller with garbage values.
    *number_of_mems = 0;
    *final_size = 0;

    error = clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_individual_allocation_size), &max_individual_allocation_size, NULL);
    test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
    error = clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, NULL);
    test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");

    // Sizes are tracked in size_t below, so cap the device total accordingly.
    if (global_mem_size > (cl_ulong)SIZE_MAX) {
        global_mem_size = (cl_ulong)SIZE_MAX;
    }

    if (size_to_allocate > global_mem_size) {
        log_error("Can not allocate more than the global memory size.\n");
        return FAILED_ABORT;
    }

    amount_allocated = 0;
    current_allocation = 0;

    // If allocating for images, reduce the maximum allocation size to the
    // maximum image size. Otherwise CL_DEVICE_MAX_MEM_ALLOC_SIZE can exceed
    // the largest image the device supports (e.g. on systems with 16GB of RAM
    // or more); the image would then be created smaller than requested and
    // fail the size verification below.
    if(type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING || type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) {
        size_t width;
        size_t height;
        size_t max_size;
        error = find_good_image_size(device_id, size_to_allocate, &width, &height, &max_size);
        // Both of these outcomes leave a usable value in max_size.
        if (!(error == SUCCEEDED || error == FAILED_TOO_BIG))
            return error;
        if(max_size < max_individual_allocation_size)
            max_individual_allocation_size = max_size;
    }

    // Step by which a failed request is shrunk. It must never be zero:
    // subtracting zero would retry the same size forever.
    reduction_amount = (size_t)max_individual_allocation_size/16;
    if (reduction_amount == 0)
        reduction_amount = 1;

    if (type == BUFFER || type == BUFFER_NON_BLOCKING) log_info("\tAttempting to allocate a buffer of size %gMB.\n", toMB(size_to_allocate));
    else if (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING) log_info("\tAttempting to allocate a read-only image of size %gMB.\n", toMB(size_to_allocate));
    else if (type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) log_info("\tAttempting to allocate a write-only image of size %gMB.\n", toMB(size_to_allocate));

    // If we are only doing a single allocation, only allow 1
    int max_to_allocate = multiple_allocations ? MAX_NUMBER_TO_ALLOCATE : 1;

    // Make sure that the maximum number of images allocated is constrained by the
    // maximum that may be passed to a kernel
    if (type != BUFFER && type != BUFFER_NON_BLOCKING) {
        cl_device_info param_name = (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING) ?
            CL_DEVICE_MAX_READ_IMAGE_ARGS : CL_DEVICE_MAX_WRITE_IMAGE_ARGS;

        cl_uint max_image_args;
        error = clGetDeviceInfo(device_id, param_name, sizeof(max_image_args), &max_image_args, NULL);
        // Same failure macro as the other device queries in this function.
        test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_MAX IMAGE_ARGS");

        if ((int)max_image_args < max_to_allocate) {
            log_info("\t\tMaximum number of images per kernel limited to %d\n",(int)max_image_args);
            max_to_allocate = max_image_args;
        }
    }

    // Try to allocate the requested amount.
    while (amount_allocated != size_to_allocate && current_allocation < max_to_allocate) {

        // Determine how much more is needed
        allocation_this_time = size_to_allocate - amount_allocated;

        // Bound by the individual allocation size
        if (allocation_this_time > max_individual_allocation_size)
            allocation_this_time = (size_t)max_individual_allocation_size;

        // Allocate the largest object possible
        result = FAILED_TOO_BIG;
        while (result == FAILED_TOO_BIG && allocation_this_time != 0) {

            // Create the object
            result = do_allocation(context, queue, device_id, allocation_this_time, type, &mems[current_allocation]);
            if (result == SUCCEEDED) {
                // Allocation succeeded, another memory object was added to the array
                *number_of_mems = (current_allocation+1);

                // Verify the size is correct to within 1MB.
                actual_allocation = get_actual_allocation_size(mems[current_allocation]);
                if (fabs((double)allocation_this_time - (double)actual_allocation) > 1024.0*1024.0) {
                    log_error("Allocation not of expected size. Expected %gMB, got %gMB.\n", toMB(allocation_this_time), toMB( actual_allocation));
                    return FAILED_ABORT;
                }

                // If we are filling the allocation for verification do so
                if (force_fill) {
                    cl_bool blocking_write = true;
                    if (type == BUFFER_NON_BLOCKING || type == IMAGE_READ_NON_BLOCKING || type == IMAGE_WRITE_NON_BLOCKING) {
                        blocking_write = false;
                    }
                    result = fill_mem_with_data(context, device_id, queue, mems[current_allocation], d, blocking_write);
                    if (result == FAILED_TOO_BIG) {
                        // The fill routines release the object when a write
                        // fails this way, so it must not be counted as live;
                        // the slot is reused by the retry below.
                        *number_of_mems = current_allocation;
                    }
                }
            }

            // If creation failed, try to create a smaller object
            if (result == FAILED_TOO_BIG) {
                if (allocation_this_time > reduction_amount)
                    allocation_this_time -= reduction_amount;
                else if (reduction_amount > 1) {
                    // The halved step persists for later allocations too.
                    reduction_amount /= 2;
                }
                else {
                    allocation_this_time = 0;
                }
            }
        }

        if (result == FAILED_ABORT) {
            log_error("\t\tAllocation failed.\n");
            return FAILED_ABORT;
        }

        if (!allocation_this_time) {
            log_info("\t\tFailed to allocate %gMB across several objects.\n", toMB(size_to_allocate));
            return FAILED_TOO_BIG;
        }

        // Otherwise we succeeded
        if (result != SUCCEEDED) {
            log_error("Test logic error.");
            test_finish();
            exit(-1);
        }

        amount_allocated += allocation_this_time;
        *final_size = amount_allocated;
        current_allocation++;
    }

    log_info("\t\tSucceeded in allocating %gMB using %d memory objects.\n", toMB(amount_allocated), current_allocation);
    return SUCCEEDED;
}

View File

@@ -0,0 +1,24 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "allocation_utils.h"
int do_allocation(cl_context context, cl_command_queue *queue, cl_device_id device_id, size_t size_to_allocate, int type, cl_mem *mem);
int allocate_buffer(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate);
int allocate_image2d_read(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate);
int allocate_image2d_write(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate);
int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id device_id, int multiple_allocations, size_t size_to_allocate,
int type, cl_mem mems[], int *number_of_mems, size_t *final_size, int force_fill, MTdata d);

View File

@@ -0,0 +1,87 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "allocation_utils.h"
// Replaces an unusable command queue: releases `*queue`, creates a new queue
// with default properties on the same context/device, stores it in `*queue`
// and returns it. The creation status is written to `*error`.
cl_command_queue reset_queue(cl_context context, cl_device_id device_id, cl_command_queue *queue, int *error)
{
    cl_command_queue fresh_queue;

    log_info("Invalid command queue. Releasing and recreating the command queue.\n");

    clReleaseCommandQueue(*queue);
    fresh_queue = clCreateCommandQueueWithProperties(context, device_id, 0, error);
    *queue = fresh_queue;

    return fresh_queue;
}
// Maps an OpenCL status code onto the test's three outcomes:
//   SUCCEEDED      - CL_SUCCESS.
//   FAILED_TOO_BIG - out-of-memory / out-of-resources / invalid-image-size
//                    errors, and CL_INVALID_COMMAND_QUEUE once the queue has
//                    been recreated (the caller should retry smaller).
//   FAILED_ABORT   - any other error, or a queue that could not be recreated.
// `*queue` is replaced when the queue is recreated.
int check_allocation_error(cl_context context, cl_device_id device_id, int error, cl_command_queue *queue) {
    switch (error) {
        case CL_SUCCESS:
            return SUCCEEDED;

        case CL_MEM_OBJECT_ALLOCATION_FAILURE:
        case CL_OUT_OF_RESOURCES:
        case CL_OUT_OF_HOST_MEMORY:
        case CL_INVALID_IMAGE_SIZE:
            return FAILED_TOO_BIG;

        case CL_INVALID_COMMAND_QUEUE:
            // reset_queue() overwrites `error` with the queue-creation status.
            *queue = reset_queue(context, device_id, queue, &error);
            if (CL_SUCCESS != error)
            {
                log_error("Failed to reset command queue after corrupted queue: %s\n", IGetErrorString(error));
                return FAILED_ABORT;
            }
            // Try again with smaller resources.
            return FAILED_TOO_BIG;

        default:
            log_error("Allocation failed with %s.\n", IGetErrorString(error));
            return FAILED_ABORT;
    }
}
// Converts a byte count to megabytes (1 MB = 1024 * 1024 bytes).
double toMB(cl_ulong size_in) {
    const double bytes_per_mb = 1024.0*1024.0;
    const double bytes = (double)size_in;
    return bytes/bytes_per_mb;
}
// Returns the size in bytes of `mem` as reported by the OpenCL runtime.
// For buffers this is CL_MEM_SIZE. For 2D images it is
// width * height * 4 * sizeof(cl_uint), i.e. it assumes the 16-byte pixel
// used by this test's image allocators.
// Returns 0 if a query fails or the object is neither a buffer nor a 2D
// image; the failure is logged.
size_t get_actual_allocation_size(cl_mem mem) {
    int error;
    cl_mem_object_type type;
    size_t size, width, height;

    error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
    if (error) {
        print_error(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
        return 0;
    }

    if (type == CL_MEM_OBJECT_BUFFER) {
        error = clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(size), &size, NULL);
        if (error) {
            print_error(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
            return 0;
        }
        return size;
    } else if (type == CL_MEM_OBJECT_IMAGE2D) {
        // The messages below name the call that actually failed.
        error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
        if (error) {
            print_error(error, "clGetImageInfo failed for CL_IMAGE_WIDTH.");
            return 0;
        }
        error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
        if (error) {
            print_error(error, "clGetImageInfo failed for CL_IMAGE_HEIGHT.");
            return 0;
        }
        return width*height*4*sizeof(cl_uint);
    }

    log_error("Invalid CL_MEM_TYPE: %d\n", type);
    return 0;
}

View File

@@ -0,0 +1,24 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
// Running sum of all random words written by the fill routines; each fill
// adds its contribution only after the whole object was written.
extern cl_uint checksum;
// Classifies an OpenCL status code as SUCCEEDED, FAILED_TOO_BIG or
// FAILED_ABORT. On CL_INVALID_COMMAND_QUEUE the queue is released and
// recreated, and *queue is updated.
int check_allocation_error(cl_context context, cl_device_id device_id, int error, cl_command_queue *queue);
// Converts a byte count to megabytes (1 MB = 1024 * 1024 bytes).
double toMB(cl_ulong size_in);
// Returns the size in bytes of a buffer or 2D image, or 0 on failure.
size_t get_actual_allocation_size(cl_mem mem);

View File

@@ -0,0 +1,411 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "allocation_functions.h"
#include "allocation_fill.h"
#include "allocation_execute.h"
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/parseParameters.h"
#include <time.h>
// Cast target used when printing cl_ulong values with %llu.
typedef long long unsigned llu;
// Device selected by init_cl().
cl_device_id g_device_id;
// Device type requested on the command line (or via the harness override).
cl_device_type g_device_type = CL_DEVICE_TYPE_DEFAULT;
// Context and queue created by init_cl().
clContextWrapper g_context;
clCommandQueueWrapper g_queue;
// Set from a bare integer command-line argument.
int g_repetition_count = 1;
// Bitwise OR of the allocation kinds selected on the command line; zero means
// none was selected and usage is printed.
int g_tests_to_run = 0;
// Set from the "<n>%" command-line argument.
int g_reduction_percentage = 100;
// Cleared by the "do_not_force_fill" argument.
int g_write_allocations = 1;
// Set by "multiple", cleared by "single".
int g_multiple_allocations = 0;
// Cleared by the "do_not_execute" argument.
int g_execute_kernel = 1;
// Definition of the checksum accumulated by the fill routines.
cl_uint checksum;
// Prints command-line help. Only the part of `execName` after the last '/'
// is shown as the program name.
void printUsage( const char *execName )
{
    const char *lastSlash = strrchr( execName, '/' );
    const char *shownName = ( lastSlash == NULL ) ? execName : lastSlash + 1;

    // Synopsis and allocation-count modes.
    log_info( "Usage: %s [single|multiple] [numReps] [reduction%%] allocType\n", shownName );
    log_info( "Where:\n" );
    log_info( "\tsingle - Tests using a single allocation as large as possible\n" );
    log_info( "\tmultiple - Tests using as many allocations as possible\n" );
    log_info( "\n" );

    // Optional numeric arguments.
    log_info( "\tnumReps - Optional integer specifying the number of repetitions to run and average the result (defaults to 1)\n" );
    log_info( "\treduction%% - Optional integer, followed by a %% sign, that acts as a multiplier for the target amount of memory.\n" );
    log_info( "\t Example: target amount of 512MB and a reduction of 75%% will result in a target of 384MB.\n" );
    log_info( "\n" );

    // Allocation kinds.
    log_info( "\tallocType - Allocation type to test with. Can be one of the following:\n" );
    log_info( "\t\tbuffer\n");
    log_info( "\t\timage2d_read\n");
    log_info( "\t\timage2d_write\n");
    log_info( "\t\tbuffer_non_blocking\n");
    log_info( "\t\timage2d_read_non_blocking\n");
    log_info( "\t\timage2d_write_non_blocking\n");
    log_info( "\t\tall (runs all of the above in sequence)\n" );

    // Switches that disable parts of the test.
    log_info( "\tdo_not_force_fill - Disable explicitly write data to all memory objects after creating them.\n" );
    log_info( "\t Without this, the kernel execution can not verify its checksum.\n" );
    log_info( "\tdo_not_execute - Disable executing a kernel that accesses all of the memory objects.\n" );
}
int init_cl() {
cl_platform_id platform;
int error;
error = clGetPlatformIDs(1, &platform, NULL);
test_error(error, "clGetPlatformIDs failed");
error = clGetDeviceIDs(platform, g_device_type, 1, &g_device_id, NULL);
test_error(error, "clGetDeviceIDs failed");
/* Create a context */
g_context = clCreateContext( NULL, 1, &g_device_id, notify_callback, NULL, &error );
test_error(error, "clCreateContext failed");
/* Create command queue */
g_queue = clCreateCommandQueueWithProperties( g_context, g_device_id, 0, &error );
test_error(error, "clCreateCommandQueue failed");
return error;
}
int main(int argc, const char *argv[])
{
int error;
int count;
cl_mem mems[MAX_NUMBER_TO_ALLOCATE];
cl_ulong max_individual_allocation_size, global_mem_size;
char str[ 128 ], *endPtr;
int r;
int number_of_mems_used;
int failure_counts = 0;
int test, test_to_run = 0;
int randomize = 0;
size_t final_size, max_size, current_test_size;
test_start();
argc = parseCustomParam(argc, argv);
if (argc == -1)
{
test_finish();
return -1;
}
// Parse arguments
checkDeviceTypeOverride( &g_device_type );
for( int i = 1; i < argc; i++ )
{
strncpy( str, argv[ i ], sizeof( str ) - 1 );
if( strcmp( str, "cpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_CPU" ) == 0 )
g_device_type = CL_DEVICE_TYPE_CPU;
else if( strcmp( str, "gpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_GPU" ) == 0 )
g_device_type = CL_DEVICE_TYPE_GPU;
else if( strcmp( str, "accelerator" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
g_device_type = CL_DEVICE_TYPE_ACCELERATOR;
else if( strcmp( str, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
g_device_type = CL_DEVICE_TYPE_DEFAULT;
else if( strcmp( str, "multiple" ) == 0 )
g_multiple_allocations = 1;
else if( strcmp( str, "randomize" ) == 0 )
randomize = 1;
else if( strcmp( str, "single" ) == 0 )
g_multiple_allocations = 0;
else if( ( r = (int)strtol( str, &endPtr, 10 ) ) && ( endPtr != str ) && ( *endPtr == 0 ) )
{
// By spec, that means the entire string was an integer, so take it as a repetition count
g_repetition_count = r;
}
else if( strcmp( str, "all" ) == 0 )
{
g_tests_to_run = BUFFER | IMAGE_READ | IMAGE_WRITE | BUFFER_NON_BLOCKING | IMAGE_READ_NON_BLOCKING | IMAGE_WRITE_NON_BLOCKING;
}
else if( strchr( str, '%' ) != NULL )
{
// Reduction percentage (let strtol ignore the percentage)
g_reduction_percentage = (int)strtol( str, NULL, 10 );
}
else if( g_tests_to_run == 0 )
{
if( strcmp( str, "buffer" ) == 0 )
{
g_tests_to_run |= BUFFER;
}
else if( strcmp( str, "image2d_read" ) == 0 )
{
g_tests_to_run |= IMAGE_READ;
}
else if( strcmp( str, "image2d_write" ) == 0 )
{
g_tests_to_run |= IMAGE_WRITE;
}
else if( strcmp( str, "buffer_non_blocking" ) == 0 )
{
g_tests_to_run |= BUFFER_NON_BLOCKING;
}
else if( strcmp( str, "image2d_read_non_blocking" ) == 0 )
{
g_tests_to_run |= IMAGE_READ_NON_BLOCKING;
}
else if( strcmp( str, "image2d_write_non_blocking" ) == 0 )
{
g_tests_to_run |= IMAGE_WRITE_NON_BLOCKING;
}
if( g_tests_to_run == 0 )
break; // Argument is invalid; break to print usage
}
else if( strcmp( str, "do_not_force_fill" ) == 0 )
{
g_write_allocations = 0;
}
else if( strcmp( str, "do_not_execute" ) == 0 )
{
g_execute_kernel = 0;
}
}
if( randomize )
{
gRandomSeed = (cl_uint) time( NULL );
log_info( "Random seed: %u.\n", gRandomSeed );
gReSeed = 1;
}
if( g_tests_to_run == 0 )
{
// Allocation type was never specified, or one of the arguments was invalid. Print usage and bail
printUsage( argv[ 0 ] );
return -1;
}
// All ready to go, so set up an environment
error = init_cl();
if (error) {
test_finish();
return -1;
}
if( printDeviceHeader( g_device_id ) != CL_SUCCESS )
{
test_finish();
return -1;
}
error = clGetDeviceInfo(g_device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_individual_allocation_size), &max_individual_allocation_size, NULL);
if ( error ) {
print_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
test_finish();
return -1;
}
error = clGetDeviceInfo(g_device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, NULL);
if ( error ) {
print_error( error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
test_finish();
return -1;
}
log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n",
llu( max_individual_allocation_size ), toMB(max_individual_allocation_size),
llu( global_mem_size ), toMB(global_mem_size));
if (global_mem_size > (cl_ulong)SIZE_MAX) {
global_mem_size = (cl_ulong)SIZE_MAX;
}
if( max_individual_allocation_size > global_mem_size )
{
log_error( "FAILURE: CL_DEVICE_MAX_MEM_ALLOC_SIZE (%llu) is greater than the CL_DEVICE_GLOBAL_MEM_SIZE (%llu)\n", llu( max_individual_allocation_size ), llu( global_mem_size ) );
test_finish();
return -1;
}
// We may need to back off the global_mem_size on unified memory devices to leave room for application and operating system code
// and associated data in the working set, so we dont start pathologically paging.
// Check to see if we are a unified memory device
cl_bool hasUnifiedMemory = CL_FALSE;
if( ( error = clGetDeviceInfo( g_device_id, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof( hasUnifiedMemory ), &hasUnifiedMemory, NULL )))
{
print_error( error, "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY");
test_finish();
return -1;
}
// we share unified memory so back off to 1/2 the global memory size.
if( CL_TRUE == hasUnifiedMemory )
{
global_mem_size -= global_mem_size /2;
log_info( "Device shares memory with the host, so backing off the maximum combined allocation size to be %gMB to avoid rampant paging.\n", toMB( global_mem_size ) );
}
else
{
// Lets just use 60% of total available memory as framework/driver may not allow using all of it
// e.g. vram on GPU is used by window server and even for this test, we need some space for context,
// queue, kernel code on GPU.
global_mem_size *= 0.60;
}
// Pick the baseline size based on whether we are doing a single large or multiple allocations
if (!g_multiple_allocations) {
max_size = (size_t)max_individual_allocation_size;
} else {
max_size = (size_t)global_mem_size;
}
// Adjust based on the percentage
if (g_reduction_percentage != 100) {
log_info("NOTE: reducing max allocations to %d%%.\n", g_reduction_percentage);
max_size = (size_t)((double)max_size * (double)g_reduction_percentage/100.0);
}
// Round to nearest MB.
max_size &= (size_t)(0xFFFFFFFFFF00000ULL);
log_info("** Target allocation size (rounded to nearest MB) is: %lu bytes (%gMB).\n", max_size, toMB(max_size));
// Run all the requested tests
RandomSeed seed( gRandomSeed );
for (test=0; test<6; test++) {
if (test == 0) test_to_run = BUFFER;
if (test == 1) test_to_run = IMAGE_READ;
if (test == 2) test_to_run = IMAGE_WRITE;
if (test == 3) test_to_run = BUFFER_NON_BLOCKING;
if (test == 4) test_to_run = IMAGE_READ_NON_BLOCKING;
if (test == 5) test_to_run = IMAGE_WRITE_NON_BLOCKING;
if (!(g_tests_to_run & test_to_run))
continue;
// Skip image tests if we don't support images on the device
if (test > 0 && checkForImageSupport(g_device_id)) {
log_info("Can not test image allocation because device does not support images.\n");
continue;
}
// This section was added in order to fix a bug in the test.
// If CL_DEVICE_MAX_MEM_ALLOC_SIZE is much greater than CL_DEVICE_IMAGE2D_MAX_WIDTH * CL_DEVICE_IMAGE2D_MAX_HEIGHT,
// the test will fail in image allocations because the size requested for the allocation will be much greater than the maximum size allowed for an image.
if ( (test_to_run != BUFFER) && (test_to_run != BUFFER_NON_BLOCKING) ) {
size_t max_width, max_height;
cl_ulong max_image2d_size;
error = clGetDeviceInfo(g_device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( max_width ), &max_width, NULL );
test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_WIDTH");
error = clGetDeviceInfo(g_device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( max_height ), &max_height, NULL );
test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_HEIGHT");
max_image2d_size = (cl_ulong)max_height*max_width*4*sizeof(cl_uint);
if (max_individual_allocation_size > max_image2d_size)
{
max_individual_allocation_size = max_image2d_size;
}
}
// Pick the baseline size based on whether we are doing a single large or multiple allocations
if (!g_multiple_allocations) {
max_size = (size_t)max_individual_allocation_size;
} else {
max_size = (size_t)global_mem_size;
}
// Adjust based on the percentage
if (g_reduction_percentage != 100) {
log_info("NOTE: reducing max allocations to %d%%.\n", g_reduction_percentage);
max_size = (size_t)((double)max_size * (double)g_reduction_percentage/100.0);
}
// Round to nearest MB.
max_size &= (size_t)(0xFFFFFFFFFF00000ULL);
log_info("** Target allocation size (rounded to nearest MB) is: %llu bytes (%gMB).\n", llu( max_size ), toMB(max_size));
if (test_to_run == BUFFER || test_to_run == BUFFER_NON_BLOCKING) log_info("** Allocating buffer(s) to size %gMB.\n", toMB(max_size));
else if (test_to_run == IMAGE_READ || test_to_run == IMAGE_READ_NON_BLOCKING) log_info("** Allocating read-only image(s) to size %gMB.\n", toMB(max_size));
else if (test_to_run == IMAGE_WRITE || test_to_run == IMAGE_WRITE_NON_BLOCKING) log_info("** Allocating write-only image(s) to size %gMB.\n", toMB(max_size));
else {log_error("Test logic error.\n"); return -1;}
// Run the test the requested number of times
for (count = 0; count < g_repetition_count; count++) {
current_test_size = max_size;
error = FAILED_TOO_BIG;
log_info(" => Allocation %d\n", count+1);
while (error == FAILED_TOO_BIG && current_test_size > max_size/8) {
// Reset our checksum for each allocation
checksum = 0;
// Do the allocation
error = allocate_size(g_context, &g_queue, g_device_id, g_multiple_allocations, current_test_size, test_to_run, mems, &number_of_mems_used, &final_size, g_write_allocations, seed);
// If we succeeded and we're supposed to execute a kernel, do so.
if (error == SUCCEEDED && g_execute_kernel) {
log_info("\tExecuting kernel with memory objects.\n");
error = execute_kernel(g_context, &g_queue, g_device_id, test_to_run, mems, number_of_mems_used, g_write_allocations);
}
// If we failed to allocate more than 1/8th of the requested amount return a failure.
if (final_size < (size_t)max_size/8) {
// log_error("===> Allocation %d failed to allocate more than 1/8th of the requested size.\n", count+1);
failure_counts++;
}
// Clean up.
for (int i=0; i<number_of_mems_used; i++)
clReleaseMemObject(mems[i]);
if (error == FAILED_ABORT) {
log_error(" => Allocation %d failed.\n", count+1);
failure_counts++;
}
if (error == FAILED_TOO_BIG) {
current_test_size -= max_size/16;
// log_info("\tFailed at this size; trying a smaller size of %gMB.\n", toMB(current_test_size));
}
}
if (error == SUCCEEDED && current_test_size == max_size)
log_info("\tPASS: Allocation succeeded.\n");
else if (error == SUCCEEDED && current_test_size > max_size/8)
log_info("\tPASS: Allocation succeeded at reduced size.\n");
else {
log_error("\tFAIL: Allocation failed.\n");
failure_counts++;
}
}
}
if (failure_counts)
log_error("FAILED allocations test.\n");
else
log_info("PASSED allocations test.\n");
test_finish();
return failure_counts;
}

View File

@@ -0,0 +1,62 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _testBase_h
#define _testBase_h
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#if !defined(_WIN32)
#include <unistd.h>
#endif
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/testHarness.h"
// NOTE(review): presumably the upper bound on the number of memory objects a
// single test run may create -- confirm against the allocation helpers.
#define MAX_NUMBER_TO_ALLOCATE 100
// Result codes returned by the allocation/execution helpers and compared
// against by the test driver.
#define FAILED_CORRUPTED_QUEUE -2
// Hard failure: the driver logs the allocation as failed and counts it.
#define FAILED_ABORT -1
// The requested size could not be allocated: the driver retries at a smaller size.
#define FAILED_TOO_BIG 1
// On Windows macro `SUCCEEDED' is defined in `WinError.h'. It causes compiler warnings. Let us avoid them.
#if defined( _WIN32 ) && defined( SUCCEEDED )
#undef SUCCEEDED
#endif
#define SUCCEEDED 0
// Test-selection bit flags. They are OR'ed together into the set of tests to
// run and checked individually with a bitwise AND, so each value must remain a
// distinct power of two.
#define BUFFER 1
#define IMAGE_READ 2
#define IMAGE_WRITE 4
#define BUFFER_NON_BLOCKING 8
#define IMAGE_READ_NON_BLOCKING 16
#define IMAGE_WRITE_NON_BLOCKING 32
// Error-check helpers: if errCode is not CL_SUCCESS, print the error and return
// FAILED_ABORT from the *calling* function.
// Note that `retValue' is accepted but never used: both macros always return
// FAILED_ABORT, never errCode or retValue.
// The expansion is a bare brace block (not do { } while(0)) and errCode is
// expanded more than once, so pass a plain variable rather than an expression
// with side effects.
#define test_error_abort(errCode,msg) test_error_ret_abort(errCode,msg,errCode)
#define test_error_ret_abort(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return FAILED_ABORT ; } }
#endif // _testBase_h

View File

@@ -0,0 +1,39 @@
# Build description for the API conformance test module.
# NOTE(review): the shared script included at the bottom is expected to consume
# MODULE_NAME and ${MODULE_NAME}_SOURCES to create the test executable --
# confirm against ../CMakeCommon.txt.
set(MODULE_NAME API)
# Every source compiled into this module: the test entry point and individual
# API tests first, followed by the shared harness sources that are compiled
# directly into the executable.
set(${MODULE_NAME}_SOURCES
main.c
test_bool.c
test_retain.cpp
test_retain_program.c
test_queries.cpp
test_create_kernels.c
test_kernels.c
test_api_min_max.c
test_kernel_arg_changes.cpp
test_kernel_arg_multi_setup.cpp
test_binary.cpp
test_native_kernel.cpp
test_mem_objects.cpp
test_create_context_from_type.cpp
test_device_min_data_type_align_size_alignment.cpp
test_platform.cpp
test_kernel_arg_info.c
test_null_buffer_arg.c
test_mem_object_info.cpp
test_queue_hint.cpp
test_sub_group_dispatch.cpp
test_clone_kernel.cpp
test_zero_sized_enqueue.cpp
# Shared test harness sources (paths relative to this directory).
../../test_common/harness/errorHelpers.c
../../test_common/harness/threadTesting.c
../../test_common/harness/testHarness.c
../../test_common/harness/kernelHelpers.c
../../test_common/harness/typeWrappers.cpp
../../test_common/harness/conversions.c
../../test_common/harness/mt19937.c
../../test_common/harness/msvc9.c
../../test_common/harness/imageHelpers.cpp
../../test_common/harness/parseParameters.cpp
)
# Pull in the rules shared by all conformance test modules.
include(../CMakeCommon.txt)

View File

@@ -0,0 +1,27 @@
# Boost.Build description for the API conformance test executable.
# Project-wide requirement: pass -xc++ to gcc and /TP to msvc so that the .c
# sources listed below are compiled as C++.
project
: requirements
<toolset>gcc:<cflags>-xc++
<toolset>msvc:<cflags>"/TP"
;
# The test_api executable and the sources it is built from.
# NOTE(review): this list is shorter than the source list in this directory's
# CMake build (for example test_retain.cpp, test_bool.c and the harness sources
# are absent) -- confirm whether this Jamfile is still maintained.
exe test_api
: main.c
test_api_min_max.c
test_binary.cpp
test_create_kernels.c
test_create_context_from_type.cpp
test_kernel_arg_changes.cpp
test_kernel_arg_multi_setup.cpp
test_kernels.c
test_native_kernel.cpp
test_queries.cpp
test_retain_program.c
test_platform.cpp
;
# Install test_api into a per-variant (debug/release) directory under $(DIST).
install dist
: test_api #test.lst
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/api
<variant>release:<location>$(DIST)/release/tests/test_conformance/api
;

View File

@@ -0,0 +1,61 @@
# Makefile for the API conformance test (links against the OpenCL framework).
#
# Optional ATF support: define BUILD_WITH_ATF to compile with -DUSE_ATF and
# link against the ATF framework.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Test sources followed by the shared harness sources.
# Every test function registered in main.c's basefn_list must have its source
# file listed here, otherwise the link step fails with undefined symbols.
# test_queue_hint.cpp, test_sub_group_dispatch.cpp, test_clone_kernel.cpp and
# test_zero_sized_enqueue.cpp were previously missing from this list.
# NOTE(review): the CMake build additionally compiles
# ../../test_common/harness/parseParameters.cpp -- add it here if the newly
# listed tests turn out to need it.
SRCS = main.c \
	test_retain_program.c \
	test_queries.cpp \
	test_create_kernels.c \
	test_kernels.c \
	test_kernel_arg_info.c \
	test_api_min_max.c \
	test_kernel_arg_changes.cpp \
	test_kernel_arg_multi_setup.cpp \
	test_binary.cpp \
	test_native_kernel.cpp \
	test_create_context_from_type.cpp \
	test_platform.cpp \
	test_retain.cpp \
	test_device_min_data_type_align_size_alignment.cpp \
	test_mem_objects.cpp \
	test_bool.c \
	test_null_buffer_arg.c \
	test_mem_object_info.cpp \
	test_queue_hint.cpp \
	test_sub_group_dispatch.cpp \
	test_clone_kernel.cpp \
	test_zero_sized_enqueue.cpp \
	../../test_common/harness/errorHelpers.c \
	../../test_common/harness/threadTesting.c \
	../../test_common/harness/testHarness.c \
	../../test_common/harness/imageHelpers.cpp \
	../../test_common/harness/kernelHelpers.c \
	../../test_common/harness/typeWrappers.cpp \
	../../test_common/harness/mt19937.c \
	../../test_common/harness/conversions.c

# Preprocessor symbols; each entry is turned into a -D flag below.
DEFINES = DONT_TEST_GARBAGE_POINTERS

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
HEADERS =
TARGET = test_api
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
# The C++ driver is used for both .c and .cpp sources.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Map every source to its object file (.c first, then .cpp).
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}
TARGETOBJECT =

# all and clean do not name files, so keep make from being confused by a stray
# file called "all" or "clean" in this directory.
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Fallback for any target that has no rule.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.

223
test_conformance/api/main.c Normal file
View File

@@ -0,0 +1,223 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "procs.h"
#include "../../test_common/harness/testHarness.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif
// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h
// (for example, generate_random_image_data()), the tests are required to declare
// the following variables (<rdar://problem/11111245>):
cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
bool gTestRounding = false;
// Table of test entry points handed to runTestHarness() by main().
// NOTE(review): entries are presumably matched by index with basefn_names, so
// keep both tables in the same order. The ct_assert after the tables only
// checks that they have the same number of entries, not that they line up.
basefn basefn_list[] = {
test_get_platform_info,
test_get_sampler_info,
test_get_command_queue_info,
test_get_context_info,
test_get_device_info,
test_enqueue_task,
test_binary_get,
test_program_binary_create,
test_kernel_required_group_size,
test_release_kernel_order,
test_release_during_execute,
test_load_single_kernel,
test_load_two_kernels,
test_load_two_kernels_in_one,
test_load_two_kernels_manually,
test_get_program_info_kernel_names,
test_get_kernel_arg_info,
test_create_kernels_in_program,
test_get_kernel_info,
test_execute_kernel_local_sizes,
test_set_kernel_arg_by_index,
test_set_kernel_arg_constant,
test_set_kernel_arg_struct_array,
test_kernel_global_constant,
test_min_max_thread_dimensions,
test_min_max_work_items_sizes,
test_min_max_work_group_size,
test_min_max_read_image_args,
test_min_max_write_image_args,
test_min_max_mem_alloc_size,
test_min_max_image_2d_width,
test_min_max_image_2d_height,
test_min_max_image_3d_width,
test_min_max_image_3d_height,
test_min_max_image_3d_depth,
test_min_max_image_array_size,
test_min_max_image_buffer_size,
test_min_max_parameter_size,
test_min_max_samplers,
test_min_max_constant_buffer_size,
test_min_max_constant_args,
test_min_max_compute_units,
test_min_max_address_bits,
test_min_max_single_fp_config,
test_min_max_double_fp_config,
test_min_max_local_mem_size,
test_min_max_kernel_preferred_work_group_size_multiple,
test_min_max_execution_capabilities,
test_min_max_queue_properties,
test_min_max_device_version,
test_min_max_language_version,
test_kernel_arg_changes,
test_kernel_arg_multi_setup_random,
test_native_kernel,
test_create_context_from_type,
test_platform_extensions,
test_get_platform_ids,
test_for_bool_type,
test_repeated_setup_cleanup,
test_retain_queue_single,
test_retain_queue_multiple,
test_retain_mem_object_single,
test_retain_mem_object_multiple,
test_min_data_type_align_size_alignment,
test_mem_object_destructor_callback,
test_null_buffer_arg,
test_get_buffer_info,
test_get_image2d_info,
test_get_image3d_info,
test_get_image1d_info,
test_get_image1d_array_info,
test_get_image2d_array_info,
test_queue_hint,
test_sub_group_dispatch,
test_clone_kernel,
test_zero_sized_enqueue
};
// Names of the tests, handed to runTestHarness() together with basefn_list.
// Most names are the function name without the "test_" prefix, but not all:
// "binary_create" is listed in the position of test_program_binary_create and
// "bool_type" in the position of test_for_bool_type.
// NOTE(review): presumably matched by index with basefn_list -- add, remove or
// reorder entries in both tables together.
const char *basefn_names[] = {
"get_platform_info",
"get_sampler_info",
"get_command_queue_info",
"get_context_info",
"get_device_info",
"enqueue_task",
"binary_get",
"binary_create",
"kernel_required_group_size",
"release_kernel_order",
"release_during_execute",
"load_single_kernel",
"load_two_kernels",
"load_two_kernels_in_one",
"load_two_kernels_manually",
"get_program_info_kernel_names",
"get_kernel_arg_info",
"create_kernels_in_program",
"get_kernel_info",
"execute_kernel_local_sizes",
"set_kernel_arg_by_index",
"set_kernel_arg_constant",
"set_kernel_arg_struct_array",
"kernel_global_constant",
"min_max_thread_dimensions",
"min_max_work_items_sizes",
"min_max_work_group_size",
"min_max_read_image_args",
"min_max_write_image_args",
"min_max_mem_alloc_size",
"min_max_image_2d_width",
"min_max_image_2d_height",
"min_max_image_3d_width",
"min_max_image_3d_height",
"min_max_image_3d_depth",
"min_max_image_array_size",
"min_max_image_buffer_size",
"min_max_parameter_size",
"min_max_samplers",
"min_max_constant_buffer_size",
"min_max_constant_args",
"min_max_compute_units",
"min_max_address_bits",
"min_max_single_fp_config",
"min_max_double_fp_config",
"min_max_local_mem_size",
"min_max_kernel_preferred_work_group_size_multiple",
"min_max_execution_capabilities",
"min_max_queue_properties",
"min_max_device_version",
"min_max_language_version",
"kernel_arg_changes",
"kernel_arg_multi_setup_random",
"native_kernel",
"create_context_from_type",
"platform_extensions",
"get_platform_ids",
"bool_type",
"repeated_setup_cleanup",
"retain_queue_single",
"retain_queue_multiple",
"retain_mem_object_single",
"retain_mem_object_multiple",
"min_data_type_align_size_alignment",
"mem_object_destructor_callback",
"null_buffer_arg",
"get_buffer_info",
"get_image2d_info",
"get_image3d_info",
"get_image1d_info",
"get_image1d_array_info",
"get_image2d_array_info",
"queue_hint",
"sub_group_dispatch",
"clone_kernel",
"zero_sized_enqueue",
};
// Compile-time guard: the function table and the name table must contain the
// same number of entries.
ct_assert( ( sizeof(basefn_list) / sizeof(basefn_list[0]) ) == ( sizeof(basefn_names) / sizeof(basefn_names[0]) ) );

// Number of registered tests, derived from the name table.
int num_fns = sizeof(basefn_names) / sizeof(basefn_names[0]);

// Program entry point: forwards the command line and both test tables to the
// shared test harness and returns the harness result as the exit status.
int main(int argc, const char *argv[])
{
    const int harnessResult = runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
    return harnessResult;
}

View File

@@ -0,0 +1,111 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/clImageHelper.h"
#include "../../test_common/harness/imageHelpers.h"
extern float calculate_ulperror(float a, float b);
extern int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_load_two_kernels_manually(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_for_bool_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_platform_extensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_release_during_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_work_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_arg_changes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_arg_multi_setup_random(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_null_buffer_arg( cl_device_id device_id, cl_context context, cl_command_queue queue, int num_elements );
extern int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image2d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image3d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image1d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_queue_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_clone_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_zero_sized_enqueue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

View File

@@ -0,0 +1,31 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _testBase_h
#define _testBase_h
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#endif // _testBase_h

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,218 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
// OpenCL C source of the kernel used by the program-binary tests in this file.
// Each work-item reads src[tid], converts it to int, adds one and stores the
// result in dst[tid]. The adjacent string literals concatenate into a single
// source string, so the array has exactly one element.
static const char *sample_binary_kernel_source[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid] + 1;\n"
"\n"
"}\n" };
// Builds the sample program from source and checks that its binary can be
// queried: the reported binary size must be non-zero, the size reported for
// CL_PROGRAM_BINARIES must equal one pointer (a single device is assumed), and
// fetching the binary itself must succeed. The contents of the binary are not
// verified.
//
// deviceID, queue and num_elements are not used by this test.
//
// Returns 0 on success and -1 when a sanity check or the host allocation
// fails; an OpenCL failure is reported through test_error.
// NOTE(review): test_error is assumed to print the error and return from this
// function when the code is not CL_SUCCESS -- confirm in errorHelpers.h.
int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    size_t binarySize;

    error = create_single_kernel_helper(context, &program, NULL, 1, sample_binary_kernel_source, NULL);
    test_error( error, "Unable to build test program" );

    // Get the size of the resulting binary (only one device)
    error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
    test_error( error, "Unable to get binary size" );

    // Sanity check
    if( binarySize == 0 )
    {
        log_error( "ERROR: Binary size of program is zero\n" );
        return -1;
    }

    // One output slot per device. The slot is filled in only after the checks
    // below, so that none of their early returns can leak the host buffer.
    unsigned char *buffers[ 1 ] = { NULL };

    // Do another sanity check here first
    size_t size;
    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size );
    test_error( error, "Unable to get expected size of binaries array" );
    if( size != sizeof( buffers ) )
    {
        log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d)\n", (int)sizeof( buffers ), (int)size );
        return -1;
    }

    // Create a buffer and get the actual binary
    unsigned char *binary = (unsigned char*)malloc( binarySize );
    if( binary == NULL )
    {
        log_error( "ERROR: Unable to allocate %lu bytes for the program binary\n", (unsigned long)binarySize );
        return -1;
    }
    buffers[ 0 ] = binary;

    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );

    // No way to verify the binary is correct, so just be good with that.
    // Release the buffer before checking the result so the error path does not leak it.
    free( binary );
    test_error( error, "Unable to get program binary" );

    return 0;
}
// Round-trips a program through its binary form and checks that the reloaded
// program behaves like the original.
//
// To test this in a self-contained fashion the function:
//   1. builds the sample program from source and fetches its binary,
//   2. creates and builds a second program from that binary (with a
//      binary_status array) and fetches its binary,
//   3. creates and builds a third program from the second binary with a NULL
//      binary_status pointer and fetches its binary,
//   4. runs the source-built kernel and the binary-built kernel on the same
//      input and requires byte-identical output.
//
// Returns 0 on success, -1 on a verification or allocation failure, or the
// OpenCL error code propagated by test_error.
int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    // Owns one malloc'ed block and frees it when the scope is left. test_error
    // returns from the function on failure, so explicit free() calls at the end
    // of the function would be skipped on every error path.
    struct HostBlock
    {
        void *ptr;
        explicit HostBlock( size_t bytes ) : ptr( malloc( bytes ) ) {}
        ~HostBlock() { free( ptr ); }
    private:
        HostBlock( const HostBlock & );
        HostBlock &operator=( const HostBlock & );
    };

    int error;
    clProgramWrapper program, program_from_binary;
    size_t binarySize;

    error = create_single_kernel_helper(context, &program, NULL, 1, sample_binary_kernel_source, NULL);
    test_error( error, "Unable to build test program" );

    // Get the size of the resulting binary (only one device)
    error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
    test_error( error, "Unable to get binary size" );

    // Sanity check
    if( binarySize == 0 )
    {
        log_error( "ERROR: Binary size of program is zero\n" );
        return -1;
    }

    // Create a buffer and get the actual binary
    HostBlock binary( binarySize );
    if( binary.ptr == NULL )
    {
        log_error( "ERROR: Unable to allocate %d bytes for the program binary\n", (int)binarySize );
        return -1;
    }
    const unsigned char *buffers[ 1 ] = { (const unsigned char*)binary.ptr };
    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), buffers, NULL );
    test_error( error, "Unable to get program binary" );

    cl_int loadErrors[ 1 ];
    program_from_binary = clCreateProgramWithBinary( context, 1, &deviceID, &binarySize, buffers, loadErrors, &error );
    test_error( error, "Unable to load valid program binary" );
    test_error( loadErrors[ 0 ], "Unable to load valid device binary into program" );

    error = clBuildProgram( program_from_binary, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build binary program" );

    // Get the size of the binary built from the first binary
    size_t binary2Size;
    error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARY_SIZES, sizeof( binary2Size ), &binary2Size, NULL );
    test_error( error, "Unable to get size for the binary program" );
    if( binary2Size == 0 )
    {
        log_error( "ERROR: Binary size of program created from binary is zero\n" );
        return -1;
    }

    // Now get the binary one more time
    HostBlock binary2( binary2Size );
    if( binary2.ptr == NULL )
    {
        log_error( "ERROR: Unable to allocate %d bytes for the second program binary\n", (int)binary2Size );
        return -1;
    }
    buffers[ 0 ] = (const unsigned char*)binary2.ptr;
    error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARIES, sizeof( buffers ), buffers, NULL );
    test_error( error, "Unable to get program binary second time" );

    // Try again, this time without passing the status ptr in, to make sure we still
    // get a valid binary
    clProgramWrapper programWithoutStatus = clCreateProgramWithBinary( context, 1, &deviceID, &binary2Size, buffers, NULL, &error );
    test_error( error, "Unable to load valid program binary when binary_status pointer is NULL" );

    error = clBuildProgram( programWithoutStatus, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build binary program created without binary_status" );

    // Get the size of the binary created without passing binary_status
    size_t binary3Size;
    error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARY_SIZES, sizeof( binary3Size ), &binary3Size, NULL );
    test_error( error, "Unable to get size for the binary program created without binary_status" );
    if( binary3Size == 0 )
    {
        log_error( "ERROR: Binary size of program created without binary_status is zero\n" );
        return -1;
    }

    // Now get the binary one more time
    HostBlock binary3( binary3Size );
    if( binary3.ptr == NULL )
    {
        log_error( "ERROR: Unable to allocate %d bytes for the third program binary\n", (int)binary3Size );
        return -1;
    }
    buffers[ 0 ] = (const unsigned char*)binary3.ptr;
    error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARIES, sizeof( buffers ), buffers, NULL );
    test_error( error, "Unable to get program binary from the program created without binary_status" );

    // Now execute the source-built and binary-built programs to see that they
    // both do the same thing.
    clMemWrapper in, out, out_binary;
    clKernelWrapper kernel, kernel_binary;
    size_t size_to_run = 1000;

    // Allocate some data
    HostBlock in_block( sizeof(cl_float)*size_to_run );
    HostBlock out_block( sizeof(cl_int)*size_to_run );
    HostBlock out_binary_block( sizeof(cl_int)*size_to_run );
    if( in_block.ptr == NULL || out_block.ptr == NULL || out_binary_block.ptr == NULL )
    {
        log_error( "ERROR: Unable to allocate host memory for kernel input/output data\n" );
        return -1;
    }
    cl_float *in_data = (cl_float*)in_block.ptr;
    cl_int *out_data = (cl_int*)out_block.ptr;
    cl_int *out_data_binary = (cl_int*)out_binary_block.ptr;

    memset(out_data, 0, sizeof(cl_int)*size_to_run);
    memset(out_data_binary, 0, sizeof(cl_int)*size_to_run);
    for (size_t i=0; i<size_to_run; i++)
        in_data[i] = (cl_float)i;

    // Create the buffers
    in = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_float)*size_to_run, in_data, &error);
    test_error( error, "clCreateBuffer failed");
    out = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data, &error);
    test_error( error, "clCreateBuffer failed");
    out_binary = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data_binary, &error);
    test_error( error, "clCreateBuffer failed");

    // Create the kernels
    kernel = clCreateKernel(program, "sample_test", &error);
    test_error( error, "clCreateKernel failed");
    kernel_binary = clCreateKernel(program_from_binary, "sample_test", &error);
    test_error( error, "clCreateKernel from binary failed");

    // Set the arguments
    error = clSetKernelArg(kernel, 0, sizeof(in), &in);
    test_error( error, "clSetKernelArg failed");
    error = clSetKernelArg(kernel, 1, sizeof(out), &out);
    test_error( error, "clSetKernelArg failed");
    error = clSetKernelArg(kernel_binary, 0, sizeof(in), &in);
    test_error( error, "clSetKernelArg failed");
    error = clSetKernelArg(kernel_binary, 1, sizeof(out_binary), &out_binary);
    test_error( error, "clSetKernelArg failed");

    // Execute the kernels
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel failed");
    error = clEnqueueNDRangeKernel(queue, kernel_binary, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel for binary kernel failed");

    // Finish up
    error = clFinish(queue);
    test_error( error, "clFinish failed");

    // Get the results back
    error = clEnqueueReadBuffer(queue, out, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data, 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed");
    error = clEnqueueReadBuffer(queue, out_binary, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data_binary, 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed");

    // Compare the results
    if( memcmp( out_data, out_data_binary, sizeof(cl_int)*size_to_run ) != 0 )
    {
        log_error( "ERROR: Results from executing binary and regular kernel differ.\n" );
        return -1;
    }

    // All done! Host allocations are released by the HostBlock destructors.
    return 0;
}

View File

@@ -0,0 +1,52 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/testHarness.h"
// OpenCL C source for test_for_bool_type: a kernel that declares and branches
// on a local variable of type bool. For each work-item it stores (int)src[tid]
// when src[tid] lies strictly between -0.5f and 0.5f, and 0 otherwise.
const char *kernel_with_bool[] = {
"__kernel void kernel_with_bool(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" bool myBool = (src[tid] < 0.5f) && (src[tid] > -0.5f);\n"
" if(myBool)\n"
" {\n"
" dst[tid] = (int)src[tid];\n"
" }\n"
" else\n"
" {\n"
" dst[tid] = 0;\n"
" }\n"
"\n"
"}\n"
};
// Checks that a kernel using the OpenCL C `bool` type can be built.
//
// The test only builds kernel_with_bool and creates its kernel object; nothing
// is enqueued. The program and kernel are held in the harness wrapper types
// (the same ones the other tests in this directory use) so both objects are
// released when the function returns instead of being leaked.
//
// Returns the result of create_single_kernel_helper (0 on success).
int test_for_bool_type(cl_device_id deviceID, cl_context context,
                       cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;

    int err = create_single_kernel_helper(context,
                                          &program,
                                          &kernel,
                                          1, kernel_with_bool,
                                          "kernel_with_bool" );
    return err;
}

View File

@@ -0,0 +1,411 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/conversions.h"
#include <sstream>
#include <string>
#include <cmath>
using namespace std;
// Image kernels for the clCloneKernel image-argument test.
//   img_read_kernel  - reads texel (0,0) through the sampler and stores the
//                      four components in outbuf[7..10].
//   img_write_kernel - writes `color` to texel (0,0).
const char *clone_kernel_test_img[] =
{
"__kernel void img_read_kernel(read_only image2d_t img, sampler_t sampler, __global int* outbuf)\n"
"{\n"
" uint4 color;\n"
"\n"
" color = read_imageui(img, sampler, (int2)(0,0));\n"
" \n"
" // 7, 8, 9, 10th DWORD\n"
" outbuf[7] = color.x;\n"
" outbuf[8] = color.y;\n"
" outbuf[9] = color.z;\n"
" outbuf[10] = color.w;\n"
"}\n"
"\n"
"__kernel void img_write_kernel(write_only image2d_t img, uint4 color)\n"
"{\n"
" write_imageui (img, (int2)(0, 0), color);\n"
"}\n"
};
// Double-precision kernel for the clCloneKernel double-argument test. It
// enables cl_khr_fp64 and stores the by-value double in outbuf[2], where
// outbuf is indexed as an array of doubles.
const char *clone_kernel_test_double[] =
{
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
"__kernel void clone_kernel_test1(double d, __global double* outbuf)\n"
"{\n"
" // use the same outbuf as rest of the tests\n"
" outbuf[2] = d;\n"
"}\n"
};
// Kernels for the value-type and buffer-argument clone tests.
//   clone_kernel_test0 - copies its int, float and struct arguments into
//                        outbuf[0..3] (slots 2 and 3 are written as floats).
//   buf_read_kernel    - copies buf[0] into outbuf[6].
//   buf_write_kernel   - stores write_val in buf[0].
const char *clone_kernel_test_kernel[] = {
"typedef struct\n"
"{\n"
" int i;\n"
" float f;\n"
"} structArg;\n"
"\n"
"// value type test\n"
"__kernel void clone_kernel_test0(int iarg, float farg, structArg sarg, __local int* localbuf, __global int* outbuf)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" outbuf[0] = iarg;\n"
" outbuf[1] = sarg.i;\n"
" \n"
" ((__global float*)outbuf)[2] = farg;\n"
" ((__global float*)outbuf)[3] = sarg.f;\n"
"}\n"
"\n"
"__kernel void buf_read_kernel(__global int* buf, __global int* outbuf)\n"
"{\n"
" // 6th DWORD\n"
" outbuf[6] = buf[0];\n"
"}\n"
"\n"
"__kernel void buf_write_kernel(__global int* buf, int write_val)\n"
"{\n"
" buf[0] = write_val;\n"
"}\n"
};
// Size in bytes of the host and device buffers used by the clone tests.
const int BUF_SIZE = 128;
// Host-side counterpart of the structArg typedef declared in
// clone_kernel_test_kernel; passed by value as kernel argument 2.
struct structArg
{
int i;
float f;
};
// Allocates a w x h image with four bytes per pixel and fills every byte with
// a value drawn from the random generator `d`.
//
// Returns a malloc'ed buffer of w * h * 4 bytes that the caller must free(),
// or NULL when the allocation fails (the original dereferenced the NULL
// pointer in the fill loop in that case).
static unsigned char *
generate_8888_image(int w, int h, MTdata d)
{
    const int byteCount = w * h * 4;
    unsigned char *pixels = (unsigned char*)malloc(byteCount);

    if (pixels == NULL)
        return NULL;

    for (int i = 0; i < byteCount; i++)
        pixels[i] = (unsigned char)genrand_int32(d);

    return pixels;
}
// Verifies that clCloneKernel preserves image, sampler and buffer arguments.
//
// A 512x512 CL_RGBA / CL_UNSIGNED_INT8 image has a known colour written to
// texel (0,0) by img_write_kernel. img_read_kernel then has all of its
// arguments set, is cloned, and only the clone is enqueued. The clone must
// store the colour in outbuf[7..10].
//
// pbufRes - host buffer of at least BUF_SIZE bytes that receives bufOut.
// bufOut  - device buffer of at least BUF_SIZE bytes used as kernel output.
//
// Returns 0 on success, -1 on a build or verification failure, or the OpenCL
// error code propagated by test_error.
int test_image_arg_shallow_clone(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, void* pbufRes, clMemWrapper& bufOut)
{
    int error;
    cl_image_format img_format;
    clSamplerWrapper sampler;

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNSIGNED_INT8;

    cl_image_desc imageDesc;
    memset(&imageDesc, 0x0, sizeof(cl_image_desc));
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
    imageDesc.image_width = 512;
    imageDesc.image_height = 512;

    cl_uint color[4] = {1,3,5,7};

    clProgramWrapper program;
    clKernelWrapper kernel_read;
    clKernelWrapper kernel_write;
    clKernelWrapper kernel_cloned;
    size_t ndrange1 = 1;
    clMemWrapper img;

    // Build the program once. Both kernels live in the same source, so the
    // second kernel is created from the already-built program instead of
    // rebuilding into the same wrapper and overwriting its first handle.
    if( create_single_kernel_helper( context, &program, &kernel_read, 1, clone_kernel_test_img, "img_read_kernel" ) != 0 )
    {
        return -1;
    }
    kernel_write = clCreateKernel(program, "img_write_kernel", &error);
    test_error( error, "clCreateKernel failed." );

    img = clCreateImage(context, CL_MEM_READ_WRITE, &img_format, &imageDesc, NULL, &error);
    test_error( error, "clCreateImage failed." );

    cl_sampler_properties properties[] = {
        CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
        CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE,
        CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
        0 };
    sampler = clCreateSamplerWithProperties(context, properties, &error);
    test_error( error, "clCreateSamplerWithProperties failed." );

    // Write the reference colour into the image.
    error = clSetKernelArg(kernel_write, 1, sizeof(color), color);
    test_error( error, "clSetKernelArg failed." );
    error = clSetKernelArg(kernel_write, 0, sizeof(cl_mem), &img);
    test_error( error, "clSetKernelArg failed." );

    error = clEnqueueNDRangeKernel(queue, kernel_write, 1, NULL, &ndrange1, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel failed." );

    // Fully configure the read kernel before cloning it.
    error = clSetKernelArg(kernel_read, 0, sizeof(cl_mem), &img);
    test_error( error, "clSetKernelArg failed." );
    error = clSetKernelArg(kernel_read, 1, sizeof(cl_sampler), &sampler);
    test_error( error, "clSetKernelArg failed." );
    error = clSetKernelArg(kernel_read, 2, sizeof(cl_mem), &bufOut);
    test_error( error, "clSetKernelArg failed." );

    // clone the kernel
    kernel_cloned = clCloneKernel(kernel_read, &error);
    test_error( error, "clCloneKernel failed." );

    error = clEnqueueNDRangeKernel(queue, kernel_cloned, 1, NULL, &ndrange1, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel failed." );

    // read result back
    error = clEnqueueReadBuffer(queue, bufOut, CL_TRUE, 0, BUF_SIZE, pbufRes, 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed." );

    // The kernel stores color.x/.y/.z/.w in outbuf[7..10]. A mismatch is
    // reported with log_error: test_error prints nothing here because the last
    // OpenCL call succeeded.
    const cl_uint *result = (const cl_uint*)pbufRes;
    for (int c = 0; c < 4; c++)
    {
        if (result[7 + c] != color[c])
        {
            log_error( "ERROR: clCloneKernel test failed. Image colour component %d read by the cloned kernel is %u, expected %u.\n",
                       c, (unsigned int)result[7 + c], (unsigned int)color[c] );
            return -1;
        }
    }

    return 0;
}
// Verifies that clCloneKernel preserves a by-value double argument.
//
// clone_kernel_test1 has its double and output-buffer arguments set, is cloned,
// and only the clone is enqueued. The clone must store the double in element 2
// of bufOut when bufOut is viewed as an array of doubles.
//
// pbufRes - host buffer of at least BUF_SIZE bytes that receives bufOut.
// bufOut  - device buffer of at least BUF_SIZE bytes used as kernel output.
//
// Returns 0 on success, -1 on a build or verification failure, or the OpenCL
// error code propagated by test_error.
int test_double_arg_clone(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, void* pbufRes, clMemWrapper& bufOut)
{
    int error = 0;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clKernelWrapper kernel_cloned;
    size_t ndrange1 = 1;

    if( create_single_kernel_helper( context, &program, &kernel, 1, clone_kernel_test_double, "clone_kernel_test1" ) != 0 )
    {
        return -1;
    }

    cl_double d = 1.23;
    error = clSetKernelArg(kernel, 0, sizeof(d), &d);
    test_error( error, "clSetKernelArg failed." );
    error = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufOut);
    test_error( error, "clSetKernelArg failed." );

    kernel_cloned = clCloneKernel(kernel, &error);
    test_error( error, "clCloneKernel failed." );

    error = clEnqueueNDRangeKernel(queue, kernel_cloned, 1, NULL, &ndrange1, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel failed." );

    // read result back
    error = clEnqueueReadBuffer(queue, bufOut, CL_TRUE, 0, BUF_SIZE, pbufRes, 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed." );

    // fabs keeps the comparison in floating point regardless of which abs
    // overloads are visible. The failure is reported with log_error because
    // test_error prints nothing when the last OpenCL call succeeded.
    const cl_double stored = ((cl_double*)pbufRes)[2];
    if (fabs(stored - d) > 0.0000001)
    {
        log_error( "ERROR: clCloneKernel test failed. Failed to clone double type argument (expected %f, got %f).\n", d, stored );
        return -1;
    }

    return 0;
}
// Top-level clCloneKernel test.
//
// Checks that a cloned kernel carries over the arguments of its source kernel:
//   * value arguments (int, float, struct) and a __local size set before the
//     clone, with the last argument set on the clone itself;
//   * a cl_mem buffer argument (shallow clone);
//   * image/sampler arguments, when the device reports image support;
//   * a double argument, when the device reports cl_khr_fp64.
//
// Returns 0 on success, -1 on a build or verification failure, or the OpenCL
// error code propagated by test_error.
int test_clone_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    // Owns a new[]-allocated byte array and releases it when the scope is
    // left, so the early returns taken by test_error do not leak it.
    struct ScopedBytes
    {
        char *ptr;
        explicit ScopedBytes( size_t count ) : ptr( new char[count] ) {}
        ~ScopedBytes() { delete [] ptr; }
    private:
        ScopedBytes( const ScopedBytes & );
        ScopedBytes &operator=( const ScopedBytes & );
    };

    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clKernelWrapper kernel_buf_read;
    clKernelWrapper kernel_buf_write;
    clKernelWrapper kernel_buf_read_cloned;
    size_t ndrange1 = 1;
    int write_val = 123;
    cl_bool bimg = CL_FALSE;
    cl_bool bdouble = CL_FALSE;

    // test image support
    error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &bimg, NULL);
    test_error( error, "clGetDeviceInfo failed." );

    // test double support
    size_t ext_str_size;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &ext_str_size);
    test_error( error, "clGetDeviceInfo failed." );
    {
        ScopedBytes ext_str( ext_str_size + 1 );
        error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, ext_str_size, ext_str.ptr, NULL);
        test_error( error, "clGetDeviceInfo failed." );
        ext_str.ptr[ext_str_size] = '\0';

        // The extension string is a whitespace-separated list of names.
        std::stringstream ss;
        ss << ext_str.ptr;
        std::string name;
        while (ss >> name)
        {
            if (name == "cl_khr_fp64")
            {
                bdouble = CL_TRUE;
                break;
            }
        }
    }

    /* Create kernels to test with. The program is built once; the remaining
       kernels are created from it instead of rebuilding into the same program
       wrapper and overwriting its previous handle. */
    if( create_single_kernel_helper( context, &program, &kernel, 1, clone_kernel_test_kernel, "clone_kernel_test0" ) != 0 )
    {
        return -1;
    }
    kernel_buf_read = clCreateKernel(program, "buf_read_kernel", &error);
    test_error( error, "clCreateKernel failed." );
    kernel_buf_write = clCreateKernel(program, "buf_write_kernel", &error);
    test_error( error, "clCreateKernel failed." );

    // Kernel args
    // Value type
    int intarg = 0;
    float farg = 1.0;
    structArg sa = { 1, 1.0f };

    // Host storage is declared before the cl_mem wrappers so that it is
    // destroyed after them: `buf` is created with CL_MEM_USE_HOST_PTR and must
    // not outlive the memory it wraps.
    ScopedBytes pbuf( BUF_SIZE );
    ScopedBytes pbufRes( BUF_SIZE );

    // cl_mem
    clMemWrapper buf, bufOut;

    buf = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, BUF_SIZE, pbuf.ptr, &error);
    test_error( error, "clCreateBuffer failed." );

    bufOut = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, BUF_SIZE, NULL, &error);
    test_error( error, "clCreateBuffer failed." );

    // The pipe is only created; no kernel in this test takes it as an argument.
    clMemWrapper pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(int), 16, NULL, &error);
    test_error( error, "clCreatePipe failed." );

    error = clSetKernelArg(kernel, 0, sizeof(int), &intarg);
    test_error( error, "clSetKernelArg failed." );
    error = clSetKernelArg(kernel, 1, sizeof(float), &farg);
    test_error( error, "clSetKernelArg failed." );
    error = clSetKernelArg(kernel, 2, sizeof(structArg), &sa);
    test_error( error, "clSetKernelArg failed." );
    error = clSetKernelArg(kernel, 3, 128, NULL); // local mem
    test_error( error, "clSetKernelArg failed." );

    // clone the kernel
    clKernelWrapper clonek = clCloneKernel(kernel, &error);
    test_error( error, "clCloneKernel failed." );

    // set the last arg and enqueue
    error = clSetKernelArg(clonek, 4, sizeof(cl_mem), &bufOut);
    test_error( error, "clSetKernelArg failed." );
    error = clEnqueueNDRangeKernel(queue, clonek, 1, NULL, &ndrange1, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel failed." );

    // shallow clone test for a buffer argument
    error = clSetKernelArg(kernel_buf_write, 0, sizeof(cl_mem), &buf);
    test_error( error, "clSetKernelArg failed." );
    error = clSetKernelArg(kernel_buf_write, 1, sizeof(int), &write_val);
    test_error( error, "clSetKernelArg failed." );
    error = clEnqueueNDRangeKernel(queue, kernel_buf_write, 1, NULL, &ndrange1, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel failed." );

    error = clSetKernelArg(kernel_buf_read, 0, sizeof(cl_mem), &buf);
    test_error( error, "clSetKernelArg failed." );
    error = clSetKernelArg(kernel_buf_read, 1, sizeof(cl_mem), &bufOut);
    test_error( error, "clSetKernelArg failed." );

    // clone the kernel
    kernel_buf_read_cloned = clCloneKernel(kernel_buf_read, &error);
    test_error( error, "clCloneKernel API call failed." );
    error = clEnqueueNDRangeKernel(queue, kernel_buf_read_cloned, 1, NULL, &ndrange1, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel failed." );

    // read result back
    error = clEnqueueReadBuffer(queue, bufOut, CL_TRUE, 0, BUF_SIZE, pbufRes.ptr, 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed." );

    // Compare the results. Failures are reported with log_error: test_error
    // prints nothing here because the last OpenCL call succeeded.
    if (((int*)pbufRes.ptr)[0] != intarg)
    {
        log_error( "ERROR: clCloneKernel test failed. Failed to clone integer type argument.\n" );
        return -1;
    }
    if (((int*)pbufRes.ptr)[1] != sa.i)
    {
        log_error( "ERROR: clCloneKernel test failed. Failed to clone structure type argument.\n" );
        return -1;
    }
    if (((float*)pbufRes.ptr)[2] != farg)
    {
        log_error( "ERROR: clCloneKernel test failed. Failed to clone float type argument.\n" );
        return -1;
    }
    if (((float*)pbufRes.ptr)[3] != sa.f)
    {
        log_error( "ERROR: clCloneKernel test failed. Failed to clone structure type argument.\n" );
        return -1;
    }
    if (((int*)pbufRes.ptr)[6] != write_val)
    {
        log_error( "ERROR: clCloneKernel test failed. Failed to clone cl_mem argument.\n" );
        return -1;
    }

    if (bimg)
    {
        error = test_image_arg_shallow_clone(deviceID, context, queue, num_elements, pbufRes.ptr, bufOut);
        if (error != 0)
        {
            log_error( "ERROR: image arg shallow clone test failed.\n" );
            return -1;
        }
    }

    if (bdouble)
    {
        error = test_double_arg_clone(deviceID, context, queue, num_elements, pbufRes.ptr, bufOut);
        if (error != 0)
        {
            log_error( "ERROR: double arg clone test failed.\n" );
            return -1;
        }
    }

    return 0;
}

View File

@@ -0,0 +1,130 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/testHarness.h"
#ifndef _WIN32
#include <unistd.h>
#endif
#include "../../test_common/harness/conversions.h"
extern cl_uint gRandomSeed;
// Creates a context with clCreateContextFromType for the type and platform of
// the device under test, then checks that the context is usable: a command
// queue is created on it, a float-to-int conversion kernel is built and run
// over 10 random values, and the output is compared with the host conversion.
//
// The context and queue passed in by the harness are not used; the test runs
// entirely on the objects it creates.
//
// Returns 0 on success, -1 on a verification failure, or the OpenCL error code
// propagated by test_error.
int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    clContextWrapper context_to_test;
    clCommandQueueWrapper queue_to_test;
    size_t threads[1], localThreads[1];
    cl_float inputData[10];
    cl_int outputData[10];
    int i;
    RandomSeed seed( gRandomSeed );

    const char *sample_single_test_kernel[] = {
    "__kernel void sample_test(__global float *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = (int)src[tid];\n"
    "\n"
    "}\n" };

    cl_device_type type;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed\n");

    cl_platform_id platform;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed\n");

    // The property list is an array of integers terminated by 0; NULL is a
    // pointer constant and is not guaranteed to convert to cl_context_properties.
    cl_context_properties properties[3] = {
        (cl_context_properties)CL_CONTEXT_PLATFORM,
        (cl_context_properties)platform,
        0
    };

    context_to_test = clCreateContextFromType(properties, type, notify_callback, NULL, &error);
    test_error(error, "clCreateContextFromType failed");
    if (context_to_test == NULL) {
        log_error("clCreateContextFromType returned NULL, but error was CL_SUCCESS.\n");
        return -1;
    }

    queue_to_test = clCreateCommandQueueWithProperties(context_to_test, deviceID, NULL, &error);
    test_error(error, "clCreateCommandQueueWithProperties failed");
    if (queue_to_test == NULL) {
        log_error("clCreateCommandQueueWithProperties returned NULL, but error was CL_SUCCESS.\n");
        return -1;
    }

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context_to_test, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    streams[0] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Write some test data */
    memset( outputData, 0, sizeof( outputData ) );

    for (i=0; i<10; i++)
        inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);

    error = clEnqueueWriteBuffer(queue_to_test, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL);
    test_error( error, "Unable to set testing kernel data" );

    /* Test setting the arguments by index manually */
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;

    error = get_max_common_work_group_size( context_to_test, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue_to_test, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue_to_test, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for (i=0; i<10; i++)
    {
        if (outputData[i] != (int)inputData[i])
        {
            log_error( "ERROR: Data did not verify on first pass!\n" );
            return -1;
        }
    }

    return 0;
}

View File

@@ -0,0 +1,595 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/testHarness.h"
// Program source with one kernel, sample_test, which stores (int)src[tid] in
// dst[tid].
const char *sample_single_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n" };
// Length slot for sample_single_kernel; nothing in the visible part of this
// file assigns or reads it.
size_t sample_single_kernel_lengths[1];
// Program source supplied as two separate strings, one kernel per string:
// sample_test (float -> int) and sample_test2 (int -> float).
const char *sample_two_kernels[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n",
"__kernel void sample_test2(__global int *src, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)src[tid];\n"
"\n"
"}\n" };
// Length slots for sample_two_kernels; nothing in the visible part of this
// file assigns or reads them.
size_t sample_two_kernel_lengths[2];
// The same two kernels as sample_two_kernels, but concatenated into a single
// source string (there is no comma between the two kernel bodies).
const char *sample_two_kernels_in_1[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"
"__kernel void sample_test2(__global int *src, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)src[tid];\n"
"\n"
"}\n" };
// Length slot for sample_two_kernels_in_1; nothing in the visible part of this
// file assigns or reads it.
size_t sample_two_kernels_in_1_lengths[1];
// Single-string source for a kernel, test_kernel, that stores src[gid] + 1 in
// dst[gid].
const char *repeate_test_kernel =
"__kernel void test_kernel(__global int *src, __global int *dst)\n"
"{\n"
" dst[get_global_id(0)] = src[get_global_id(0)]+1;\n"
"}\n";
// Builds a program containing one kernel, creates the kernel with
// clCreateKernelsInProgram, and validates what clGetKernelInfo reports for it:
// the owning program, the context, the argument count (2) and the function
// name ("sample_test"), including the returned sizes.
//
// Returns 0 on success, -1 on a verification failure, or the OpenCL error code
// propagated by test_error.
int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    cl_program testProgram;
    clKernelWrapper kernel;
    cl_context testContext;
    unsigned int numKernels;
    cl_char testName[512];
    cl_uint testArgCount;
    size_t realSize;

    error = create_single_kernel_helper(context, &program, NULL, 1, sample_single_kernel, NULL);
    test_error( error, "Unable to build test program" );

    error = clCreateKernelsInProgram(program, 1, &kernel, &numKernels);
    test_error( error, "Unable to create single kernel program" );

    // The program defines exactly one kernel, so exactly one must be reported
    // (the two-kernel tests in this file perform the matching check).
    if( numKernels != 1 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check program and context pointers */
    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( cl_program ), &testProgram, &realSize );
    test_error( error, "Unable to get kernel's program" );
    if( (cl_program)testProgram != (cl_program)program )
    {
        log_error( "ERROR: Returned kernel's program does not match program used to create it! (Got %p, expected %p)\n", (cl_program)testProgram, (cl_program)program );
        return -1;
    }
    if( realSize != sizeof( cl_program ) )
    {
        log_error( "ERROR: Returned size of kernel's program does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_program ), (int)realSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( cl_context ), &testContext, &realSize );
    test_error( error, "Unable to get kernel's context" );
    if( (cl_context)testContext != (cl_context)context )
    {
        log_error( "ERROR: Returned kernel's context does not match context used to create it! (Got %p, expected %p)\n", (cl_context)testContext, (cl_context)context );
        return -1;
    }
    if( realSize != sizeof( cl_context ) )
    {
        log_error( "ERROR: Returned size of kernel's context does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_context ), (int)realSize );
        return -1;
    }

    /* Test arg count */
    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &realSize );
    test_error( error, "Unable to get size of arg count info from kernel" );
    if( realSize != sizeof( testArgCount ) )
    {
        log_error( "ERROR: size of arg count not valid! %d\n", (int)realSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );
    if( testArgCount != 2 )
    {
        log_error( "ERROR: Kernel arg count does not match!\n" );
        return -1;
    }

    /* Test function name */
    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, &realSize );
    test_error( error, "Unable to get name from kernel" );
    if( strcmp( (char *)testName, "sample_test" ) != 0 )
    {
        log_error( "ERROR: Kernel names do not match!\n" );
        return -1;
    }
    // The reported size must include the terminating NUL.
    if( realSize != strlen( (char *)testName ) + 1 )
    {
        log_error( "ERROR: Length of kernel name returned does not validate (expected %d, got %d)\n", (int)strlen( (char *)testName ) + 1, (int)realSize );
        return -1;
    }

    /* All done */
    return 0;
}
// Builds a program from two source strings (one kernel each), creates both
// kernels with clCreateKernelsInProgram, and checks that the two returned
// kernels are "sample_test" and "sample_test2" (in either order, each exactly
// once) and that the first returned kernel reports two arguments.
//
// Returns 0 on success, -1 on a verification failure, or the OpenCL error code
// propagated by test_error.
int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel[2];
    unsigned int numKernels;
    cl_char testName[ 512 ];
    cl_uint testArgCount;

    error = create_single_kernel_helper(context, &program, NULL, 2, sample_two_kernels, NULL);
    test_error( error, "Unable to build test program" );

    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
    test_error( error, "Unable to create dual kernel program" );

    if( numKernels != 2 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check first kernel */
    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );

    int found_kernel1 = 0, found_kernel2 = 0;

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", (char *)testName, "sample_test", "sample_test2");
        return -1;
    }

    /* Check second kernel */
    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from second kernel" );

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        if (found_kernel1) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        if (found_kernel2) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", (char *)testName, "sample_test", "sample_test2");
        return -1;
    }

    if( !found_kernel1 || !found_kernel2 )
    {
        log_error( "ERROR: Kernel names do not match.\n" );
        if (!found_kernel1)
            log_error("Kernel \"%s\" not returned.\n", "sample_test");
        // Name the kernel that is actually missing.
        if (!found_kernel2)
            log_error("Kernel \"%s\" not returned.\n", "sample_test2");
        return -1;
    }

    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );

    if( testArgCount != 2 )
    {
        log_error( "ERROR: wrong # of args for kernel\n" );
        return -1;
    }

    /* All done */
    return 0;
}
// Builds a program from a single source string that defines two kernels,
// creates both with clCreateKernelsInProgram, and checks that the two returned
// kernels are "sample_test" and "sample_test2" (in either order, each exactly
// once) and that the first returned kernel reports two arguments.
//
// Returns 0 on success, -1 on a verification failure, or the OpenCL error code
// propagated by test_error.
int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel[2];
    unsigned int numKernels;
    cl_char testName[512];
    cl_uint testArgCount;

    error = create_single_kernel_helper(context, &program, NULL, 1, sample_two_kernels_in_1, NULL);
    test_error( error, "Unable to build test program" );

    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
    test_error( error, "Unable to create dual kernel program" );

    if( numKernels != 2 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check first kernel */
    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );

    int found_kernel1 = 0, found_kernel2 = 0;

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", (char *)testName, "sample_test", "sample_test2");
        return -1;
    }

    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );

    if( testArgCount != 2 )
    {
        log_error( "ERROR: wrong # of args for kernel\n" );
        return -1;
    }

    /* Check second kernel */
    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        if (found_kernel1) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        if (found_kernel2) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", (char *)testName, "sample_test", "sample_test2");
        return -1;
    }

    if( !found_kernel1 || !found_kernel2 )
    {
        log_error( "ERROR: Kernel names do not match.\n" );
        if (!found_kernel1)
            log_error("Kernel \"%s\" not returned.\n", "sample_test");
        // Name the kernel that is actually missing.
        if (!found_kernel2)
            log_error("Kernel \"%s\" not returned.\n", "sample_test2");
        return -1;
    }

    /* All done */
    return 0;
}
// Builds the two-kernel program and creates each kernel by name with
// clCreateKernel, requesting them in the reverse of their lookup order used
// elsewhere ("sample_test2" first). Returns 0 on success, -1 on failure.
int test_load_two_kernels_manually( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel1, kernel2;
    int error;

    /* Now create a test program */
    error = create_single_kernel_helper(context, &program, NULL, 1, sample_two_kernels_in_1, NULL);
    test_error( error, "Unable to build test program" );

    /* Try manually creating kernels (backwards just in case) */
    kernel1 = clCreateKernel( program, "sample_test2", &error );
    if( kernel1 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 1" );
        return -1;
    }

    // Check the error code as well as the handle, same as for kernel 1.
    kernel2 = clCreateKernel( program, "sample_test", &error );
    if( kernel2 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 2" );
        return -1;
    }

    return 0;
}
// Verifies clGetProgramInfo for CL_PROGRAM_NUM_KERNELS and
// CL_PROGRAM_KERNEL_NAMES on the two-kernel program: the count must be 2 and
// the semicolon-separated, NUL-terminated name list must be one of the two
// accepted orderings. Finally both kernels are created by name.
// Returns 0 on success, non-zero on failure.
int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel1, kernel2;
    int error;
    size_t i;

    /* Now create a test program */
    error = create_single_kernel_helper(context, &program, NULL, 1, sample_two_kernels_in_1, NULL);
    test_error( error, "Unable to build test program" );

    /* Lookup the number of kernels in the program. */
    size_t total_kernels = 0;
    error = clGetProgramInfo(program, CL_PROGRAM_NUM_KERNELS, sizeof(size_t), &total_kernels, NULL);
    test_error( error, "Unable to get program info num kernels");
    if (total_kernels != 2)
    {
        // The query itself succeeded, so do not report a CL error code here.
        log_error( "ERROR: Program did not contain two kernels (found %d)\n", (int)total_kernels );
        return -1;
    }

    /* Lookup the kernel names. Either ordering is accepted. */
    const char* actual_names[] = { "sample_test;sample_test2", "sample_test2;sample_test"} ;
    const size_t num_actual_names = sizeof( actual_names ) / sizeof( actual_names[0] );
    size_t kernel_names_len = 0;
    error = clGetProgramInfo(program, CL_PROGRAM_KERNEL_NAMES, 0, NULL, &kernel_names_len);
    test_error( error, "Unable to get length of kernel names list." );
    if (kernel_names_len != (strlen(actual_names[0])+1))
    {
        log_error( "ERROR: Kernel names length did not match (expected %d, got %d)\n",
                   (int)(strlen(actual_names[0])+1), (int)kernel_names_len );
        return -1;
    }

    const size_t len = (kernel_names_len+1)*sizeof(char);
    char* kernel_names = (char*)malloc(len);
    if (kernel_names == NULL)
    {
        log_error( "ERROR: Unable to allocate %d bytes for kernel names\n", (int)len );
        return -1;
    }

    error = clGetProgramInfo(program, CL_PROGRAM_KERNEL_NAMES, len, kernel_names, &kernel_names_len);
    if (error != CL_SUCCESS)
    {
        print_error( error, "Unable to get kernel names list." );
        free(kernel_names);
        return error;
    }

    /* Check to see if the kernel name array is null terminated.
       Guard the index so a bogus returned size cannot read out of bounds. */
    if (kernel_names_len == 0 || kernel_names_len > len || kernel_names[kernel_names_len-1] != '\0')
    {
        free(kernel_names);
        log_error( "ERROR: Kernel name list was not null terminated\n" );
        return -1;
    }

    /* Check to see if the correct kernel name string was returned. */
    for( i = 0; i < num_actual_names; i++ )
        if( 0 == strcmp(actual_names[i],kernel_names) )
            break;
    if (i == num_actual_names)
    {
        // Log before freeing: the message reads kernel_names.
        log_error( "Kernel names \"%s\" did not match:\n", kernel_names );
        for( i = 0; i < num_actual_names; i++ )
            log_error( "\t\t\"%s\"\n", actual_names[i] );
        free(kernel_names);
        return -1;
    }
    free(kernel_names);

    /* Try manually creating both kernels by name */
    kernel1 = clCreateKernel( program, "sample_test", &error );
    if( kernel1 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 1" );
        return -1;
    }
    kernel2 = clCreateKernel( program, "sample_test2", &error );
    if( kernel2 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 2" );
        return -1;
    }

    return 0;
}
// OpenCL C source used by test_enqueue_task: a single kernel "sample_test"
// that fills dst[0..count-1] with (global id + index). Every work-item writes
// the whole range, so the result is only well defined for a single work-item.
static const char *single_task_kernel[] = {
"__kernel void sample_test(__global int *dst, int count)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" for( int i = 0; i < count; i++ )\n"
" dst[i] = tid + i;\n"
"\n"
"}\n" };
// Runs the single_task_kernel through clEnqueueTask and checks that the
// output buffer holds 0, 1, ..., count-1.
// Returns 0 on success, non-zero on failure.
int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper output;
    cl_int count;

    if( create_single_kernel_helper( context, &program, &kernel, 1, single_task_kernel, "sample_test" ) )
        return -1;

    // Create args
    count = 100;
    output = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * count, NULL, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &output );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( cl_int ), &count );
    test_error( error, "Unable to set kernel argument" );

    // Run task
    error = clEnqueueTask( queue, kernel, 0, NULL, NULL );
    test_error( error, "Unable to run task" );

    // Read results
    cl_int *results = (cl_int*)malloc(sizeof(cl_int)*count);
    if( results == NULL )
    {
        log_error( "ERROR: Unable to allocate host memory for results\n" );
        return -1;
    }
    error = clEnqueueReadBuffer( queue, output, CL_TRUE, 0, sizeof( cl_int ) * count, results, 0, NULL, NULL );
    if( error != CL_SUCCESS )
    {
        // Handled by hand (not test_error) so the host buffer is not leaked.
        print_error( error, "Unable to read results" );
        free(results);
        return error;
    }

    // Validate
    for( cl_int i = 0; i < count; i++ )
    {
        if( results[ i ] != i )
        {
            log_error( "ERROR: Task result value %d did not validate! Expected %d, got %d\n", (int)i, (int)i, (int)results[ i ] );
            free(results);
            return -1;
        }
    }

    /* All done */
    free(results);
    return 0;
}
// Number of cl_int elements processed per iteration of
// test_repeated_setup_cleanup.
#define TEST_SIZE 1000

// Repeatedly (100 times) creates a fresh context, queue, program, kernel and
// two buffers, runs "test_kernel" over TEST_SIZE elements, checks that
// out[j] == in[j] + 1, and releases every object again.
// Returns 0 on success, non-zero on failure.
int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    // Owns the host arrays so that every return path (including the early
    // returns taken by test_error on failure) frees them.
    struct HostBuffers
    {
        cl_int *in;
        cl_int *out;
        HostBuffers() : in( NULL ), out( NULL ) {}
        ~HostBuffers() { free( in ); free( out ); }
    } host;

    cl_context local_context;
    cl_command_queue local_queue;
    cl_program local_program;
    cl_kernel local_kernel;
    cl_mem local_mem_in, local_mem_out;
    cl_event local_event;
    size_t global_dim[3];
    int i, j, error;
    cl_int status;

    global_dim[0] = TEST_SIZE;
    global_dim[1] = 1; global_dim[2] = 1;

    host.in = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE);
    host.out = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE);
    if( host.in == NULL || host.out == NULL )
    {
        log_error( "ERROR: Unable to allocate host buffers\n" );
        return -1;
    }
    cl_int *inData = host.in;
    cl_int *outData = host.out;

    for (i=0; i<TEST_SIZE; i++) {
        inData[i] = i;
    }

    // NOTE(review): OpenCL objects created in an iteration are still not
    // released when a later call in that iteration fails.
    for (i=0; i<100; i++) {
        memset(outData, 0, sizeof(cl_int)*TEST_SIZE);

        local_context = clCreateContext(NULL, 1, &deviceID, notify_callback, NULL, &error);
        test_error( error, "clCreateContext failed");

        local_queue = clCreateCommandQueueWithProperties(local_context, deviceID, 0, &error);
        test_error( error, "clCreateCommandQueueWithProperties failed");

        error = create_single_kernel_helper(local_context, &local_program, NULL, 1, &repeate_test_kernel, NULL);
        test_error( error, "Unable to build test program" );

        local_kernel = clCreateKernel(local_program, "test_kernel", &error);
        test_error( error, "clCreateKernel failed");

        local_mem_in = clCreateBuffer(local_context, CL_MEM_READ_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
        test_error( error, "clCreateBuffer failed");

        local_mem_out = clCreateBuffer(local_context, CL_MEM_WRITE_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
        test_error( error, "clCreateBuffer failed");

        error = clEnqueueWriteBuffer(local_queue, local_mem_in, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), inData, 0, NULL, NULL);
        test_error( error, "clEnqueueWriteBuffer failed");

        // Zero the output buffer so stale device data cannot pass validation.
        error = clEnqueueWriteBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
        test_error( error, "clEnqueueWriteBuffer failed");

        error = clSetKernelArg(local_kernel, 0, sizeof(local_mem_in), &local_mem_in);
        test_error( error, "clSetKernelArg failed");

        error = clSetKernelArg(local_kernel, 1, sizeof(local_mem_out), &local_mem_out);
        test_error( error, "clSetKernelArg failed");

        error = clEnqueueNDRangeKernel(local_queue, local_kernel, 1, NULL, global_dim, NULL, 0, NULL, &local_event);
        test_error( error, "clEnqueueNDRangeKernel failed");

        error = clWaitForEvents(1, &local_event);
        test_error( error, "clWaitForEvents failed");

        error = clGetEventInfo(local_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL);
        test_error( error, "clGetEventInfo failed");

        if (status != CL_COMPLETE) {
            log_error( "Kernel execution not complete: status %d.\n", status);
            return -1;
        }

        error = clEnqueueReadBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
        test_error( error, "clEnqueueReadBuffer failed");

        clReleaseEvent(local_event);
        clReleaseMemObject(local_mem_in);
        clReleaseMemObject(local_mem_out);
        clReleaseKernel(local_kernel);
        clReleaseProgram(local_program);
        clReleaseCommandQueue(local_queue);
        clReleaseContext(local_context);

        for (j=0; j<TEST_SIZE; j++) {
            if (outData[j] != inData[j] + 1) {
                log_error("Results failed to validate at iteration %d. %d != %d.\n", i, outData[j], inData[j] + 1);
                return -1;
            }
        }
    }

    return 0;
}

View File

@@ -0,0 +1,60 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/testHarness.h"
#ifndef _WIN32
#include <unistd.h>
#endif
// Returns 1 when x has exactly one bit set (x is a power of two), else 0.
// Zero is rejected explicitly: the bit trick x & (x-1) alone would accept it.
int IsAPowerOfTwo( unsigned long x )
{
    return x != 0 && 0 == (x & (x-1));
}
// Checks CL_DEVICE_MEM_BASE_ADDR_ALIGN (reported in bits): it must be at
// least the size of a 16-element vector of long (or of int when gHasLong is
// false) and must be a power of two.
// Returns 0 on success, non-zero on failure.
int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    // Required minimum, in bytes; converted to bits where it is compared.
    cl_uint min_alignment;
    if (gHasLong)
        min_alignment = sizeof(cl_long)*16;
    else
        min_alignment = sizeof(cl_int)*16;

    int error = 0;
    cl_uint alignment;
    error = clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(alignment), &alignment, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_MEM_BASE_ADDR_ALIGN failed");
    log_info("Device reported CL_DEVICE_MEM_BASE_ADDR_ALIGN = %lu bits.\n", (unsigned long)alignment);

    // Verify the size is large enough
    if (alignment < min_alignment*8) {
        log_error("ERROR: alignment too small. Minimum alignment for %s16 is %lu bits, device reported %lu bits.\n",
                  (gHasLong) ? "long" : "int",
                  (unsigned long)(min_alignment*8), (unsigned long)alignment);
        return -1;
    }

    // Verify the size is a power of two
    if (!IsAPowerOfTwo((unsigned long)alignment)) {
        log_error("ERROR: alignment is not a power of two.\n");
        return -1;
    }

    return 0;
}

View File

@@ -0,0 +1,141 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
extern "C" { extern cl_uint gRandomSeed;}
// This test is designed to stress changing kernel arguments between execute calls (that are asynchronous and thus
// potentially overlapping) to make sure each kernel gets the right arguments
// Note: put a delay loop in the kernel to make sure we have time to queue the next kernel before this one finishes
// OpenCL C source for test_kernel_arg_changes. Each work-item spins through an
// empty delay loop and then stores (image width * tid, image height * tid)
// into outDimensions[tid*2] and outDimensions[tid*2 + 1].
const char *inspect_image_kernel_source[] = {
"__kernel void sample_test(read_only image2d_t src, __global int *outDimensions )\n"
"{\n"
" int tid = get_global_id(0), i;\n"
" for( i = 0; i < 100000; i++ ); \n"
" outDimensions[tid * 2] = get_image_width(src) * tid;\n"
" outDimensions[tid * 2 + 1] = get_image_height(src) * tid;\n"
"\n"
"}\n" };
// Number of image/result-buffer pairs created and kernel launches queued.
#define NUM_TRIES 100
// Global work size of every launch; the result array holds two ints per item.
#define NUM_THREADS 2048
// Queues NUM_TRIES launches of the same kernel back to back, swapping the
// image and output-buffer arguments before each launch, then reads every
// output buffer and confirms each launch saw the image it was given.
// Returns 0 on success, non-zero on failure.
int test_kernel_arg_changes(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    int error, attempt;
    clMemWrapper srcImages[ NUM_TRIES ];
    size_t dims[ NUM_TRIES ][ 2 ];          // [attempt][0] = width, [attempt][1] = height
    clMemWrapper outBuffers[ NUM_TRIES ];
    cl_image_format format;
    size_t widthLimit, heightLimit;
    size_t globalSize[1], localSize[1];
    cl_int readback[ NUM_THREADS * 2 ];
    char message[ 128 ];
    RandomSeed seed( gRandomSeed );

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // Any 8-bit format will do for this test
    error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &format );
    test_error( error, "Unable to obtain suitable image format to test with!" );

    // Build the kernel under test
    error = create_single_kernel_helper( context, &program, &kernel, 1, inspect_image_kernel_source, "sample_test" );
    test_error( error, "Unable to create testing kernel" );

    // Query the largest 2D image the device supports
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( widthLimit ), &widthLimit, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( heightLimit ), &heightLimit, NULL );
    test_error( error, "Unable to get max image dimensions for device" );

    // Pick the launch geometry
    globalSize[0] = NUM_THREADS;
    error = get_max_common_work_group_size( context, kernel, globalSize[0], &localSize[0] );
    test_error( error, "Unable to get work group size for kernel" );

    const size_t outBytes = sizeof( cl_int ) * globalSize[0] * 2;

    // Allocate one randomly sized image and one output buffer per attempt
    for( attempt = 0; attempt < NUM_TRIES; ++attempt )
    {
        size_t *dim = dims[ attempt ];
        dim[ 0 ] = genrand_int32(seed) % (widthLimit/32) + 1;
        dim[ 1 ] = genrand_int32(seed) % (heightLimit/32) + 1;

        srcImages[ attempt ] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY),
                                                &format, dim[ 0 ], dim[ 1 ], 0, NULL, &error );
        if( srcImages[ attempt ] == NULL )
        {
            log_error("Failed to create image %d of size %d x %d (%s).\n", attempt, (int)dim[0], (int)dim[1], IGetErrorString( error ));
            return -1;
        }

        outBuffers[ attempt ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), outBytes, NULL, &error );
        if( outBuffers[ attempt ] == NULL )
        {
            log_error("Failed to create array %d of size %d.\n", attempt, (int)globalSize[0]*2);
            return -1;
        }
    }

    // Queue every launch without waiting in between
    for( attempt = 0; attempt < NUM_TRIES; ++attempt )
    {
        error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &srcImages[ attempt ] );
        sprintf( message, "Unable to set argument 0 for kernel try %d", attempt );
        test_error( error, message );

        error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &outBuffers[ attempt ] );
        sprintf( message, "Unable to set argument 1 for kernel try %d", attempt );
        test_error( error, message );

        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL );
        sprintf( message, "Unable to execute kernel try %d", attempt );
        test_error( error, message );
    }

    // Fetch and check each attempt's output in turn
    for( attempt = 0; attempt < NUM_TRIES; ++attempt )
    {
        error = clEnqueueReadBuffer( queue, outBuffers[ attempt ], CL_TRUE, 0, outBytes, readback, 0, NULL, NULL );
        sprintf( message, "Unable to read results for kernel try %d", attempt );
        test_error( error, message );

        // Work-item n is expected to have stored n * width and n * height
        for( int item = 0; item < NUM_THREADS; ++item )
        {
            const int wantWidth = (int)dims[ attempt ][ 0 ] * item;
            const int wantHeight = (int)dims[ attempt ][ 1 ] * item;
            const cl_int gotWidth = readback[ item * 2 + 0 ];
            const cl_int gotHeight = readback[ item * 2 + 1 ];

            if( gotWidth != wantWidth )
            {
                log_error( "ERROR: Verficiation for kernel try %d, sample %d FAILED, expected a width of %d, got %d\n",
                           attempt, item, wantWidth, gotWidth );
                return -1;
            }
            if( gotHeight != wantHeight )
            {
                log_error( "ERROR: Verficiation for kernel try %d, sample %d FAILED, expected a height of %d, got %d\n",
                           attempt, item, wantHeight, gotHeight );
                return -1;
            }
        }
    }

    // Every attempt verified
    return 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,277 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/conversions.h"
// This test is designed to stress passing multiple vector parameters to kernels and verifying access between them all
// printf-style template for the kernel built by test_multi_arg_set. It has six
// %s slots, filled in order with the element type names of src1, src2, src3,
// dst1, dst2 and dst3; the kernel copies each srcN[tid] to dstN[tid].
const char *multi_arg_kernel_source_pattern =
"__kernel void sample_test(__global %s *src1, __global %s *src2, __global %s *src3, __global %s *dst1, __global %s *dst2, __global %s *dst3 )\n"
"{\n"
" int tid = get_global_id(0);\n"
" dst1[tid] = src1[tid];\n"
" dst2[tid] = src2[tid];\n"
" dst3[tid] = src3[tid];\n"
"}\n";
extern cl_uint gRandomSeed;
#define MAX_ERROR_TOLERANCE 0.0005f
int test_multi_arg_set(cl_device_id device, cl_context context, cl_command_queue queue,
ExplicitType vec1Type, int vec1Size,
ExplicitType vec2Type, int vec2Size,
ExplicitType vec3Type, int vec3Size, MTdata d)
{
clProgramWrapper program;
clKernelWrapper kernel;
int error, i, j;
clMemWrapper streams[ 6 ];
size_t threads[1], localThreads[1];
char programSrc[ 10248 ], vec1Name[ 64 ], vec2Name[ 64 ], vec3Name[ 64 ];
char sizeNames[][ 4 ] = { "", "2", "3", "4", "", "", "", "8" };
const char *ptr;
void *initData[3], *resultData[3];
// Create the program source
sprintf( vec1Name, "%s%s", get_explicit_type_name( vec1Type ), sizeNames[ vec1Size - 1 ] );
sprintf( vec2Name, "%s%s", get_explicit_type_name( vec2Type ), sizeNames[ vec2Size - 1 ] );
sprintf( vec3Name, "%s%s", get_explicit_type_name( vec3Type ), sizeNames[ vec3Size - 1 ] );
sprintf( programSrc, multi_arg_kernel_source_pattern,
vec1Name, vec2Name, vec3Name, vec1Name, vec2Name, vec3Name,
vec1Size, vec1Size, vec2Size, vec2Size, vec3Size, vec3Size );
ptr = programSrc;
// Create our testing kernel
error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_test" );
test_error( error, "Unable to create testing kernel" );
// Get thread dimensions
threads[0] = 1024;
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
test_error( error, "Unable to get work group size for kernel" );
// Create input streams
initData[ 0 ] = create_random_data( vec1Type, d, (unsigned int)threads[ 0 ] * vec1Size );
streams[ 0 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, initData[ 0 ], &error );
test_error( error, "Unable to create testing stream" );
initData[ 1 ] = create_random_data( vec2Type, d, (unsigned int)threads[ 0 ] * vec2Size );
streams[ 1 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, initData[ 1 ], &error );
test_error( error, "Unable to create testing stream" );
initData[ 2 ] = create_random_data( vec3Type, d, (unsigned int)threads[ 0 ] * vec3Size );
streams[ 2 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, initData[ 2 ], &error );
test_error( error, "Unable to create testing stream" );
streams[ 3 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, NULL, &error );
test_error( error, "Unable to create testing stream" );
streams[ 4 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, NULL, &error );
test_error( error, "Unable to create testing stream" );
streams[ 5 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, NULL, &error );
test_error( error, "Unable to create testing stream" );
// Set the arguments
error = 0;
for( i = 0; i < 6; i++ )
error |= clSetKernelArg( kernel, i, sizeof( cl_mem ), &streams[ i ] );
test_error( error, "Unable to set arguments for kernel" );
// Execute!
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Unable to execute kernel" );
// Read results
resultData[0] = malloc( get_explicit_type_size( vec1Type ) * vec1Size * threads[0] );
resultData[1] = malloc( get_explicit_type_size( vec2Type ) * vec2Size * threads[0] );
resultData[2] = malloc( get_explicit_type_size( vec3Type ) * vec3Size * threads[0] );
error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, get_explicit_type_size( vec1Type ) * vec1Size * threads[ 0 ], resultData[0], 0, NULL, NULL );
error |= clEnqueueReadBuffer( queue, streams[ 4 ], CL_TRUE, 0, get_explicit_type_size( vec2Type ) * vec2Size * threads[ 0 ], resultData[1], 0, NULL, NULL );
error |= clEnqueueReadBuffer( queue, streams[ 5 ], CL_TRUE, 0, get_explicit_type_size( vec3Type ) * vec3Size * threads[ 0 ], resultData[2], 0, NULL, NULL );
test_error( error, "Unable to read result stream" );
// Verify
char *ptr1 = (char *)initData[ 0 ], *ptr2 = (char *)resultData[ 0 ];
size_t span = get_explicit_type_size( vec1Type );
for( i = 0; i < (int)threads[0]; i++ )
{
for( j = 0; j < vec1Size; j++ )
{
if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
{
log_error( "ERROR: Value did not validate for component %d of item %d of stream 0!\n", j, i );
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return -1;
}
}
ptr1 += span * vec1Size;
ptr2 += span * vec1Size;
}
ptr1 = (char *)initData[ 1 ];
ptr2 = (char *)resultData[ 1 ];
span = get_explicit_type_size( vec2Type );
for( i = 0; i < (int)threads[0]; i++ )
{
for( j = 0; j < vec2Size; j++ )
{
if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
{
log_error( "ERROR: Value did not validate for component %d of item %d of stream 1!\n", j, i );
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return -1;
}
}
ptr1 += span * vec2Size;
ptr2 += span * vec2Size;
}
ptr1 = (char *)initData[ 2 ];
ptr2 = (char *)resultData[ 2 ];
span = get_explicit_type_size( vec3Type );
for( i = 0; i < (int)threads[0]; i++ )
{
for( j = 0; j < vec3Size; j++ )
{
if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
{
log_error( "ERROR: Value did not validate for component %d of item %d of stream 2!\n", j, i );
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return -1;
}
}
ptr1 += span * vec3Size;
ptr2 += span * vec3Size;
}
// If we got here, everything verified successfully
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return 0;
}
// Runs test_multi_arg_set for every ordered triple of element types
// (char, short, int, float) combined with every ordered triple of vector
// widths (2, 4, 8). Stops at the first failing combination.
// Returns 0 when all combinations pass, -1 otherwise.
int test_kernel_arg_multi_setup_exhaustive(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    ExplicitType candidates[] = { kChar, kShort, kInt, kFloat };
    const int candidateCount = (int)( sizeof( candidates ) / sizeof( candidates[ 0 ] ) );
    const int widths[] = { 2, 4, 8 };
    const int widthCount = (int)( sizeof( widths ) / sizeof( widths[ 0 ] ) );
    RandomSeed seed( gRandomSeed );

    log_info( "\n" ); // for formatting

    for( int a = 0; a < candidateCount; a++ )
    {
        for( int b = 0; b < candidateCount; b++ )
        {
            for( int c = 0; c < candidateCount; c++ )
            {
                log_info( "\n\ttesting %s, %s, %s...", get_explicit_type_name( candidates[ a ] ), get_explicit_type_name( candidates[ b ] ), get_explicit_type_name( candidates[ c ] ) );

                // One progress dot per vector-width combination
                for( int x = 0; x < widthCount; x++ )
                {
                    for( int y = 0; y < widthCount; y++ )
                    {
                        for( int z = 0; z < widthCount; z++ )
                        {
                            log_info(".");
                            fflush( stdout);

                            const int failed = test_multi_arg_set( device, context, queue,
                                                                   candidates[ a ], widths[ x ],
                                                                   candidates[ b ], widths[ y ],
                                                                   candidates[ c ], widths[ z ], seed );
                            if( failed )
                                return -1;
                        }
                    }
                }
            }
        }
    }

    log_info( "\n" );
    return 0;
}
// For every ordered triple of vector widths (2, 4, 8), runs
// test_multi_arg_set four times with randomly chosen element types
// (char, short, int, float). Stops at the first failing configuration.
// Returns 0 when all configurations pass, -1 otherwise.
int test_kernel_arg_multi_setup_random(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // Loop through a selection of combinations
    ExplicitType types[] = { kChar, kShort, kInt, kFloat };
    const int typeCount = (int)( sizeof( types ) / sizeof( types[ 0 ] ) );
    int size1, size2, size3;
    RandomSeed seed( gRandomSeed );

    num_elements = 3*3*3*4;
    log_info( "Testing %d random configurations\n", num_elements );

    // Loop through every combination of vector size
    for( size1 = 2; size1 <= 8; size1 <<= 1 )
    {
        for( size2 = 2; size2 <= 8; size2 <<= 1 )
        {
            for( size3 = 2; size3 <= 8; size3 <<= 1 )
            {
                // Loop through 4 type combinations for each size combination
                int n;
                for (n=0; n<4; n++) {
                    int typeIdx[ 3 ];
                    for( int k = 0; k < 3; k++ )
                    {
                        typeIdx[ k ] = (int)get_random_float( 0, (float)typeCount, seed );
                        // NOTE(review): if get_random_float can return its upper
                        // bound, the truncated index would equal typeCount and
                        // read past the valid types - confirm; clamp to be safe.
                        if( typeIdx[ k ] < 0 )
                            typeIdx[ k ] = 0;
                        if( typeIdx[ k ] >= typeCount )
                            typeIdx[ k ] = typeCount - 1;
                    }

                    log_info( "\ttesting %s%d, %s%d, %s%d...\n",
                              get_explicit_type_name( types[ typeIdx[ 0 ] ] ), size1,
                              get_explicit_type_name( types[ typeIdx[ 1 ] ] ), size2,
                              get_explicit_type_name( types[ typeIdx[ 2 ] ] ), size3 );

                    if( test_multi_arg_set( device, context, queue,
                                            types[ typeIdx[ 0 ] ], size1,
                                            types[ typeIdx[ 1 ] ], size2,
                                            types[ typeIdx[ 2 ] ], size3, seed ) )
                        return -1;
                }
            }
        }
    }

    return 0;
}

View File

@@ -0,0 +1,695 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/conversions.h"
extern cl_uint gRandomSeed;
// Kernel "sample_test": converts each float in src to int and stores it in dst.
const char *sample_single_test_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n" };
// Kernel "sample_test" taking a struct that holds two __global int pointers;
// writes A[tid] + B[tid] to dst[tid].
const char *sample_struct_test_kernel[] = {
"typedef struct {\n"
"__global int *A;\n"
"__global int *B;\n"
"} input_pair_t;\n"
"\n"
"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src->A[tid] + src->B[tid];\n"
"\n"
"}\n" };
// Kernel "sample_test" taking an array of {int A; int B;} structs;
// writes src[tid].A + src[tid].B to dst[tid].
const char *sample_struct_array_test_kernel[] = {
"typedef struct {\n"
"int A;\n"
"int B;\n"
"} input_pair_t;\n"
"\n"
"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src[tid].A + src[tid].B;\n"
"\n"
"}\n" };
// Kernel "sample_test" with two __constant int inputs; writes their
// element-wise sum to dst.
const char *sample_const_test_kernel[] = {
"__kernel void sample_test(__constant int *src1, __constant int *src2, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src1[tid] + src2[tid];\n"
"\n"
"}\n" };
// Kernel "sample_test" that adds a program-scope __constant (1024) to each
// input element.
const char *sample_const_global_test_kernel[] = {
"__constant int addFactor = 1024;\n"
"__kernel void sample_test(__global int *src1, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src1[tid] + addFactor;\n"
"\n"
"}\n" };
// Two separate source strings (note the comma between them): "sample_test"
// converts float to int, "sample_test2" converts int to float.
const char *sample_two_kernel_program[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n",
"__kernel void sample_test2(__global int *src, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)src[tid];\n"
"\n"
"}\n" };
// Exercises clGetKernelInfo on a freshly built "sample_test" kernel: checks
// the reported size and value of the function name, argument count, owning
// program and context, and that the reference count can be queried.
// Returns 0 on success, non-zero on failure.
int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    // Wrappers release the program and kernel on every return path,
    // including the early failure returns below.
    clProgramWrapper program;
    clKernelWrapper kernel;
    cl_program testProgram;
    cl_context testContext;
    cl_char name[ 512 ];
    cl_uint numArgs, numInstances;
    size_t paramSize;

    /* Create reference */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }
    // Raw handle used for comparison and logging
    cl_program expectedProgram = program;

    /* Function name: size query (param_value_size = 0, param_value = NULL), then value */
    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel function name param size" );
    if( paramSize != strlen( "sample_test" ) + 1 )
    {
        log_error( "ERROR: Kernel function name param returns invalid size (expected %d, got %d)\n", (int)strlen( "sample_test" ) + 1, (int)paramSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( name ), name, NULL );
    test_error( error, "Unable to get kernel function name" );
    if( strcmp( (char *)name, "sample_test" ) != 0 )
    {
        log_error( "ERROR: Kernel function name returned invalid value (expected sample_test, got %s)\n", (char *)name );
        return -1;
    }

    /* Argument count */
    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel arg count param size" );
    if( paramSize != sizeof( numArgs ) )
    {
        log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numArgs ), (int)paramSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL );
    test_error( error, "Unable to get kernel arg count" );
    if( numArgs != 2 )
    {
        log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d)\n", 2, numArgs );
        return -1;
    }

    /* Reference count: only the size and the success of the query are checked */
    error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel reference count param size" );
    if( paramSize != sizeof( numInstances ) )
    {
        log_error( "ERROR: Kernel reference count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numInstances ), (int)paramSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL );
    test_error( error, "Unable to get kernel reference count" );

    /* Owning program */
    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel program param size" );
    if( paramSize != sizeof( testProgram ) )
    {
        log_error( "ERROR: Kernel program param returns invalid size (expected %d, got %d)\n", (int)sizeof( testProgram ), (int)paramSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( testProgram ), &testProgram, NULL );
    test_error( error, "Unable to get kernel program" );
    if( testProgram != expectedProgram )
    {
        log_error( "ERROR: Kernel program returned invalid value (expected %p, got %p)\n", (void *)expectedProgram, (void *)testProgram );
        return -1;
    }

    /* Owning context */
    error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( testContext ), &testContext, NULL );
    test_error( error, "Unable to get kernel context" );
    if( testContext != context )
    {
        log_error( "ERROR: Kernel context returned invalid value (expected %p, got %p)\n", (void *)context, (void *)testContext );
        return -1;
    }

    /* The kernel and program are released by their wrappers */
    return 0;
}
int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[2];
size_t threads[1], localThreads[1];
cl_float inputData[100];
cl_int outputData[100];
RandomSeed seed( gRandomSeed );
int i;
/* Create a kernel to test with */
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
{
return -1;
}
/* Create some I/O streams */
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 100, NULL, &error);
test_error( error, "Creating test array failed" );
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 100, NULL, &error);
test_error( error, "Creating test array failed" );
/* Write some test data */
memset( outputData, 0, sizeof( outputData ) );
for (i=0; i<100; i++)
inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*100, (void *)inputData, 0, NULL, NULL);
test_error( error, "Unable to set testing kernel data" );
/* Set the arguments */
error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
test_error( error, "Unable to set kernel arguments" );
error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
test_error( error, "Unable to set kernel arguments" );
/* Test running the kernel and verifying it */
threads[0] = (size_t)100;
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
test_error( error, "Unable to get work group size to use" );
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
/* Try again */
if( localThreads[0] > 1 )
localThreads[0] /= 2;
while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] )
localThreads[0]--;
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
/* And again */
if( localThreads[0] > 1 )
localThreads[0] /= 2;
while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] )
localThreads[0]--;
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
/* One more time */
localThreads[0] = (unsigned int)1;
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
return 0;
}
/*
 * Sets the two buffer arguments of the "sample_test" kernel in reverse index order (index 1 first,
 * then index 0), runs the kernel over 10 elements and checks that every output equals the
 * host-side integer conversion of the matching input float.
 *
 * Returns 0 on success, an OpenCL error code if an API call fails, or -1 if the kernel could not
 * be built or the results do not verify.
 */
int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    size_t threads[1], localThreads[1];
    cl_float inputData[10];
    cl_int outputData[10];
    RandomSeed seed( gRandomSeed );

    /* Build the program and fetch the kernel under test */
    const int buildStatus = create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" );
    if( buildStatus != 0 )
        return -1;

    /* Input stream (floats) and output stream (ints), both sized from the host arrays */
    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( inputData ), NULL, &error );
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outputData ), NULL, &error );
    test_error( error, "Creating test array failed" );

    /* Generate the random input and upload it */
    memset( outputData, 0, sizeof( outputData ) );
    for( size_t idx = 0; idx < 10; ++idx )
    {
        inputData[idx] = get_random_float( -(float) 0x7fffffff, (float) 0x7fffffff, seed );
    }
    error = clEnqueueWriteBuffer( queue, streams[0], CL_TRUE, 0, sizeof( inputData ), (void *)inputData, 0, NULL, NULL );
    test_error( error, "Unable to set testing kernel data" );

    /* Deliberately set argument 1 before argument 0 */
    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Execute and read the results back */
    threads[0] = (size_t)10;
    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof( outputData ), (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    /* Verify */
    for( size_t idx = 0; idx < 10; ++idx )
    {
        const bool matches = ( outputData[idx] == (int)inputData[idx] );
        if( !matches )
        {
            log_error( "ERROR: Data did not verify on first pass!\n" );
            return -1;
        }
    }
    return 0;
}
/*
 * Passes a struct holding two buffer handles as argument 0 of the "sample_test" kernel built from
 * sample_struct_test_kernel, plus an output buffer as argument 1, runs the kernel over 10 elements
 * and checks that every output equals the sum of the two matching random inputs.
 *
 * All OpenCL objects are held in wrapper objects so that they are released on every return path,
 * including the early returns taken by test_error.
 *
 * Returns 0 on success, an OpenCL error code if an API call fails, or -1 if the kernel could not
 * be built or the results do not verify.
 */
int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streamA, streamB, outStream;
    size_t threads[1], localThreads[1];
    cl_int outputData[10];
    int i;
    cl_int randomTestDataA[10], randomTestDataB[10];
    MTdata d;
    /* Host-side image of the struct handed to the kernel as argument 0 */
    struct img_pair_t
    {
        cl_mem streamA;
        cl_mem streamB;
    } image_pair;

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    d = init_genrand( gRandomSeed );
    for( i = 0; i < 10; i++ )
    {
        randomTestDataA[i] = (cl_int)genrand_int32(d);
        randomTestDataB[i] = (cl_int)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    streamA = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error);
    test_error( error, "Creating test array failed" );
    streamB = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error);
    test_error( error, "Creating test array failed" );
    outStream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* The struct only borrows the handles; ownership stays with the wrappers */
    image_pair.streamA = streamA;
    image_pair.streamB = streamB;

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( image_pair ), &image_pair);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), &outStream);
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;
    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for (i=0; i<10; i++)
    {
        /* The inputs are full-range 32-bit values, so add them as unsigned to get the wrapped
           sum without signed overflow on the host. */
        cl_int expected = (cl_int)( (cl_uint)randomTestDataA[i] + (cl_uint)randomTestDataB[i] );
        if (outputData[i] != expected)
        {
            log_error( "ERROR: Data did not verify!\n" );
            return -1;
        }
    }

    /* program, kernel and the three buffers are released by their wrappers */
    return 0;
}
/*
 * Runs the "sample_test" kernel built from sample_const_test_kernel with two 10-element input
 * buffers and one output buffer, and checks that every output equals the sum of the two matching
 * inputs. Before building anything it confirms that the device reports a maximum constant buffer
 * size of at least 10 cl_ints.
 *
 * Returns 0 on success, an OpenCL error code if an API call fails, or -1 if the device limit is
 * too small, the kernel could not be built, or the results do not verify.
 */
int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[3];
    size_t threads[1], localThreads[1];
    cl_int outputData[10];
    cl_int randomTestDataA[10], randomTestDataB[10];
    cl_ulong maxSize;
    const size_t dataBytes = sizeof(cl_int) * 10;

    /* The test buffers must fit within the device's constant buffer limit */
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, NULL );
    test_error( error, "Unable to get max constant buffer size" );
    if( maxSize < dataBytes )
    {
        log_error( "ERROR: Unable to test constant argument to kernel: max size of constant buffer is reported as %d!\n", (int)maxSize );
        return -1;
    }

    /* Build the program and fetch the kernel under test */
    const int buildStatus = create_single_kernel_helper( context, &program, &kernel, 1, sample_const_test_kernel, "sample_test" );
    if( buildStatus != 0 )
        return -1;

    /* Random inputs, masked to 24 bits so they are non-negative and their sum cannot overflow */
    MTdata d = init_genrand( gRandomSeed );
    for( int n = 0; n < 10; ++n )
    {
        randomTestDataA[n] = (cl_int)genrand_int32(d) & 0xffffff;
        randomTestDataB[n] = (cl_int)genrand_int32(d) & 0xffffff;
    }
    free_mtdata(d);
    d = NULL;

    /* Two inputs initialised from the host arrays, one output */
    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), dataBytes, randomTestDataA, &error );
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), dataBytes, randomTestDataB, &error );
    test_error( error, "Creating test array failed" );
    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), dataBytes, NULL, &error );
    test_error( error, "Creating test array failed" );

    /* Bind the three buffers to kernel arguments 0..2 */
    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] );
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Execute and read the results back */
    threads[0] = (size_t)10;
    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, dataBytes, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    /* Verify */
    for( int n = 0; n < 10; ++n )
    {
        const cl_int expectedSum = randomTestDataA[n] + randomTestDataB[n];
        if( outputData[n] == expectedSum )
            continue;

        log_error( "ERROR: Data sample %d did not verify! %d does not match %d + %d (%d)\n", n, outputData[n], randomTestDataA[n], randomTestDataB[n], expectedSum );
        return -1;
    }
    return 0;
}
/*
 * Uploads an array of 10 {A, B} integer pairs as argument 0 of the "sample_test" kernel built from
 * sample_struct_array_test_kernel, runs the kernel over 10 elements and checks that every output
 * equals A + B of the matching pair.
 *
 * Returns 0 on success, an OpenCL error code if an API call fails, or -1 if the kernel could not
 * be built or the results do not verify.
 */
int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    size_t threads[1], localThreads[1];
    cl_int outputData[10];
    int i;
    MTdata d;

    typedef struct img_pair_type
    {
        int A;
        int B;
    } image_pair_t;

    image_pair_t image_pair[ 10 ];

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_array_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    d = init_genrand( gRandomSeed );
    for( i = 0; i < 10; i++ )
    {
        /* Both members must be initialised: the whole array is copied into the input buffer
           below and both are read again during verification. */
        image_pair[i].A = (cl_int)genrand_int32(d);
        image_pair[i].B = (cl_int)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(image_pair_t) * 10, (void *)image_pair, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;
    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for (i=0; i<10; i++)
    {
        /* The inputs are full-range 32-bit values, so add them as unsigned to get the wrapped
           sum without signed overflow on the host. */
        cl_int expected = (cl_int)( (cl_uint)image_pair[i].A + (cl_uint)image_pair[i].B );
        if (outputData[i] != expected)
        {
            log_error( "ERROR: Data did not verify!\n" );
            return -1;
        }
    }
    return 0;
}
/*
 * Builds the program in sample_two_kernel_program and exercises clCreateKernelsInProgram twice:
 * first to query the kernel count only (which must be 2), then to actually create both kernels.
 *
 * The program is held in a wrapper so it is released on every return path, including the early
 * returns taken by test_error and the count check.
 *
 * Returns 0 on success, an OpenCL error code if an API call fails, or -1 if the reported kernel
 * count is wrong.
 */
int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    cl_kernel kernel[2] = { NULL, NULL };
    unsigned int kernelCount;

    error = create_single_kernel_helper(context, &program, NULL, 2, sample_two_kernel_program, NULL);
    test_error(error, "Unable to build test program");

    /* Try getting the kernel count */
    error = clCreateKernelsInProgram( program, 0, NULL, &kernelCount );
    test_error( error, "Unable to get kernel count for built program" );
    if( kernelCount != 2 )
    {
        log_error( "ERROR: Returned kernel count from clCreateKernelsInProgram is incorrect! (got %d, expected 2)\n", kernelCount );
        return -1;
    }

    /* Try actually getting the kernels */
    error = clCreateKernelsInProgram( program, 2, kernel, NULL );
    test_error( error, "Unable to get kernels for built program" );

    /* The kernels are only created, never run; release them straight away */
    clReleaseKernel( kernel[0] );
    clReleaseKernel( kernel[1] );
    return 0;
}
/*
 * Runs the "sample_test" kernel built from sample_const_global_test_kernel over 10 random inputs
 * and checks that every output equals the matching input plus 1024.
 *
 * Returns 0 on success, an OpenCL error code if an API call fails, or -1 if the kernel could not
 * be built or the results do not verify.
 */
int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    size_t threads[1], localThreads[1];
    cl_int outputData[10];
    cl_int randomTestDataA[10];
    const size_t dataBytes = sizeof(cl_int) * 10;

    /* Build the program and fetch the kernel under test */
    const int buildStatus = create_single_kernel_helper( context, &program, &kernel, 1, sample_const_global_test_kernel, "sample_test" );
    if( buildStatus != 0 )
        return -1;

    /* Random inputs, masked to 16 bits so they are non-negative and small enough that adding
       1024 during verification cannot overflow */
    MTdata d = init_genrand( gRandomSeed );
    for( int n = 0; n < 10; ++n )
    {
        randomTestDataA[n] = (cl_int)genrand_int32(d) & 0xffff;
    }
    free_mtdata(d);
    d = NULL;

    /* Input initialised from the host array, plus an output buffer */
    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), dataBytes, randomTestDataA, &error );
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), dataBytes, NULL, &error );
    test_error( error, "Creating test array failed" );

    /* Bind the buffers to kernel arguments 0 and 1 */
    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Execute and read the results back */
    threads[0] = (size_t)10;
    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, dataBytes, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    /* Verify */
    for( int n = 0; n < 10; ++n )
    {
        const cl_int expectedValue = randomTestDataA[n] + 1024;
        if( outputData[n] == expectedValue )
            continue;

        log_error( "ERROR: Data sample %d did not verify! %d does not match %d + 1024 (%d)\n", n, outputData[n], randomTestDataA[n], expectedValue );
        return -1;
    }
    return 0;
}

View File

@@ -0,0 +1,756 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/testHarness.h"
extern cl_uint gRandomSeed;
// Queries one clGetMemObjectInfo parameter and validates both the value and the returned size.
//
//   mem        memory object to query
//   paramName  cl_mem_info enumerant passed to clGetMemObjectInfo
//   val        caller-declared variable that receives the queried value
//   expected   value that val must compare equal to
//   name       string literal naming the parameter, spliced into the log messages
//   type       printf conversion (string literal) used to print both expected and val
//   cast       type that val is cast to before being printed
//
// The macro expands to plain statements, not a block. It assigns to a variable named `error` and
// passes `&size` to clGetMemObjectInfo, so both must be declared in the calling scope, and it
// executes `return -1` on a mismatch (test_error is also invoked on an API failure), so it can
// only be used inside a function with a compatible return type.
// NOTE(review): only val is cast for printing; expected is passed to log_error as-is, so callers
// must supply an expected expression whose type already matches the `type` conversion.
#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast ) \
error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get mem object " name ); \
if( val != expected ) \
{ \
log_error( "ERROR: Mem object " name " did not validate! (expected " type ", got " type " from %s:%d)\n", \
expected, (cast)val, __FILE__, __LINE__ ); \
return -1; \
} \
if( size != sizeof( val ) ) \
{ \
log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d from %s:%d)\n", \
(int)sizeof( val ), (int)size , __FILE__, __LINE__ ); \
return -1; \
}
// Memory-object destructor callback: frees the host allocation handed over as user data.
// The cl_mem argument is unnamed because it is not used. In this file the callback is registered
// through clSetMemObjectDestructorCallback with a malloc'ed buffer as `data`.
static void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void * data )
{
free( data );
}
// Draws random numbers from the generator *d until one of them, reduced modulo `mod`, is
// non-zero, and returns that reduced value; the result is therefore in the range [1, mod - 1].
// `mod` must be at least 2: with mod == 1 every draw reduces to 0 and the loop never ends, and
// mod == 0 is a division by zero.
static unsigned int
get_image_dim(MTdata *d, unsigned int mod)
{
    for( ;; )
    {
        const unsigned int candidate = (unsigned int)genrand_int32(*d) % mod;
        if( candidate != 0 )
            return candidate;
    }
}
/*
 * Exercises clGetMemObjectInfo on buffers and sub-buffers.
 *
 * For every flag combination in bufferFlags a buffer of 4 * addressAlign bytes is created and
 * its type, flags, size, map count, reference count, context, associated mem object and offset
 * are queried and validated. For every compatible entry of subBufferFlags a sub-buffer covering
 * the second addressAlign-sized quarter of that buffer is then created and the same queries are
 * validated against the values expected for the sub-buffer (including inherited flags).
 *
 * Host allocations handed to clCreateBuffer are freed by mem_obj_destructor_callback once the
 * buffer object is destroyed.
 *
 * Returns CL_SUCCESS on success, an OpenCL error code if an API call fails, or -1 if a queried
 * value or returned size does not validate.
 */
int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    int error;
    size_t size;            // written by TEST_MEM_OBJECT_PARAM and the manual size queries
    void * buffer = NULL;

    clMemWrapper bufferObject;
    clMemWrapper subBufferObject;

    // Device access (RW / RO / WO) x host pointer mode (none / COPY / ALLOC / ALLOC|COPY / USE),
    // repeated for each host access mode (default / HOST_READ_ONLY / HOST_WRITE_ONLY / HOST_NO_ACCESS).
    cl_mem_flags bufferFlags[] = {
        CL_MEM_READ_WRITE,
        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        CL_MEM_READ_ONLY,
        CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_WRITE_ONLY,
        CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
    };

    // Device access (RW / RO / WO / inherit) for each host access mode (inherit / HOST_READ_ONLY /
    // HOST_WRITE_ONLY / HOST_NO_ACCESS).
    cl_mem_flags subBufferFlags[] = {
        CL_MEM_READ_WRITE,
        CL_MEM_READ_ONLY,
        CL_MEM_WRITE_ONLY,
        0,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_READ_ONLY | 0,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_WRITE_ONLY | 0,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_NO_ACCESS | 0,
    };

    // Get the address alignment, so we can make sure the sub-buffer test later works properly.
    cl_uint addressAlignBits;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(addressAlignBits), &addressAlignBits, NULL );
    // Without this check a failed query would leave addressAlignBits uninitialised.
    test_error( error, "Unable to get device memory base address alignment" );

    // The device reports the alignment in bits; work in bytes and never go below 128.
    size_t addressAlign = addressAlignBits/8;
    if ( addressAlign < 128 )
    {
        addressAlign = 128;
    }

    for ( unsigned int i = 0; i < sizeof(bufferFlags) / sizeof(cl_mem_flags); ++i )
    {
        if ( bufferFlags[ i ] & CL_MEM_USE_HOST_PTR )
        {
            // Create a buffer object to test against.
            buffer = malloc( addressAlign * 4 );
            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error );
            if ( error )
            {
                free( buffer );
                test_error( error, "Unable to create buffer (CL_MEM_USE_HOST_PTR) to test with" );
            }

            // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
            error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer );
            test_error( error, "Unable to set mem object destructor callback" );

            void * ptr;
            TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_HOST_PTR, ptr, buffer, "host pointer", "%p", void * )
        }
        else if ( (bufferFlags[ i ] & CL_MEM_ALLOC_HOST_PTR) && (bufferFlags[ i ] & CL_MEM_COPY_HOST_PTR) )
        {
            // Create a buffer object to test against.
            buffer = malloc( addressAlign * 4 );
            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error );
            if ( error )
            {
                free( buffer );
                test_error( error, "Unable to create buffer (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" );
            }

            // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
            error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer );
            test_error( error, "Unable to set mem object destructor callback" );
        }
        else if ( bufferFlags[ i ] & CL_MEM_ALLOC_HOST_PTR )
        {
            // Create a buffer object to test against.
            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error );
            test_error( error, "Unable to create buffer (CL_MEM_ALLOC_HOST_PTR) to test with" );
        }
        else if ( bufferFlags[ i ] & CL_MEM_COPY_HOST_PTR )
        {
            // Create a buffer object to test against.
            buffer = malloc( addressAlign * 4 );
            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error );
            if ( error )
            {
                free( buffer );
                test_error( error, "Unable to create buffer (CL_MEM_COPY_HOST_PTR) to test with" );
            }

            // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
            error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer );
            test_error( error, "Unable to set mem object destructor callback" );
        }
        else
        {
            // Create a buffer object to test against.
            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error );
            test_error( error, "Unable to create buffer to test with" );
        }

        // Perform buffer object queries.
        cl_mem_object_type type;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_TYPE, type, CL_MEM_OBJECT_BUFFER, "type", "%d", int )

        cl_mem_flags flags;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_FLAGS, flags, (unsigned int)bufferFlags[ i ], "flags", "%d", unsigned int )

        size_t sz;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_SIZE, sz, (size_t)( addressAlign * 4 ), "size", "%ld", size_t )

        // Map and reference counts: only the returned size is validated, not the value.
        cl_uint mapCount;
        error = clGetMemObjectInfo( bufferObject, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size );
        test_error( error, "Unable to get mem object map count" );
        if( size != sizeof( mapCount ) )
        {
            log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n",
                       (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ );
            return -1;
        }

        cl_uint refCount;
        error = clGetMemObjectInfo( bufferObject, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
        test_error( error, "Unable to get mem object reference count" );
        if( size != sizeof( refCount ) )
        {
            log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n",
                       (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ );
            return -1;
        }

        cl_context otherCtx;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context )

        cl_mem origObj;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (void *)NULL, "associated mem object", "%p", void * )

        size_t offset;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_OFFSET, offset, 0L, "offset", "%ld", size_t )

        // Every sub-buffer covers the second addressAlign-sized quarter of the parent buffer.
        cl_buffer_region region;
        region.origin = addressAlign;
        region.size = addressAlign;

        // Loop over possible sub-buffer objects to create.
        for ( unsigned int j = 0; j < sizeof(subBufferFlags) / sizeof(cl_mem_flags); ++j )
        {
            if ( subBufferFlags[ j ] & CL_MEM_READ_WRITE )
            {
                if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) )
                    continue; // Buffer must be read_write for sub-buffer to be read_write.
            }
            if ( subBufferFlags[ j ] & CL_MEM_READ_ONLY )
            {
                if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) && !(bufferFlags[ i ] & CL_MEM_READ_ONLY) )
                    continue; // Buffer must be read_write or read_only for sub-buffer to be read_only
            }
            if ( subBufferFlags[ j ] & CL_MEM_WRITE_ONLY )
            {
                if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) && !(bufferFlags[ i ] & CL_MEM_WRITE_ONLY) )
                    continue; // Buffer must be read_write or write_only for sub-buffer to be write_only
            }
            if ( subBufferFlags[ j ] & CL_MEM_HOST_READ_ONLY )
            {
                if ( (bufferFlags[ i ] & CL_MEM_HOST_NO_ACCESS) || (bufferFlags[ i ] & CL_MEM_HOST_WRITE_ONLY) )
                    continue; // Buffer must be host all access or host read_only for sub-buffer to be host read_only
            }
            if ( subBufferFlags[ j ] & CL_MEM_HOST_WRITE_ONLY )
            {
                if ( (bufferFlags[ i ] & CL_MEM_HOST_NO_ACCESS) || (bufferFlags[ i ] & CL_MEM_HOST_READ_ONLY) )
                    continue; // Buffer must be host all access or host write_only for sub-buffer to be host write_only
            }

            subBufferObject = clCreateSubBuffer( bufferObject, subBufferFlags[ j ], CL_BUFFER_CREATE_TYPE_REGION, &region, &error );
            test_error( error, "Unable to create sub-buffer to test against" );

            // Perform sub-buffer object queries (the result variables of the parent queries are reused).
            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_TYPE, type, CL_MEM_OBJECT_BUFFER, "type", "%d", int )

            // Flags expected on the sub-buffer: its own flags, plus whichever access groups it left
            // unspecified taken from the parent, plus the parent's host pointer flags.
            cl_mem_flags inheritedFlags = subBufferFlags[ j ];
            if ( (subBufferFlags[ j ] & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY)) == 0 )
            {
                inheritedFlags |= bufferFlags[ i ] & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY);
            }
            inheritedFlags |= bufferFlags[ i ] & (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR);
            if ( (subBufferFlags[ j ] & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) == 0)
            {
                inheritedFlags |= bufferFlags[ i ] & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS);
            }
            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_FLAGS, flags, (unsigned int)inheritedFlags, "flags", "%d", unsigned int )

            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_SIZE, sz, (size_t)( addressAlign ), "size", "%ld", size_t )

            if ( bufferFlags[ i ] & CL_MEM_USE_HOST_PTR )
            {
                // The sub-buffer's host pointer must be the parent's pointer advanced by the region origin.
                void * ptr;
                void * offsetInBuffer = (char *)buffer + addressAlign;

                TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_HOST_PTR, ptr, offsetInBuffer, "host pointer", "%p", void * )
            }

            error = clGetMemObjectInfo( subBufferObject, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size );
            test_error( error, "Unable to get mem object map count" );
            if( size != sizeof( mapCount ) )
            {
                log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n",
                           (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ );
                return -1;
            }

            error = clGetMemObjectInfo( subBufferObject, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
            test_error( error, "Unable to get mem object reference count" );
            if( size != sizeof( refCount ) )
            {
                log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n",
                           (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ );
                return -1;
            }

            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context )

            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (cl_mem)bufferObject, "associated mem object", "%p", void * )

            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_OFFSET, offset, (size_t)( addressAlign ), "offset", "%ld", size_t )

            clReleaseMemObject( subBufferObject );
            subBufferObject = NULL;
        }

        clReleaseMemObject( bufferObject );
        bufferObject = NULL;
    }

    return CL_SUCCESS;
}
/*
 * Validates the generic clGetMemObjectInfo queries on an image object.
 *
 *   image        pointer to the image memory object to query
 *   objectFlags  flags the image is expected to report for CL_MEM_FLAGS
 *   imageInfo    descriptor whose image_type is the expected CL_MEM_TYPE
 *   imageFormat  not used by this function
 *   pixelSize    not used by this function
 *   context      context the image is expected to report for CL_MEM_CONTEXT
 *
 * Type, flags, context and offset (expected 0) are checked for value and returned size. For the
 * map count and reference count only the returned size is checked. CL_MEM_SIZE is queried but its
 * value is not validated.
 *
 * Returns CL_SUCCESS on success, an OpenCL error code if an API call fails, or -1 if a value or
 * returned size does not validate.
 */
int test_get_imageObject_info( cl_mem * image, cl_mem_flags objectFlags, cl_image_desc *imageInfo, cl_image_format *imageFormat, size_t pixelSize, cl_context context )
{
    // TEST_MEM_OBJECT_PARAM relies on locals named exactly `error` and `size`.
    int error;
    size_t size;

    cl_mem_object_type type;
    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_TYPE, type, imageInfo->image_type, "type", "%d", int )

    cl_mem_flags flags;
    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_FLAGS, flags, (unsigned int)objectFlags, "flags", "%d", unsigned int )

    // The size returned is not constrained by the spec, so only the call itself is checked.
    size_t sz;
    error = clGetMemObjectInfo( *image, CL_MEM_SIZE, sizeof( sz ), &sz, NULL );
    test_error( error, "Unable to get mem size" );

    cl_uint mapCount;
    error = clGetMemObjectInfo( *image, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size );
    test_error( error, "Unable to get mem object map count" );
    if( sizeof( mapCount ) != size )
    {
        log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n",
                   (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ );
        return -1;
    }

    cl_uint refCount;
    error = clGetMemObjectInfo( *image, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
    test_error( error, "Unable to get mem object reference count" );
    if( sizeof( refCount ) != size )
    {
        log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n",
                   (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ );
        return -1;
    }

    cl_context otherCtx;
    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context )

    size_t offset;
    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_OFFSET, offset, 0L, "offset", "%ld", size_t )

    return CL_SUCCESS;
}
int test_get_image_info( cl_device_id deviceID, cl_context context, cl_mem_object_type type )
{
int error;
size_t size;
void * image = NULL;
cl_mem imageObject;
cl_image_desc imageInfo;
cl_mem_flags imageFlags[] = {
CL_MEM_READ_WRITE,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
CL_MEM_READ_ONLY,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_WRITE_ONLY,
CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY,
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
};
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
cl_image_format imageFormat;
size_t pixelSize = 4;
imageFormat.image_channel_order = CL_RGBA;
imageFormat.image_channel_data_type = CL_UNORM_INT8;
imageInfo.image_width = imageInfo.image_height = imageInfo.image_depth = 1;
imageInfo.image_array_size = 0;
imageInfo.num_mip_levels = imageInfo.num_samples = 0;
imageInfo.mem_object = NULL;
d = init_genrand( gRandomSeed );
for ( unsigned int i = 0; i < sizeof(imageFlags) / sizeof(cl_mem_flags); ++i )
{
imageInfo.image_row_pitch = 0;
imageInfo.image_slice_pitch = 0;
switch (type)
{
case CL_MEM_OBJECT_IMAGE1D:
imageInfo.image_width = get_image_dim(&d, 1023);
imageInfo.image_type = CL_MEM_OBJECT_IMAGE1D;
break;
case CL_MEM_OBJECT_IMAGE2D:
imageInfo.image_width = get_image_dim(&d, 1023);
imageInfo.image_height = get_image_dim(&d, 1023);
imageInfo.image_type = CL_MEM_OBJECT_IMAGE2D;
break;
case CL_MEM_OBJECT_IMAGE3D:
error = checkFor3DImageSupport(deviceID);
if (error == CL_IMAGE_FORMAT_NOT_SUPPORTED)
{
log_info("Device doesn't support 3D images. Skipping test.\n");
return CL_SUCCESS;
}
imageInfo.image_width = get_image_dim(&d, 127);
imageInfo.image_height = get_image_dim(&d, 127);
imageInfo.image_depth = get_image_dim(&d, 127);
imageInfo.image_type = CL_MEM_OBJECT_IMAGE3D;
break;
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
imageInfo.image_width = get_image_dim(&d, 1023);
imageInfo.image_array_size = get_image_dim(&d, 1023);
imageInfo.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
break;
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
imageInfo.image_width = get_image_dim(&d, 255);
imageInfo.image_height = get_image_dim(&d, 255);
imageInfo.image_array_size = get_image_dim(&d, 255);
imageInfo.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
break;
}
if ( imageFlags[i] & CL_MEM_USE_HOST_PTR )
{
// Create an image object to test against.
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image with (CL_MEM_USE_HOST_PTR) to test with" );
}
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
void * ptr;
TEST_MEM_OBJECT_PARAM( imageObject, CL_MEM_HOST_PTR, ptr, image, "host pointer", "%p", void * )
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
// release image object
clReleaseMemObject(imageObject);
// Try again with non-zero rowPitch.
imageInfo.image_row_pitch = imageInfo.image_width * pixelSize;
switch (type)
{
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
case CL_MEM_OBJECT_IMAGE3D:
imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height;
break;
}
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image2d (CL_MEM_USE_HOST_PTR) to test with" );
}
// Make sure image2d is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
TEST_MEM_OBJECT_PARAM( imageObject, CL_MEM_HOST_PTR, ptr, image, "host pointer", "%p", void * )
ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
}
else if ( (imageFlags[i] & CL_MEM_ALLOC_HOST_PTR) && (imageFlags[i] & CL_MEM_COPY_HOST_PTR) )
{
// Create an image object to test against.
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" );
}
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
int ret = test_get_imageObject_info( &imageObject, imageFlags[ i ], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
// release image object
clReleaseMemObject(imageObject);
// Try again with non-zero rowPitch.
imageInfo.image_row_pitch = imageInfo.image_width * pixelSize;
switch (type)
{
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
case CL_MEM_OBJECT_IMAGE3D:
imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height;
break;
}
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" );
}
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
}
else if ( imageFlags[i] & CL_MEM_ALLOC_HOST_PTR )
{
// Create an image object to test against.
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, NULL, &error );
test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR) to test with" );
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
}
else if ( imageFlags[i] & CL_MEM_COPY_HOST_PTR )
{
// Create an image object to test against.
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image with (CL_MEM_COPY_HOST_PTR) to test with" );
}
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
clReleaseMemObject(imageObject);
// Try again with non-zero rowPitch.
imageInfo.image_row_pitch = imageInfo.image_width * pixelSize;
switch (type)
{
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
case CL_MEM_OBJECT_IMAGE3D:
imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height;
break;
}
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image with (CL_MEM_COPY_HOST_PTR) to test with" );
}
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
}
else
{
// Create an image object to test against.
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, NULL, &error );
test_error( error, "Unable to create image to test with" );
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
}
clReleaseMemObject( imageObject );
}
return CL_SUCCESS;
}
// Runs the shared image-info checks for 2D images.
// ignoreQueue and num_elements are part of the signature but are not read here.
int test_get_image2d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE2D;
    return test_get_image_info( deviceID, context, imageType );
}
// Runs the shared image-info checks for 3D images.
// ignoreQueue and num_elements are part of the signature but are not read here.
int test_get_image3d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE3D;
    return test_get_image_info( deviceID, context, imageType );
}
// Runs the shared image-info checks for 1D images.
// ignoreQueue and num_elements are part of the signature but are not read here.
int test_get_image1d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE1D;
    return test_get_image_info( deviceID, context, imageType );
}
// Runs the shared image-info checks for 1D image arrays.
// ignoreQueue and num_elements are part of the signature but are not read here.
int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY;
    return test_get_image_info( deviceID, context, imageType );
}
// Runs the shared image-info checks for 2D image arrays.
// ignoreQueue and num_elements are part of the signature but are not read here.
int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
    return test_get_image_info( deviceID, context, imageType );
}

View File

@@ -0,0 +1,108 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
static volatile cl_int sDestructorIndex;
// Destructor callback registered on the test mem objects.
// userData points at an int slot; the callback bumps the file-level counter sDestructorIndex
// and stores the new value there, so the slot records the position in which this callback ran.
// memObject is not used.
void CL_CALLBACK mem_destructor_callback( cl_mem memObject, void * userData )
{
    int * const orderSlot = static_cast<int *>( userData );

    // Callbacks are invoked in a guaranteed order, so a plain (non-atomic) increment is enough.
    *orderSlot = ++sDestructorIndex;
}
#ifndef ABS
// Absolute value of x. The argument is fully parenthesized so that compound expressions
// such as ABS( a - b ) expand correctly. x is evaluated more than once, so do not pass
// expressions with side effects.
#define ABS( x ) ( ( ( x ) < 0 ) ? -( x ) : ( x ) )
#endif
/*
 * Registers three destructor callbacks on memObject, releases the object and verifies that
 * the callbacks ran in reverse registration order (the callback registered first must run
 * last, i.e. callback i is expected at position 3 - i).
 *
 * memObject is released here and reset to NULL so the wrapper does not release it again.
 *
 * Returns 0 when all three callbacks ran in the expected order, -1 otherwise. Failures
 * reported through test_error return from inside that macro.
 *
 * NOTE: the wait below spins without a timeout; an implementation that never invokes the
 * callbacks hangs this test instead of failing it (intentional, see bugzilla 6316).
 */
int test_mem_object_destructor_callback_single( clMemWrapper &memObject )
{
    cl_int error;
    int i;

    // Set up some variables to catch the order in which callbacks are called
    volatile int callbackOrders[ 3 ] = { 0, 0, 0 };
    sDestructorIndex = 0;

    // Set up the callbacks
    error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 0 ] );
    test_error( error, "Unable to set destructor callback" );

    error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 1 ] );
    test_error( error, "Unable to set destructor callback" );

    error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 2 ] );
    test_error( error, "Unable to set destructor callback" );

    // Now release the buffer, which SHOULD call the callbacks
    error = clReleaseMemObject( memObject );
    test_error( error, "Unable to release test buffer" );

    // Note: since we manually released the mem wrapper, we need to set it to NULL to prevent a double-release
    memObject = NULL;

    // At this point, all three callbacks should have already been called
    int numErrors = 0;
    for( i = 0; i < 3; i++ )
    {
        // Spin waiting for the release to finish.  If you don't call the mem_destructor_callback, you will not
        // pass the test.  bugzilla 6316
        while( 0 == callbackOrders[i] )
        {}

        const int expectedOrder = 3 - i;
        const int actualOrder = ABS( callbackOrders[ i ] );
        if( actualOrder != expectedOrder )
        {
            // Report the order that was actually required (3 - i), not the loop index.
            log_error( "\tERROR: Callback %d was called in the wrong order! (Was called order %d, should have been order %d)\n",
                       i+1, actualOrder, expectedOrder );
            numErrors++;
        }
    }

    return ( numErrors > 0 ) ? -1 : 0;
}
/*
 * Exercises destructor callbacks on a buffer and, when the device supports images, on a
 * 16x16 2D image. Returns 0 on success and -1 when either object fails the callback check;
 * creation failures are reported through test_error.
 */
int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clMemWrapper testBuffer, testImage;
    cl_int error;

    // Buffer case: always run.
    testBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, 1024, NULL, &error );
    test_error( error, "Unable to create testing buffer" );

    const int bufferResult = test_mem_object_destructor_callback_single( testBuffer );
    if( bufferResult != 0 )
    {
        log_error( "ERROR: Destructor callbacks for buffer object FAILED\n" );
        return -1;
    }

    // Image case: only when checkForImageSupport reports 0 for this device.
    if( checkForImageSupport( deviceID ) != 0 )
        return 0;

    cl_image_format imageFormat = { CL_RGBA, CL_SIGNED_INT8 };
    testImage = create_image_2d( context, CL_MEM_READ_ONLY, &imageFormat, 16, 16, 0, NULL, &error );
    test_error( error, "Unable to create testing image" );

    const int imageResult = test_mem_object_destructor_callback_single( testImage );
    if( imageResult != 0 )
    {
        log_error( "ERROR: Destructor callbacks for image object FAILED\n" );
        return -1;
    }

    return 0;
}

View File

@@ -0,0 +1,121 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#ifndef _WIN32
#include <unistd.h>
#endif
#include "../../test_common/harness/conversions.h"
extern cl_uint gRandomSeed;
// Native kernel body: interprets userData as { source pointer, destination pointer, count }
// and copies count cl_int values from source to dest. A count of zero or less copies nothing.
static void CL_CALLBACK test_native_kernel_fn( void *userData )
{
    struct arg_struct {
        cl_int * source;
        cl_int * dest;
        cl_int   count;
    };

    const arg_struct *params = (const arg_struct *)userData;
    const cl_int *in = params->source;
    cl_int *out = params->dest;

    for( cl_int remaining = params->count; remaining > 0; --remaining )
        *out++ = *in++;
}
/*
 * Enqueues a native (host function) kernel that copies n_elems cl_int values from one buffer
 * to another, then reads the destination back and compares it with the input.
 *
 * Returns 0 on success or when the device does not report CL_EXEC_NATIVE_KERNEL, 1 when the
 * copied data does not match, -1 when the host staging buffers cannot be allocated, and the
 * OpenCL error code for failures reported through test_error.
 *
 * NOTE(review): on the MSVC path, early returns from inside test_error still skip the
 * _freea calls at the end of the function.
 */
int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    int error;
    RandomSeed seed( gRandomSeed );

    // Check if we support native kernels. The query result must be checked before
    // 'capabilities' is read, otherwise a failed query leaves it uninitialized.
    cl_device_exec_capabilities capabilities;
    error = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof(capabilities), &capabilities, NULL);
    test_error( error, "Unable to query CL_DEVICE_EXECUTION_CAPABILITIES" );
    if (!(capabilities & CL_EXEC_NATIVE_KERNEL)) {
        log_info("Device does not support CL_EXEC_NATIVE_KERNEL.\n");
        return 0;
    }

    clMemWrapper streams[ 2 ];
#if !(defined (_WIN32) && defined (_MSC_VER))
    cl_int inBuffer[ n_elems ], outBuffer[ n_elems ];
#else
    // _malloca may fall back to the heap, so both the NULL check and the matching _freea
    // calls at the end of the function are required.
    cl_int* inBuffer  = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
    cl_int* outBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
    if( inBuffer == NULL || outBuffer == NULL )
    {
        log_error( "ERROR: Unable to allocate host buffers for native kernel test\n" );
        _freea( inBuffer );
        _freea( outBuffer );
        return -1;
    }
#endif
    clEventWrapper finishEvent;

    // Host-side view of the argument block. The two cl_mem slots are the locations listed in
    // memLocs below; test_native_kernel_fn reads the same slots as plain cl_int pointers.
    struct arg_struct
    {
        cl_mem inputStream;
        cl_mem outputStream;
        cl_int count;
    } args;

    // Create some input values
    generate_random_data( kInt, n_elems, seed, inBuffer );

    // Create I/O streams
    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, n_elems * sizeof(cl_int), inBuffer, &error );
    test_error( error, "Unable to create I/O stream" );
    streams[ 1 ] = clCreateBuffer( context, 0, n_elems * sizeof(cl_int), NULL, &error );
    test_error( error, "Unable to create I/O stream" );

    // Set up the arrays to call with
    args.inputStream = streams[ 0 ];
    args.outputStream = streams[ 1 ];
    args.count = n_elems;

    void * memLocs[ 2 ] = { &args.inputStream, &args.outputStream };

    // Run the kernel
    error = clEnqueueNativeKernel( queue, test_native_kernel_fn,
                                   &args, sizeof( args ),
                                   2, &streams[ 0 ],
                                   (const void **)memLocs,
                                   0, NULL, &finishEvent );
    test_error( error, "Unable to queue native kernel" );

    // Finish and wait for the kernel to complete
    error = clFinish( queue );
    test_error(error, "clFinish failed");

    error = clWaitForEvents( 1, &finishEvent );
    test_error(error, "clWaitForEvents failed");

    // Now read the results and verify
    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, n_elems * sizeof(cl_int), outBuffer, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    int result = 0;
    for( int i = 0; i < n_elems; i++ )
    {
        if( inBuffer[ i ] != outBuffer[ i ] )
        {
            log_error( "ERROR: Data sample %d for native kernel did not validate (expected %d, got %d)\n",
                       i, (int)inBuffer[ i ], (int)outBuffer[ i ] );
            result = 1;
            break;
        }
    }

#if defined (_WIN32) && defined (_MSC_VER)
    _freea( inBuffer );
    _freea( outBuffer );
#endif

    return result;
}

View File

@@ -0,0 +1,206 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <stdio.h>
#if defined(__APPLE__)
#include <OpenCL/opencl.h>
#include <OpenCL/cl_platform.h>
#else
#include <CL/opencl.h>
#include <CL/cl_platform.h>
#endif
#include "testBase.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/testHarness.h"
#include "procs.h"
// Result codes returned by the helpers in this file: SUCCESS is 0, FAILURE is 1.
enum { SUCCESS, FAILURE };
// Selects how kernel argument 0 (the source buffer) is set for one test run:
//   NON_NULL_PATH    - pointer to a real buffer object
//   ADDROF_NULL_PATH - pointer to a cl_mem variable holding NULL
//   NULL_PATH        - NULL passed directly as the argument value pointer
typedef enum { NON_NULL_PATH, ADDROF_NULL_PATH, NULL_PATH } test_type;
// Number of work-items launched and number of elements in the source and result buffers.
#define NITEMS 4096
/* places the comparison result of value of the src ptr against 0 into each element of the output
 * array, to allow testing that the kernel actually _gets_ the NULL value */
// Variant that writes the result as long; used when gIsEmbedded is false.
const char *kernel_string_long =
"kernel void test_kernel(global float *src, global long *dst)\n"
"{\n"
" uint tid = get_global_id(0);\n"
" dst[tid] = (long)(src != 0);\n"
"}\n";
// Variant that writes the result as int; used when gIsEmbedded is true.
const char *kernel_string =
"kernel void test_kernel(global float *src, global int *dst)\n"
"{\n"
" uint tid = get_global_id(0);\n"
" dst[tid] = (int)(src != 0);\n"
"}\n";
/*
 * The guts of the test:
 * call setKernelArgs with a regular buffer, &NULL, or NULL depending on
 * the value of 'type', run the kernel over NITEMS work-items and check the
 * result buffer.
 *
 *   queue       - command queue used to launch the kernel and read results
 *   kernel      - kernel whose argument 0 is under test (argument 1 must
 *                 already be set to result_buf by the caller)
 *   test_buf    - source buffer used for NON_NULL_PATH
 *   result_buf  - buffer the kernel writes one element per work-item into;
 *                 elements are cl_int when gIsEmbedded is set, cl_long otherwise
 *   type        - which way of passing argument 0 to exercise
 *
 * For NON_NULL_PATH every result element must be non-zero; for the two NULL
 * paths every element must be zero.
 *
 * Returns SUCCESS or FAILURE; failures reported through test_error return the
 * OpenCL error code instead.
 */
static int test_setargs_and_execution(cl_command_queue queue, cl_kernel kernel,
    cl_mem test_buf, cl_mem result_buf, test_type type)
{
    unsigned int test_success = SUCCESS;
    unsigned int i;
    cl_int status;
    const char *typestr;   // points at string literals, hence const

    switch (type) {
    case NON_NULL_PATH:
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
        typestr = "non-NULL";
        break;
    case ADDROF_NULL_PATH:
        test_buf = NULL;
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
        typestr = "&NULL";
        break;
    case NULL_PATH:
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL);
        typestr = "NULL";
        break;
    default:
        // Previously fell through with 'status' and 'typestr' uninitialized.
        log_error("Unknown test type %d.\n", (int)type);
        return FAILURE;
    }

    log_info("Testing setKernelArgs with %s buffer.\n", typestr);

    if (status != CL_SUCCESS) {
        log_error("clSetKernelArg failed with status: %d\n", status);
        return FAILURE; // no point in continuing *this* test
    }

    size_t global = NITEMS;
    status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global,
        NULL, 0, NULL, NULL);
    test_error(status, "NDRangeKernel failed.");

    // The kernel writes int results on embedded profiles and long results otherwise.
    // Only zero / non-zero matters, so the results are inspected byte-wise: an integer
    // element is zero exactly when all of its bytes are zero.
    const size_t elem_size = gIsEmbedded ? sizeof(cl_int) : sizeof(cl_long);
    unsigned char *host_result = (unsigned char *)malloc(NITEMS * elem_size);
    if (host_result == NULL) {
        log_error("Unable to allocate host memory for the result buffer.\n");
        return FAILURE;
    }

    status = clEnqueueReadBuffer(queue, result_buf, CL_TRUE, 0,
        NITEMS * elem_size, host_result, 0, NULL, NULL);
    if (status != CL_SUCCESS) {
        // test_error returns from this function, so release the buffer first.
        free(host_result);
        test_error(status, "ReadBuffer failed.");
    }

    // in the non-null case, we expect NONZERO values; in both NULL cases, zeros.
    const bool expect_nonzero = (type == NON_NULL_PATH);
    for (i = 0; i < NITEMS; i++) {
        bool item_nonzero = false;
        for (size_t b = 0; b < elem_size; b++) {
            if (host_result[i * elem_size + b] != 0) {
                item_nonzero = true;
                break;
            }
        }

        if (item_nonzero != expect_nonzero) {
            if (expect_nonzero)
                log_error("failure: item %d in the result buffer was unexpectedly NULL.\n", i);
            else
                log_error("failure: item %d in the result buffer was unexpectedly non-NULL.\n", i);
            test_success = FAILURE;
            break;
        }
    }
    free(host_result);

    if (test_success == SUCCESS) {
        log_info("\t%s ok.\n", typestr);
    }

    return test_success;
}
int test_null_buffer_arg(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)
{
unsigned int test_success = 0;
unsigned int i;
unsigned int buffer_size;
cl_int status;
cl_program program;
cl_kernel kernel;
// prep kernel:
if (gIsEmbedded)
status = create_single_kernel_helper(context, &program, NULL, 1, &kernel_string, NULL);
else
status = create_single_kernel_helper(context, &program, NULL, 1, &kernel_string_long, NULL);
test_error(status, "Unable to build test program");
kernel = clCreateKernel(program, "test_kernel", &status);
test_error(status, "CreateKernel failed.");
cl_mem dev_src = clCreateBuffer(context, CL_MEM_READ_ONLY, NITEMS*sizeof(cl_float),
NULL, NULL);
if (gIsEmbedded)
buffer_size = NITEMS*sizeof(cl_int);
else
buffer_size = NITEMS*sizeof(cl_long);
cl_mem dev_dst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, buffer_size,
NULL, NULL);
// set the destination buffer normally:
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &dev_dst);
test_error(status, "SetKernelArg failed.");
//
// we test three cases:
//
// - typical case, used everyday: non-null buffer
// - the case of src as &NULL (the spec-compliance test)
// - the case of src as NULL (the backwards-compatibility test, Apple only)
//
test_success = test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NON_NULL_PATH);
test_success |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, ADDROF_NULL_PATH);
#ifdef __APPLE__
test_success |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NULL_PATH);
#endif
// clean up:
if (dev_src) clReleaseMemObject(dev_src);
clReleaseMemObject(dev_dst);
clReleaseKernel(kernel);
clReleaseProgram(program);
return test_success;
}

View File

@@ -0,0 +1,289 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include <string.h>
#define EXTENSION_NAME_BUF_SIZE 4096
#define PRINT_EXTENSION_INFO 0
int test_platform_extensions(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
{
const char * extensions[] = {
"cl_khr_byte_addressable_store",
// "cl_APPLE_SetMemObjectDestructor",
"cl_khr_global_int32_base_atomics",
"cl_khr_global_int32_extended_atomics",
"cl_khr_local_int32_base_atomics",
"cl_khr_local_int32_extended_atomics",
"cl_khr_int64_base_atomics",
"cl_khr_int64_extended_atomics",
// need to put in entires for various atomics
"cl_khr_3d_image_writes",
"cl_khr_fp16",
"cl_khr_fp64",
NULL
};
bool extensionsSupported[] = {
false, //"cl_khr_byte_addressable_store",
false, // need to put in entires for various atomics
false, // "cl_khr_global_int32_base_atomics",
false, // "cl_khr_global_int32_extended_atomics",
false, // "cl_khr_local_int32_base_atomics",
false, // "cl_khr_local_int32_extended_atomics",
false, // "cl_khr_int64_base_atomics",
false, // "cl_khr_int64_extended_atomics",
false, //"cl_khr_3d_image_writes",
false, //"cl_khr_fp16",
false, //"cl_khr_fp64",
false //NULL
};
int extensionIndex;
cl_platform_id platformID;
cl_int err;
char platform_extensions[EXTENSION_NAME_BUF_SIZE];
char device_extensions[EXTENSION_NAME_BUF_SIZE];
// Okay, so what we're going to do is just check the device indicated by
// deviceID against the platform that includes this device
// pass CL_DEVICE_PLATFORM to clGetDeviceInfo
// to get a result of type cl_platform_id
err = clGetDeviceInfo(deviceID,
CL_DEVICE_PLATFORM,
sizeof(cl_platform_id),
(void *)(&platformID),
NULL);
if(err != CL_SUCCESS)
{
vlog_error("test_platform_extensions : could not get platformID from device\n");
return -1;
}
// now we grab the set of extensions specified by the platform
err = clGetPlatformInfo(platformID,
CL_PLATFORM_EXTENSIONS,
sizeof(platform_extensions),
(void *)(&platform_extensions[0]),
NULL);
if(err != CL_SUCCESS)
{
vlog_error("test_platform_extensions : could not get extension string from platform\n");
return -1;
}
#if PRINT_EXTENSION_INFO
log_info("Platform extensions include \"%s\"\n\n", platform_extensions);
#endif
// here we parse the platform extensions, to look for the "important" ones
for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex)
{
if(strstr(platform_extensions, extensions[extensionIndex]) != NULL)
{
// we found it
#if PRINT_EXTENSION_INFO
log_info("Found \"%s\" in platform extensions\n",
extensions[extensionIndex]);
#endif
extensionsSupported[extensionIndex] = true;
}
}
// and then we grab the set of extensions specified by the device
// (this can be turned into a "loop over all devices in this platform")
err = clGetDeviceInfo(deviceID,
CL_DEVICE_EXTENSIONS,
sizeof(device_extensions),
(void *)(&device_extensions[0]),
NULL);
if(err != CL_SUCCESS)
{
vlog_error("test_platform_extensions : could not get extension string from device\n");
return -1;
}
#if PRINT_EXTENSION_INFO
log_info("Device extensions include \"%s\"\n\n", device_extensions);
#endif
for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex)
{
if(extensionsSupported[extensionIndex] == false)
{
continue; // skip this one
}
if(strstr(device_extensions, extensions[extensionIndex]) == NULL)
{
// device does not support it
vlog_error("Platform supports extension \"%s\" but device does not\n",
extensions[extensionIndex]);
return -1;
}
}
return 0;
}
// Enumerates the available OpenCL platforms and, for each one, verifies that:
//   - every string platform query reports a size equal to strlen()+1,
//   - every device reports a CL_DEVICE_PLATFORM value of the expected size,
//   - a context created with CL_CONTEXT_PLATFORM reports the same properties
//     back through clGetContextInfo.
// The deviceID, context, queue and num_elements arguments are not used.
// Returns the number of validation errors found (0 on success), -1 when no
// platform exists or an allocation fails, or whatever test_error returns when
// an OpenCL call fails (test_error is a harness macro that is not visible
// here; it presumably returns from the function on failure).
int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
    // Frees a malloc'd buffer on every exit path, including early returns.
    struct ScopedFree {
        void *ptr;
        explicit ScopedFree(void *p) : ptr(p) {}
        ~ScopedFree() { free(ptr); }
    };
    // Releases a context on exit unless ownership has been given up (ctx == NULL).
    struct ScopedContext {
        cl_context ctx;
        explicit ScopedContext(cl_context c) : ctx(c) {}
        ~ScopedContext() { if (ctx != NULL) clReleaseContext(ctx); }
    };
    enum { MAX_PLATFORMS = 16, STRING_CAPACITY = 8192 };

    cl_platform_id platforms[MAX_PLATFORMS];
    cl_uint num_platforms = 0;
    int total_errors = 0;
    int err = CL_SUCCESS;

    char *string_returned = (char*)malloc(STRING_CAPACITY);
    if (string_returned == NULL) {
        log_error("Unable to allocate the string buffer.\n");
        return -1;
    }
    ScopedFree string_guard(string_returned);

    err = clGetPlatformIDs(MAX_PLATFORMS, platforms, &num_platforms);
    test_error(err, "clGetPlatformIDs failed");
    if (num_platforms <= MAX_PLATFORMS) {
        // Try with NULL
        err = clGetPlatformIDs(num_platforms, platforms, NULL);
        test_error(err, "clGetPlatformIDs failed with NULL for return size");
    }
    if (num_platforms < 1) {
        log_error("Found 0 platforms.\n");
        return -1;
    }
    log_info("Found %d platforms.\n", num_platforms);

    // num_platforms is the number available, which may exceed the size of
    // platforms[]; only the entries that were actually written may be read.
    cl_uint platforms_to_check = num_platforms;
    if (platforms_to_check > MAX_PLATFORMS) {
        log_info("Only the first %d platforms will be examined.\n", (int)MAX_PLATFORMS);
        platforms_to_check = MAX_PLATFORMS;
    }

    for (int p=0; p<(int)platforms_to_check; p++) {
        cl_uint num_devices = 0;
        size_t size;
        log_info("Platform %d (%p):\n", p, platforms[p]);

        memset(string_returned, 0, STRING_CAPACITY);
        err = clGetPlatformInfo(platforms[p], CL_PLATFORM_PROFILE, STRING_CAPACITY, string_returned, &size);
        test_error(err, "clGetPlatformInfo for CL_PLATFORM_PROFILE failed");
        log_info("\tCL_PLATFORM_PROFILE: %s\n", string_returned);
        if (strlen(string_returned)+1 != size) {
            log_error("Returned string length %d does not equal reported one %d.\n", (int)(strlen(string_returned)+1), (int)size);
            total_errors++;
        }

        memset(string_returned, 0, STRING_CAPACITY);
        err = clGetPlatformInfo(platforms[p], CL_PLATFORM_VERSION, STRING_CAPACITY, string_returned, &size);
        test_error(err, "clGetPlatformInfo for CL_PLATFORM_VERSION failed");
        log_info("\tCL_PLATFORM_VERSION: %s\n", string_returned);
        if (strlen(string_returned)+1 != size) {
            log_error("Returned string length %d does not equal reported one %d.\n", (int)(strlen(string_returned)+1), (int)size);
            total_errors++;
        }

        memset(string_returned, 0, STRING_CAPACITY);
        err = clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, STRING_CAPACITY, string_returned, &size);
        test_error(err, "clGetPlatformInfo for CL_PLATFORM_NAME failed");
        log_info("\tCL_PLATFORM_NAME: %s\n", string_returned);
        if (strlen(string_returned)+1 != size) {
            log_error("Returned string length %d does not equal reported one %d.\n", (int)(strlen(string_returned)+1), (int)size);
            total_errors++;
        }

        memset(string_returned, 0, STRING_CAPACITY);
        err = clGetPlatformInfo(platforms[p], CL_PLATFORM_VENDOR, STRING_CAPACITY, string_returned, &size);
        test_error(err, "clGetPlatformInfo for CL_PLATFORM_VENDOR failed");
        log_info("\tCL_PLATFORM_VENDOR: %s\n", string_returned);
        if (strlen(string_returned)+1 != size) {
            log_error("Returned string length %d does not equal reported one %d.\n", (int)(strlen(string_returned)+1), (int)size);
            total_errors++;
        }

        memset(string_returned, 0, STRING_CAPACITY);
        err = clGetPlatformInfo(platforms[p], CL_PLATFORM_EXTENSIONS, STRING_CAPACITY, string_returned, &size);
        test_error(err, "clGetPlatformInfo for CL_PLATFORM_EXTENSIONS failed");
        log_info("\tCL_PLATFORM_EXTENSIONS: %s\n", string_returned);
        if (strlen(string_returned)+1 != size) {
            log_error("Returned string length %d does not equal reported one %d.\n", (int)(strlen(string_returned)+1), (int)size);
            total_errors++;
        }

        // First ask only for the device count, then fetch the IDs themselves.
        err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
        test_error(err, "clGetDeviceIDs size failed.\n");
        cl_device_id *devices = (cl_device_id *)malloc(num_devices*sizeof(cl_device_id));
        if (devices == NULL) {
            log_error("Unable to allocate the device list.\n");
            return -1;
        }
        ScopedFree devices_guard(devices);
        memset(devices, 0, sizeof(cl_device_id)*num_devices);
        err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
        test_error(err, "clGetDeviceIDs failed.\n");
        log_info("\tPlatform has %d devices.\n", (int)num_devices);

        for (int d=0; d<(int)num_devices; d++) {
            size_t returned_size;
            cl_platform_id returned_platform;
            cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[p], 0 };

            err = clGetDeviceInfo(devices[d], CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &returned_platform, &returned_size);
            test_error(err, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM\n");
            if (returned_size != sizeof(cl_platform_id)) {
                log_error("Reported return size (%d) does not match expected size (%d).\n", (int)returned_size, (int)sizeof(cl_platform_id));
                total_errors++;
            }

            memset(string_returned, 0, STRING_CAPACITY);
            err = clGetDeviceInfo(devices[d], CL_DEVICE_NAME, STRING_CAPACITY, string_returned, NULL);
            test_error(err, "clGetDeviceInfo failed for CL_DEVICE_NAME\n");
            log_info("\t\tPlatform for device %d (%s) is %p.\n", d, string_returned, returned_platform);
            log_info("\t\t\tTesting clCreateContext for the platform/device...\n");

            // Try creating a context for the platform
            cl_context platform_context = clCreateContext(properties, 1, &devices[d], NULL, NULL, &err);
            test_error(err, "\t\tclCreateContext failed for device with platform properties\n");
            ScopedContext context_guard(platform_context);

            // Clear the array so the values checked below must come from the query.
            memset(properties, 0, sizeof(cl_context_properties)*3);
            err = clGetContextInfo(platform_context, CL_CONTEXT_PROPERTIES, sizeof(cl_context_properties)*3, properties, &returned_size);
            test_error(err, "clGetContextInfo for CL_CONTEXT_PROPERTIES failed");
            if (returned_size != sizeof(cl_context_properties)*3) {
                log_error("Invalid size returned from clGetContextInfo for CL_CONTEXT_PROPERTIES. Got %d, expected %d.\n",
                          (int)returned_size, (int)(sizeof(cl_context_properties)*3));
                total_errors++;
            }
            if (properties[0] != (cl_context_properties)CL_CONTEXT_PLATFORM || properties[1] != (cl_context_properties)platforms[p]) {
                log_error("Wrong properties returned. Expected: [%p %p], got [%p %p]\n",
                          (void*)CL_CONTEXT_PLATFORM, platforms[p], (void*)properties[0], (void*)properties[1]);
                total_errors++;
            }

            // Release explicitly so a failing release is reported; the guard
            // must not release the context a second time.
            context_guard.ctx = NULL;
            err = clReleaseContext(platform_context);
            test_error(err, "clReleaseContext failed");
        }
    }
    return total_errors;
}

View File

@@ -0,0 +1,643 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/imageHelpers.h"
#include <stdlib.h>
#include <ctype.h>
// Validates the CL_PLATFORM_PROFILE and CL_PLATFORM_VERSION strings of the
// first platform returned by clGetPlatformIDs:
//   - the profile must be "FULL_PROFILE" or "EMBEDDED_PROFILE",
//   - the version must look like "OpenCL <major>.<minor> <anything>" and be at
//     least 1.2,
//   - the reported lengths (with and without a destination buffer) must equal
//     strlen()+1.
// The deviceID, context, queue and num_elements arguments are not used.
// Returns 0 on success and -1 on a validation failure; an OpenCL call failure
// is handled by test_error (harness macro, not visible here).
int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_platform_id platform;
    cl_int error;
    char buffer[ 16384 ];
    size_t length;

    // Get the platform to use
    error = clGetPlatformIDs(1, &platform, NULL);
    test_error( error, "Unable to get platform" );

    // Platform profile should either be FULL_PROFILE or EMBEDDED_PROFILE
    error = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, sizeof( buffer ), buffer, &length );
    test_error( error, "Unable to get platform profile string" );
    log_info("Returned CL_PLATFORM_PROFILE %s.\n", buffer);
    if( strcmp( buffer, "FULL_PROFILE" ) != 0 && strcmp( buffer, "EMBEDDED_PROFILE" ) != 0 )
    {
        log_error( "ERROR: Returned platform profile string is not a valid string by OpenCL 1.2! (Returned: %s)\n", buffer );
        return -1;
    }
    if( strlen( buffer )+1 != length )
    {
        log_error( "ERROR: Returned length of profile string is incorrect (actual length: %d, returned length: %d)\n",
                   (int)strlen( buffer )+1, (int)length );
        return -1;
    }

    // Check just length return
    error = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, 0, NULL, &length );
    test_error( error, "Unable to get platform profile length" );
    if( strlen( buffer )+1 != length )
    {
        log_error( "ERROR: Returned length of profile string is incorrect (actual length: %d, returned length: %d)\n",
                   (int)strlen( buffer )+1, (int)length );
        return -1;
    }

    // Platform version should fit the regex "OpenCL *[0-9]+\.[0-9]+"
    error = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof( buffer ), buffer, &length );
    test_error( error, "Unable to get platform version string" );
    log_info("Returned CL_PLATFORM_VERSION %s.\n", buffer);
    if( memcmp( buffer, "OpenCL ", strlen( "OpenCL " ) ) != 0 )
    {
        log_error( "ERROR: Initial part of platform version string does not match required format! (returned: %s)\n", buffer );
        return -1;
    }

    // p1 -> first major digit, p2 -> the '.', p3 -> character after the minor digits.
    char *p1 = buffer + strlen( "OpenCL " );
    while( *p1 == ' ' )
        p1++;
    char *p2 = p1;
    // isdigit() requires a value representable as unsigned char.
    while( isdigit( (unsigned char)*p2 ) )
        p2++;
    // At least one major digit is required before the '.'.
    if( p2 == p1 || *p2 != '.' )
    {
        log_error( "ERROR: Numeric part of platform version string does not match required format! (returned: %s)\n", buffer );
        return -1;
    }
    char *p3 = p2 + 1;
    while( isdigit( (unsigned char)*p3 ) )
        p3++;
    // At least one minor digit is required after the '.'.
    if( p3 == p2 + 1 )
    {
        log_error( "ERROR: Numeric part of platform version string does not match required format! (returned: %s)\n", buffer );
        return -1;
    }
    if( *p3 != ' ' )
    {
        log_error( "ERROR: space expected after minor version number! (returned: %s)\n", buffer );
        return -1;
    }
    *p2 = ' '; // Put in a space for atoi below.
    p2++;

    // make sure it is null terminated
    for( ; p3 != buffer + length; p3++ )
        if( *p3 == '\0' )
            break;
    if( p3 == buffer + length )
    {
        log_error( "ERROR: platform version string is not NUL terminated!\n" );
        return -1;
    }

    int major = atoi( p1 );
    int minor = atoi( p2 );
    int minor_revision = 2;
    // Compare the fields separately so a two-digit minor version cannot be
    // mistaken for a newer major version.
    if( major < 1 || ( major == 1 && minor < minor_revision ) )
    {
        log_error( "ERROR: OpenCL platform version returned is less than 1.%d!\n", minor_revision );
        return -1;
    }

    // Sanity checks on the returned values
    if( length != strlen( buffer ) + 1)
    {
        log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( buffer )+1, (int)length );
        return -1;
    }

    // Check just length
    error = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &length );
    test_error( error, "Unable to get platform version length" );
    if( length != strlen( buffer )+1 )
    {
        log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( buffer )+1, (int)length );
        return -1;
    }
    return 0;
}
// Creates a sampler with known properties (normalized coordinates, clamp
// addressing, linear filtering) and checks that clGetSamplerInfo reports the
// expected value and the expected return size for each queried parameter.
// Returns 0 on success and -1 on the first mismatch.
int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    size_t size;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )

    cl_sampler_properties properties[] = {
        CL_SAMPLER_NORMALIZED_COORDS, CL_TRUE,
        CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP,
        CL_SAMPLER_FILTER_MODE, CL_FILTER_LINEAR,
        0 };

    clSamplerWrapper sampler = clCreateSamplerWithProperties(context, properties, &error);
    test_error( error, "Unable to create sampler to test with" );

    // Reference count: only the reported size is validated.
    cl_uint referenceCount;
    error = clGetSamplerInfo( sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof( referenceCount ), &referenceCount, &size );
    test_error( error, "Unable to get sampler ref count" );
    if( sizeof( referenceCount ) != size )
    {
        log_error( "ERROR: Returned size of sampler refcount does not validate! (expected %d, got %d)\n", (int)sizeof( referenceCount ), (int)size );
        return -1;
    }

    // Owning context.
    cl_context samplerContext;
    error = clGetSamplerInfo( sampler, CL_SAMPLER_CONTEXT, sizeof( samplerContext ), &samplerContext, &size );
    test_error( error, "Unable to get sampler context" );
    if( context != samplerContext )
    {
        log_error( "ERROR: Sampler context does not validate! (expected %p, got %p)\n", context, samplerContext );
        return -1;
    }
    if( sizeof( samplerContext ) != size )
    {
        log_error( "ERROR: Returned size of sampler context does not validate! (expected %d, got %d)\n", (int)sizeof( samplerContext ), (int)size );
        return -1;
    }

    // Addressing mode.
    cl_addressing_mode addressingMode;
    error = clGetSamplerInfo( sampler, CL_SAMPLER_ADDRESSING_MODE, sizeof( addressingMode ), &addressingMode, &size );
    test_error( error, "Unable to get sampler addressing mode" );
    if( !( addressingMode == CL_ADDRESS_CLAMP ) )
    {
        log_error( "ERROR: Sampler addressing mode does not validate! (expected %d, got %d)\n", (int)CL_ADDRESS_CLAMP, (int)addressingMode );
        return -1;
    }
    if( sizeof( addressingMode ) != size )
    {
        log_error( "ERROR: Returned size of sampler addressing mode does not validate! (expected %d, got %d)\n", (int)sizeof( addressingMode ), (int)size );
        return -1;
    }

    // Filter mode.
    cl_filter_mode filterMode;
    error = clGetSamplerInfo( sampler, CL_SAMPLER_FILTER_MODE, sizeof( filterMode ), &filterMode, &size );
    test_error( error, "Unable to get sampler filter mode" );
    if( !( filterMode == CL_FILTER_LINEAR ) )
    {
        log_error( "ERROR: Sampler filter mode does not validate! (expected %d, got %d)\n", (int)CL_FILTER_LINEAR, (int)filterMode );
        return -1;
    }
    if( sizeof( filterMode ) != size )
    {
        log_error( "ERROR: Returned size of sampler filter mode does not validate! (expected %d, got %d)\n", (int)sizeof( filterMode ), (int)size );
        return -1;
    }

    // Normalized-coordinates flag.
    cl_int normalizedCoords;
    error = clGetSamplerInfo( sampler, CL_SAMPLER_NORMALIZED_COORDS, sizeof( normalizedCoords ), &normalizedCoords, &size );
    test_error( error, "Unable to get sampler normalized flag" );
    if( !( normalizedCoords == CL_TRUE ) )
    {
        log_error( "ERROR: Sampler normalized flag does not validate! (expected %d, got %d)\n", (int)CL_TRUE, (int)normalizedCoords );
        return -1;
    }
    if( sizeof( normalizedCoords ) != size )
    {
        log_error( "ERROR: Returned size of sampler normalized flag does not validate! (expected %d, got %d)\n", (int)sizeof( normalizedCoords ), (int)size );
        return -1;
    }

    return 0;
}
// Queries one command-queue parameter with clGetCommandQueueInfo and validates it.
//   queue     - command queue to query
//   paramName - cl_command_queue_info value to request
//   val       - lvalue that receives the result; its sizeof is the expected size
//   expected  - value that val must compare equal to
//   name      - string literal naming the parameter in log messages
//   type      - printf conversion (string literal) used to print expected/val
//   cast      - type that expected and val are cast to for printing
// The expansion assigns to variables named `error` and `size`, which must exist
// in the calling scope, and executes `return -1` in the calling function when
// the value or the returned size does not match. It expands to several
// statements and is used without a trailing semicolon.
#define TEST_COMMAND_QUEUE_PARAM( queue, paramName, val, expected, name, type, cast ) \
error = clGetCommandQueueInfo( queue, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get command queue " name ); \
if( val != expected ) \
{ \
log_error( "ERROR: Command queue " name " did not validate! (expected " type ", got " type ")\n", (cast)expected, (cast)val ); \
return -1; \
} \
if( size != sizeof( val ) ) \
{ \
log_error( "ERROR: Returned size of command queue " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
return -1; \
}
// Creates a command queue using the properties the device reports for
// CL_DEVICE_QUEUE_ON_HOST_PROPERTIES and checks that clGetCommandQueueInfo
// returns consistent values and sizes for the reference count, context, device
// and properties. The ignoreQueue and num_elements arguments are not used.
// Returns 0 on success and -1 on the first mismatch.
int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
{
    // `error` and `size` are also written by TEST_COMMAND_QUEUE_PARAM.
    int error;
    size_t size;

    cl_queue_properties device_props = 0;
    cl_queue_properties queue_props[] = {CL_QUEUE_PROPERTIES,0,0};

    // The result of this query seeds the queue creation below, so a failure
    // must not be ignored (device_props would otherwise be meaningless).
    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof(device_props), &device_props, NULL);
    test_error( error, "Unable to get device CL_DEVICE_QUEUE_ON_HOST_PROPERTIES" );
    log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n", (int)device_props);
    queue_props[1] = device_props;

    clCommandQueueWrapper queue = clCreateCommandQueueWithProperties( context, deviceID, &queue_props[0], &error );
    test_error( error, "Unable to create command queue to test with" );

    // Reference count: only the reported size is validated.
    cl_uint refCount;
    error = clGetCommandQueueInfo( queue, CL_QUEUE_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
    test_error( error, "Unable to get command queue reference count" );
    if( size != sizeof( refCount ) )
    {
        log_error( "ERROR: Returned size of command queue reference count does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size );
        return -1;
    }

    cl_context otherCtx;
    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_CONTEXT, otherCtx, context, "context", "%p", cl_context )

    cl_device_id otherDevice;
    error = clGetCommandQueueInfo( queue, CL_QUEUE_DEVICE, sizeof(otherDevice), &otherDevice, &size);
    test_error(error, "clGetCommandQueue failed.");
    if (size != sizeof(cl_device_id)) {
        log_error( " ERROR: Returned size of command queue CL_QUEUE_DEVICE does not validate! (expected %d, got %d)\n", (int)sizeof( otherDevice ), (int)size );
        return -1;
    }

    /* Since the device IDs are opaque types we check the CL_DEVICE_VENDOR_ID which is unique for identical hardware. */
    cl_uint otherDevice_vid, deviceID_vid;
    error = clGetDeviceInfo(otherDevice, CL_DEVICE_VENDOR_ID, sizeof(otherDevice_vid), &otherDevice_vid, NULL );
    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
    error = clGetDeviceInfo(deviceID, CL_DEVICE_VENDOR_ID, sizeof(deviceID_vid), &deviceID_vid, NULL );
    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
    if( otherDevice_vid != deviceID_vid )
    {
        log_error( "ERROR: Incorrect device returned for queue! (Expected vendor ID 0x%x, got 0x%x)\n", deviceID_vid, otherDevice_vid );
        return -1;
    }

    // The queue must report the same properties it was created with.
    cl_command_queue_properties props;
    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_PROPERTIES, props, (unsigned int)( device_props ), "properties", "%d", unsigned int )

    return 0;
}
// Queries CL_CONTEXT_PROPERTIES on the supplied context using a buffer that
// holds a single cl_context_properties value. The result is accepted when the
// reported size is 0, or when it is exactly one cl_context_properties whose
// value is 0. Returns 0 when accepted and -1 otherwise.
int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
{
    size_t reportedSize;
    cl_context_properties firstProperty;

    int error = clGetContextInfo( context, CL_CONTEXT_PROPERTIES, sizeof( firstProperty ), &firstProperty, &reportedSize );
    test_error( error, "Unable to get context props" );

    // Nothing returned at all: valid.
    if( reportedSize == 0 )
        return 0;

    // Anything other than exactly one entry is rejected.
    if( reportedSize != sizeof(cl_context_properties) )
    {
        log_error( "ERROR: Returned size of context props is not valid! (expected 0 or %d, got %d)\n",
                   (int)sizeof(cl_context_properties), (int)reportedSize );
        return -1;
    }

    // Exactly one entry: it must be the terminating 0.
    if( firstProperty != 0 )
    {
        log_error("ERROR: Returned properties is no NULL.\n");
        return -1;
    }

    return 0;
}
// Queries one memory-object parameter with clGetMemObjectInfo and validates it.
//   mem       - memory object to query
//   paramName - cl_mem_info value to request
//   val       - lvalue that receives the result; its sizeof is the expected size
//   expected  - value that val must compare equal to
//   name      - string literal naming the parameter in log messages
//   type      - printf conversion (string literal) used to print expected/val
//   cast      - type that expected and val are cast to for printing
// The expansion assigns to variables named `error` and `size`, which must exist
// in the calling scope, and executes `return -1` in the calling function when
// the value or the returned size does not match.
#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast ) \
error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get mem object " name ); \
if( val != expected ) \
{ \
log_error( "ERROR: Mem object " name " did not validate! (expected " type ", got " type ")\n", (cast)(expected), (cast)val ); \
return -1; \
} \
if( size != sizeof( val ) ) \
{ \
log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
return -1; \
}
// Callback with the (cl_mem, void*) shape; the memory object argument is
// ignored and the user-data pointer is released with free().
void CL_CALLBACK mem_obj_destructor_callback( cl_mem /* unused */, void *data )
{
    void *userData = data;
    free( userData );
}
// Table of cl_mem_flags combinations: no flags, then each of the three access
// modes (READ_WRITE, READ_ONLY, WRITE_ONLY) on its own and combined with
// COPY_HOST_PTR, ALLOC_HOST_PTR, ALLOC_HOST_PTR | COPY_HOST_PTR and
// USE_HOST_PTR. Host-pointer flags never appear without an access mode.
static cl_mem_flags all_flags[16] = {
0,
CL_MEM_READ_WRITE,
CL_MEM_READ_ONLY,
CL_MEM_WRITE_ONLY,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
};
// Queries one device parameter with clGetDeviceInfo, checks that the returned
// size equals sizeof(val), and logs the value.
//   device    - device to query
//   paramName - cl_device_info value to request
//   val       - lvalue that receives the result
//   name      - string literal naming the parameter in log messages
//   type      - printf conversion (string literal) used to print val
//   cast      - type that val is cast to for printing
// The expansion assigns to variables named `error` and `size`, which must exist
// in the calling scope, and executes `return -1` in the calling function on a
// size mismatch. The value itself is only logged, not validated.
#define TEST_DEVICE_PARAM( device, paramName, val, name, type, cast ) \
error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get device " name ); \
if( size != sizeof( val ) ) \
{ \
log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
return -1; \
} \
log_info( "\tReported device " name " : " type "\n", (cast)val );
// Variant of TEST_DEVICE_PARAM for memory sizes: same query and size check,
// but the value is divided by `div` and printed as an int, so callers can
// report it in a larger unit.
//   device    - device to query
//   paramName - cl_device_info value to request
//   val       - lvalue that receives the result
//   name      - string literal naming the parameter in log messages
//   type      - printf conversion (string literal); must accept an int
//   div       - divisor applied to val before printing
// The expansion assigns to variables named `error` and `size`, which must exist
// in the calling scope, and executes `return -1` in the calling function on a
// size mismatch.
#define TEST_DEVICE_PARAM_MEM( device, paramName, val, name, type, div ) \
error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get device " name ); \
if( size != sizeof( val ) ) \
{ \
log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
return -1; \
} \
log_info( "\tReported device " name " : " type "\n", (int)( val / div ) );
// Queries a range of clGetDeviceInfo parameters on deviceID, checks that each
// reported size matches the size of the queried type (or strlen()+1 for
// strings), logs the values, and verifies that the device profile is
// "FULL_PROFILE" or "EMBEDDED_PROFILE". The context, ignoreQueue and
// num_elements arguments are not used.
// Returns 0 on success and -1 on the first mismatch.
int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
{
    // `error` and `size` are also written by the TEST_DEVICE_PARAM* macros.
    int error;
    size_t size;

    cl_uint vendorID;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_VENDOR_ID, vendorID, "vendor ID", "0x%08x", int )

    char extensions[ 10240 ];
    error = clGetDeviceInfo( deviceID, CL_DEVICE_EXTENSIONS, sizeof( extensions ), &extensions, &size );
    test_error( error, "Unable to get device extensions" );
    if( size != strlen( extensions ) + 1 )
    {
        log_error( "ERROR: Returned size of device extensions does not validate! (expected %d, got %d)\n", (int)( strlen( extensions ) + 1 ), (int)size );
        return -1;
    }
    log_info( "\tReported device extensions: %s \n", extensions );

    cl_uint preferred;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, preferred, "preferred vector char width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, preferred, "preferred vector short width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, preferred, "preferred vector int width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, preferred, "preferred vector long width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, preferred, "preferred vector float width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, preferred, "preferred vector double width", "%d", int )
    // Note that even without cl_khr_fp64, the preferred width for double can be
    // non-zero. For example, a vendor extension can support double without
    // supporting cl_khr_fp64, which implies math library support.

    cl_uint baseAddrAlign;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, baseAddrAlign, "base address alignment", "%d bytes", int )

    cl_uint minDataAlign;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, minDataAlign, "min data type alignment", "%d bytes", int )

    cl_device_mem_cache_type cacheType;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, sizeof( cacheType ), &cacheType, &size );
    test_error( error, "Unable to get device global mem cache type" );
    if( size != sizeof( cacheType ) )
    {
        log_error( "ERROR: Returned size of device global mem cache type does not validate! (expected %d, got %d)\n", (int)sizeof( cacheType ), (int)size );
        return -1;
    }
    const char *cacheTypeName = ( cacheType == CL_NONE ) ? "CL_NONE" : ( cacheType == CL_READ_ONLY_CACHE ) ? "CL_READ_ONLY_CACHE" : ( cacheType == CL_READ_WRITE_CACHE ) ? "CL_READ_WRITE_CACHE" : "<unknown>";
    log_info( "\tReported device global mem cache type: %s \n", cacheTypeName );

    cl_uint cachelineSize;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cachelineSize, "global mem cacheline size", "%d bytes", int )

    cl_ulong cacheSize;
    TEST_DEVICE_PARAM_MEM( deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cacheSize, "global mem cache size", "%d KB", 1024 )

    cl_ulong memSize;
    TEST_DEVICE_PARAM_MEM( deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, memSize, "global mem size", "%d MB", ( 1024 * 1024 ) )

    cl_device_local_mem_type localMemType;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_TYPE, sizeof( localMemType ), &localMemType, &size );
    test_error( error, "Unable to get device local mem type" );
    // Validate against the type that was actually queried.
    if( size != sizeof( localMemType ) )
    {
        log_error( "ERROR: Returned size of device local mem type does not validate! (expected %d, got %d)\n", (int)sizeof( localMemType ), (int)size );
        return -1;
    }
    // Both branches must inspect localMemType (not the cache type queried above).
    const char *localMemTypeName = ( localMemType == CL_LOCAL ) ? "CL_LOCAL" : ( localMemType == CL_GLOBAL ) ? "CL_GLOBAL" : "<unknown>";
    log_info( "\tReported device local mem type: %s \n", localMemTypeName );

    cl_bool errSupport;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_ERROR_CORRECTION_SUPPORT, errSupport, "error correction support", "%d", int )

    size_t timerResolution;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PROFILING_TIMER_RESOLUTION, timerResolution, "profiling timer resolution", "%ld nanoseconds", long )

    cl_bool endian;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_ENDIAN_LITTLE, endian, "little endian flag", "%d", int )

    cl_bool avail;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_AVAILABLE, avail, "available flag", "%d", int )

    cl_bool compilerAvail;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_COMPILER_AVAILABLE, compilerAvail, "compiler available flag", "%d", int )

    char profile[ 1024 ];
    error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof( profile ), &profile, &size );
    test_error( error, "Unable to get device profile" );
    if( size != strlen( profile ) + 1 )
    {
        log_error( "ERROR: Returned size of device profile does not validate! (expected %d, got %d)\n", (int)( strlen( profile ) + 1 ), (int)size );
        return -1;
    }
    if( strcmp( profile, "FULL_PROFILE" ) != 0 && strcmp( profile, "EMBEDDED_PROFILE" ) != 0 )
    {
        log_error( "ERROR: Returned profile of device not FULL or EMBEDDED as required by OpenCL 1.2! (Returned %s)\n", profile );
        return -1;
    }
    log_info( "\tReported device profile: %s \n", profile );

    return 0;
}
// Kernel sources used by test_kernel_required_group_size.
//   [0] a plain copy kernel with no required work-group size.
//   [1] the same kernel with a reqd_work_group_size attribute; it is a printf
//       format string whose three %d fields take the X, Y and Z sizes.
static const char *sample_compile_size[2] = {
"__kernel void sample_test(__global int *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" dst[tid] = src[tid];\n"
"\n"
"}\n",
"__kernel __attribute__((reqd_work_group_size(%d,%d,%d))) void sample_test(__global int *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" dst[tid] = src[tid];\n"
"\n"
"}\n" };
// Verifies CL_KERNEL_COMPILE_WORK_GROUP_SIZE and enforcement of
// reqd_work_group_size:
//   1. A kernel without the attribute must report a compile work-group size
//      of 0,0,0; it is also used to pick a usable local size.
//   2. A kernel built with reqd_work_group_size set to that local size must
//      report it back, must run with exactly that size, and must be rejected
//      with CL_INVALID_WORK_GROUP_SIZE when any one dimension is changed.
// Returns 0 on success, -1 on a validation failure, or the error returned by
// a helper / OpenCL call.
int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    size_t realSize;
    size_t kernel_max_workgroup_size;
    size_t global[] = {64,14,10};
    size_t local[] = {0,0,0};
    cl_uint max_dimensions;

    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_dimensions), &max_dimensions, NULL);
    test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
    log_info("Device reported CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS = %d.\n", (int)max_dimensions);

    // Part 1: kernel without a required work-group size.
    {
        clProgramWrapper program;
        clKernelWrapper kernel;

        error = create_single_kernel_helper( context, &program, &kernel, 1, &sample_compile_size[ 0 ], "sample_test" );
        if( error != 0 )
            return error;

        error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(kernel_max_workgroup_size), &kernel_max_workgroup_size, NULL);
        test_error( error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE");
        log_info("The CL_KERNEL_WORK_GROUP_SIZE for the kernel is %d.\n", (int)kernel_max_workgroup_size);

        size_t size[ 3 ];
        error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof( size ), size, &realSize );
        test_error( error, "Unable to get work group info" );
        if( size[ 0 ] != 0 || size[ 1 ] != 0 || size[ 2 ] != 0 )
        {
            log_error( "ERROR: Nonzero compile work group size returned for nonspecified size! (returned %d,%d,%d)\n", (int)size[0], (int)size[1], (int)size[2] );
            return -1;
        }
        if( realSize != sizeof( size ) )
        {
            log_error( "ERROR: Returned size of compile work group size not valid! (Expected %d, got %d)\n", (int)sizeof( size ), (int)realSize );
            return -1;
        }

        // Determine some local dimensions to use for the test.
        if (max_dimensions == 1) {
            error = get_max_common_work_group_size(context, kernel, global[0], &local[0]);
            test_error( error, "get_max_common_work_group_size failed");
            log_info("For global dimension %d, kernel will require local dimension %d.\n", (int)global[0], (int)local[0]);
        } else if (max_dimensions == 2) {
            error = get_max_common_2D_work_group_size(context, kernel, global, local);
            test_error( error, "get_max_common_2D_work_group_size failed");
            log_info("For global dimension %d x %d, kernel will require local dimension %d x %d.\n", (int)global[0], (int)global[1], (int)local[0], (int)local[1]);
        } else {
            error = get_max_common_3D_work_group_size(context, kernel, global, local);
            test_error( error, "get_max_common_3D_work_group_size failed");
            log_info("For global dimension %d x %d x %d, kernel will require local dimension %d x %d x %d.\n",
                     (int)global[0], (int)global[1], (int)global[2], (int)local[0], (int)local[1], (int)local[2]);
        }
    }

    // Part 2: kernel with reqd_work_group_size(local[0], local[1], local[2]).
    {
        clProgramWrapper program;
        clKernelWrapper kernel;
        clMemWrapper in, out;

        // A stack buffer cannot leak on the many early returns below. The
        // format string's %d fields take int, so the size_t values are cast.
        char source[1024];
        source[0] = '\0';
        sprintf(source, sample_compile_size[1], (int)local[0], (int)local[1], (int)local[2]);
        const char *sourcePtr = source;

        error = create_single_kernel_helper( context, &program, &kernel, 1, &sourcePtr, "sample_test" );
        if( error != 0 )
            return error;

        size_t size[ 3 ];
        error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof( size ), size, &realSize );
        test_error( error, "Unable to get work group info" );
        if( size[ 0 ] != local[0] || size[ 1 ] != local[1] || size[ 2 ] != local[2] )
        {
            log_error( "ERROR: Incorrect compile work group size returned for specified size! (returned %d,%d,%d, expected %d,%d,%d)\n",
                       (int)size[0], (int)size[1], (int)size[2], (int)local[0], (int)local[1], (int)local[2]);
            return -1;
        }

        // Verify that the kernel will only execute with that size.
        in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*global[0], NULL, &error);
        test_error(error, "clCreateBuffer failed");
        out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*global[0], NULL, &error);
        test_error(error, "clCreateBuffer failed");

        error = clSetKernelArg(kernel, 0, sizeof(in), &in);
        test_error(error, "clSetKernelArg failed");
        error = clSetKernelArg(kernel, 1, sizeof(out), &out);
        test_error(error, "clSetKernelArg failed");

        error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL);
        test_error(error, "clEnqueueNDRangeKernel failed");
        error = clFinish(queue);
        test_error(error, "clFinish failed");

        log_info("kernel_required_group_size may report spurious ERRORS in the conformance log.\n");

        // Negative tests: bump one dimension at a time past the required size.
        // Dimensions beyond what the device supports are not exercised.
        for (cl_uint dim = 0; dim < 3; dim++) {
            size_t wrong[3] = { local[0], local[1], local[2] };
            wrong[dim]++;

            error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, wrong, 0, NULL, NULL);
            if (error != CL_INVALID_WORK_GROUP_SIZE) {
                log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. (used %d,%d,%d, required %d,%d,%d)\n",
                          (int)wrong[0], (int)wrong[1], (int)wrong[2], (int)local[0], (int)local[1], (int)local[2]);
                print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE.");
                return -1;
            }
            error = clFinish(queue);
            test_error(error, "clFinish failed");

            if (max_dimensions == dim + 1) {
                return 0;
            }
        }
    }
    return 0;
}

View File

@@ -0,0 +1,191 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/conversions.h"
#include <sstream>
#include <string>
using namespace std;
/*
*/
// OpenCL C source of the "vec_cpy" kernel: each work-item copies the element
// at its global id from src to dst.
const char *queue_hint_test_kernel[] = {
"__kernel void vec_cpy(__global int *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src[tid];\n"
"\n"
"}\n" };
// Runs `kernel` (expected to copy argument 0 to argument 1 element-wise) on
// `queue` over num_elements ints and verifies the copied data.
//   context      - context in which the two buffers are created
//   queue        - queue the kernel and the blocking read are enqueued on
//   kernel       - kernel taking (src, dst) buffer arguments
//   num_elements - number of ints to copy
// Returns 0 on success and -1 when the read-back data is wrong; an OpenCL call
// failure is handled by test_error (harness macro, not visible here).
int test_enqueue(cl_context context, clCommandQueueWrapper& queue, clKernelWrapper& kernel, size_t num_elements)
{
    // Owns a host int array and releases it on every exit path.
    struct HostArray {
        int *data;
        explicit HostArray(size_t count) : data(new int[count]) {}
        ~HostArray() { delete [] data; }
    };

    // Declared before the buffers so the host memory outlives the
    // CL_MEM_USE_HOST_PTR buffer that wraps it (locals are destroyed in
    // reverse order of declaration).
    HostArray input(num_elements);
    HostArray output(num_elements);
    clMemWrapper streams[2];
    int error;

    for (size_t i = 0; i < num_elements; ++i)
    {
        input.data[i] = static_cast<int>(i);
        // Sentinel: a read that writes nothing cannot pass the check below.
        output.data[i] = -1;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, num_elements * sizeof(int), input.data, &error);
    test_error( error, "clCreateBuffer failed." );
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, num_elements * sizeof(int), NULL, &error);
    test_error( error, "clCreateBuffer failed." );

    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
    test_error( error, "clSetKernelArg failed." );
    error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
    test_error( error, "clSetKernelArg failed." );

    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &num_elements, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel failed." );

    // Blocking read into a buffer separate from the one backing streams[0].
    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, num_elements * sizeof(int), output.data, 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed." );

    for (size_t i = 0; i < num_elements; ++i)
    {
        if (output.data[i] != static_cast<int>(i))
        {
            log_error("ERROR: Incorrect vector copy result.");
            return -1;
        }
    }
    return 0;
}
int test_queue_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
if (num_elements <= 0)
{
num_elements = 128;
}
int err = 0;
// Query extension
cl_platform_id platform;
clProgramWrapper program;
clKernelWrapper kernel;
char *string_returned;
string_returned = (char*)malloc(8192);
err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL);
test_error(err, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
err = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, queue_hint_test_kernel, "vec_cpy", NULL);
if (err != 0)
{
return err;
}
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platform, CL_PLATFORM_EXTENSIONS, 8192, string_returned, NULL);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_EXTENSIONS failed");
log_info("\tCL_PLATFORM_EXTENSIONS: %s\n", string_returned);
string strExt = string_returned;
if (strExt.find("cl_khr_priority_hints") != string::npos)
{
log_info("Testing cl_khr_priority_hints...\n", string_returned);
cl_queue_properties queue_prop[][3] =
{
{
CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR,
0
},
{
CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_MED_KHR,
0
},
{
CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR,
0
}
};
for (int i = 0; i < 3; ++i)
{
clCommandQueueWrapper q = clCreateCommandQueueWithProperties(context, deviceID, queue_prop[i], &err);
test_error(err, "clCreateCommandQueueWithProperties failed");
err = test_enqueue(context, q, kernel, (size_t)num_elements);
if (err != 0)
{
return err;
}
}
}
else
{
log_info("cl_khr_priority_hints is not supported.");
}
if (strExt.find("cl_khr_throttle_hints") != string::npos)
{
cl_queue_properties queue_prop[][3] =
{
{
CL_QUEUE_THROTTLE_KHR, CL_QUEUE_THROTTLE_HIGH_KHR,
0
},
{
CL_QUEUE_THROTTLE_KHR, CL_QUEUE_THROTTLE_MED_KHR,
0
},
{
CL_QUEUE_THROTTLE_KHR, CL_QUEUE_THROTTLE_LOW_KHR,
0
}
};
for (int i = 0; i < 3; ++i)
{
clCommandQueueWrapper q = clCreateCommandQueueWithProperties(context, deviceID, queue_prop[i], &err);
test_error(err, "clCreateCommandQueueWithProperties failed");
err = test_enqueue(context, q, kernel, (size_t)num_elements);
if (err != 0)
{
return err;
}
}
}
else
{
log_info("cl_khr_throttle_hints is not supported.");
}
free(string_returned);
return 0;
}

View File

@@ -0,0 +1,234 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif // !_WIN32
// Note: According to spec, the various functions to get instance counts should return an error when passed in an object
// that has already been released. However, the spec is out of date. If it gets re-updated to allow such action, re-enable
// this define.
//#define VERIFY_AFTER_RELEASE 1
// GET_*_INSTANCE_COUNT(p): query the reference count of `p` into the caller's
// `numInstances` and store the API result in the caller's `err`. When the
// query fails, numInstances is set to 0. Both variables must be in scope.
#define GET_QUEUE_INSTANCE_COUNT(p) numInstances = ( (err = clGetCommandQueueInfo((p), CL_QUEUE_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
#define GET_MEM_INSTANCE_COUNT(p) numInstances = ( (err = clGetMemObjectInfo((p), CL_MEM_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )

// VERIFY_INSTANCE_COUNT(c, rightValue): log an error and return -1 from the
// enclosing function when the observed count `c` differs from `rightValue`.
// Wrapped in do/while(0) so it behaves as a single statement (safe inside an
// unbraced if/else); the values are cast to int to match the %d conversions.
#define VERIFY_INSTANCE_COUNT(c,rightValue) do { if( (c) != (rightValue) ) { \
    log_error( "ERROR: Instance count for test object is not valid! (should be %d, really is %d)\n", (int)(rightValue), (int)(c) ); \
    return -1; } } while( 0 )
// Creates a command queue, checks that its reference count is 1, then
// releases it and checks that the release itself succeeded.
//
// The queue passed in by the harness is not used. Returns 0 on success, the
// failing OpenCL error code (via test_error) or -1 on a count mismatch.
int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
{
    cl_command_queue queue;
    cl_uint numInstances;
    int err;

    /* Create a test queue */
    queue = clCreateCommandQueueWithProperties( context, deviceID, 0, &err );
    test_error( err, "Unable to create command queue to test with" );

    /* A freshly created queue must report exactly one reference */
    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* Now release the queue; a failing release is a test failure too */
    err = clReleaseCommandQueue( queue );
    test_error( err, "Unable to release command queue" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_QUEUE_INSTANCE_COUNT( queue );
    if( err != CL_INVALID_COMMAND_QUEUE )
    {
        print_error( err, "Command queue was not properly released" );
        return -1;
    }
#endif

    return 0;
}
// Exercises clRetainCommandQueue / clReleaseCommandQueue in several rounds and
// checks the reported reference count after each round:
// +9 -> 10, -5 -> 5, +3 -> 8, -7 -> 1, followed by the final release.
//
// Every retain/release result is checked. The queue passed in by the harness
// is not used. Returns 0 on success, the failing OpenCL error code (via
// test_error) or -1 on a count mismatch.
int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
{
    /* Each step applies `count` retains (retain != 0) or releases and then
       expects the queue to report `expected` references. */
    static const struct
    {
        int          retain;
        unsigned int count;
        unsigned int expected;
    } steps[] = {
        { 1, 9, 10 },
        { 0, 5, 5 },
        { 1, 3, 8 },
        { 0, 7, 1 }
    };
    const unsigned int numSteps = (unsigned int)( sizeof( steps ) / sizeof( steps[ 0 ] ) );

    cl_command_queue queue;
    unsigned int numInstances, s, i;
    int err;

    /* Create a test queue */
    queue = clCreateCommandQueueWithProperties( context, deviceID, 0, &err );
    test_error( err, "Unable to create command queue to test with" );

    for( s = 0; s < numSteps; s++ )
    {
        for( i = 0; i < steps[ s ].count; i++ )
        {
            if( steps[ s ].retain )
            {
                err = clRetainCommandQueue( queue );
                test_error( err, "Unable to retain command queue" );
            }
            else
            {
                err = clReleaseCommandQueue( queue );
                test_error( err, "Unable to release command queue" );
            }
        }

        /* Test the instance count after this round */
        GET_QUEUE_INSTANCE_COUNT( queue );
        test_error( err, "Unable to get queue instance count" );
        VERIFY_INSTANCE_COUNT( numInstances, steps[ s ].expected );
    }

    /* And one last release, which drops the final reference */
    err = clReleaseCommandQueue( queue );
    test_error( err, "Unable to release command queue" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_QUEUE_INSTANCE_COUNT( queue );
    if( err != CL_INVALID_COMMAND_QUEUE )
    {
        print_error( err, "Command queue was not properly released" );
        return -1;
    }
#endif

    return 0;
}
// Creates a 32-byte read-only buffer, checks that its reference count is 1,
// then releases it and checks that the release itself succeeded.
//
// Returns 0 on success, the failing OpenCL error code (via test_error) or -1
// on a count mismatch.
int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem object;
    cl_uint numInstances;
    int err;

    /* Create a test object */
    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
    test_error( err, "Unable to create buffer to test with" );

    /* A freshly created buffer must report exactly one reference */
    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* Now release the buffer; a failing release is a test failure too */
    err = clReleaseMemObject( object );
    test_error( err, "Unable to release mem object" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_MEM_INSTANCE_COUNT( object );
    if( err != CL_INVALID_MEM_OBJECT )
    {
        print_error( err, "Mem object was not properly released" );
        return -1;
    }
#endif

    return 0;
}
// Exercises clRetainMemObject / clReleaseMemObject in several rounds and
// checks the reported reference count after each round:
// +9 -> 10, -5 -> 5, +3 -> 8, -7 -> 1, followed by the final release.
//
// Every retain/release result is checked. Returns 0 on success, the failing
// OpenCL error code (via test_error) or -1 on a count mismatch.
int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    /* Each step applies `count` retains (retain != 0) or releases and then
       expects the buffer to report `expected` references. */
    static const struct
    {
        int          retain;
        unsigned int count;
        unsigned int expected;
    } steps[] = {
        { 1, 9, 10 },
        { 0, 5, 5 },
        { 1, 3, 8 },
        { 0, 7, 1 }
    };
    const unsigned int numSteps = (unsigned int)( sizeof( steps ) / sizeof( steps[ 0 ] ) );

    cl_mem object;
    unsigned int numInstances, s, i;
    int err;

    /* Create a test object */
    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
    test_error( err, "Unable to create buffer to test with" );

    for( s = 0; s < numSteps; s++ )
    {
        for( i = 0; i < steps[ s ].count; i++ )
        {
            if( steps[ s ].retain )
            {
                err = clRetainMemObject( object );
                test_error( err, "Unable to retain mem object" );
            }
            else
            {
                err = clReleaseMemObject( object );
                test_error( err, "Unable to release mem object" );
            }
        }

        /* Test the instance count after this round */
        GET_MEM_INSTANCE_COUNT( object );
        test_error( err, "Unable to get mem object count" );
        VERIFY_INSTANCE_COUNT( numInstances, steps[ s ].expected );
    }

    /* And one last release, which drops the final reference */
    err = clReleaseMemObject( object );
    test_error( err, "Unable to release mem object" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_MEM_INSTANCE_COUNT( object );
    if( err != CL_INVALID_MEM_OBJECT )
    {
        print_error( err, "Mem object was not properly released" );
        return -1;
    }
#endif

    return 0;
}

View File

@@ -0,0 +1,105 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif
#include "../../test_common/harness/compat.h"
// Builds a trivial program, creates a kernel from it, then releases the
// program BEFORE the kernel. With correct reference counting both releases
// must succeed; their results are now checked so the test can actually fail.
//
// Returns 0 on success or the failing OpenCL error code (via test_error).
int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_program program;
    cl_kernel kernel;
    int error;
    cl_int programReleaseError, kernelReleaseError;
    const char *testProgram[] = { "__kernel void sample_test(__global int *data){}" };

    /* Create a test program */
    error = create_single_kernel_helper(context, &program, NULL, 1, testProgram, NULL);
    test_error( error, "Unable to build sample program to test with" );

    /* And create a kernel from it */
    kernel = clCreateKernel( program, "sample_test", &error );
    if( error != CL_SUCCESS )
    {
        /* Do not leak the program when the kernel could not be created */
        clReleaseProgram( program );
    }
    test_error( error, "Unable to create kernel" );

    /* Now try freeing the program first, then the kernel. If refcounts are right, this should work just fine.
       Both releases are always attempted before either result is reported. */
    programReleaseError = clReleaseProgram( program );
    kernelReleaseError = clReleaseKernel( kernel );
    test_error( programReleaseError, "Unable to release program before its kernel" );
    test_error( kernelReleaseError, "Unable to release kernel after its program" );

    return 0;
}
// OpenCL C source for the kernel run by test_release_during_execute: each
// work-item spins through an empty one-million-iteration loop and then stores
// (int)src[tid] into dst[tid]. The loop is presumably there to keep the
// kernel busy while the host releases objects (a compiler may optimize it out).
const char *sample_delay_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" for( int i = 0; i < 1000000; i++ ); \n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n" };
// Enqueues a kernel and, without waiting for it, releases both buffers, the
// kernel and the program it uses. With correct internal reference counting
// every release must succeed and the queue must still finish cleanly.
//
// Returns 0 on success, -1 if the kernel cannot be built, or the failing
// OpenCL error code (via test_error).
int test_release_during_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    cl_int releaseDstError, releaseSrcError, releaseKernelError, releaseProgramError;
    cl_program program;
    cl_kernel kernel;
    cl_mem streams[2];
    size_t threads[1] = { 10 }, localThreadSize;

    /* Build the (deliberately slow) kernel that will be in flight during the releases */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_delay_kernel, "sample_test" ) )
    {
        return -1;
    }

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[ 0 ]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[ 1 ]);
    test_error( error, "Unable to set indexed kernel arguments" );

    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreadSize );
    test_error( error, "Unable to calc local thread size" );

    /* Execute the kernel */
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &localThreadSize, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    /* The kernel should still be executing, but we should still be able to release it. It's not terribly
       useful, but we should be able to do it, if the internal refcounting is indeed correct.
       All four releases are attempted before any result is reported, so one failure
       does not leave the remaining objects unreleased. */
    releaseDstError = clReleaseMemObject( streams[ 1 ] );
    releaseSrcError = clReleaseMemObject( streams[ 0 ] );
    releaseKernelError = clReleaseKernel( kernel );
    releaseProgramError = clReleaseProgram( program );

    /* Now make sure we're really finished before we go on. */
    error = clFinish(queue);
    test_error( error, "Unable to finish context.");

    test_error( releaseDstError, "Unable to release destination buffer during execution" );
    test_error( releaseSrcError, "Unable to release source buffer during execution" );
    test_error( releaseKernelError, "Unable to release kernel during execution" );
    test_error( releaseProgramError, "Unable to release program during execution" );

    return 0;
}

View File

@@ -0,0 +1,218 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/conversions.h"
// OpenCL C source for the sub-group dispatch test: the kernel stores the value
// of get_num_sub_groups() into output[0] (an int), which the host reads back.
const char *subgroup_dispatch_kernel[] = {
"__kernel void subgroup_dispatch_kernel(__global int *output)\n"
"{\n"
" size_t size = get_num_sub_groups ();\n"
"\n"
" output[0] = size;\n"
"\n"
"}\n" };
// Returns the total number of work-items described by an ND-range, i.e. the
// product of its first `dim` extents. `dim` must be 1, 2 or 3; any other
// value is logged as an error and yields 0.
size_t flatten_ndrange(size_t* ndrange, size_t dim)
{
    if (dim < 1 || dim > 3)
    {
        log_error("ERROR: bad ndrange value");
        return 0;
    }

    size_t total = ndrange[0];
    for (size_t axis = 1; axis < dim; ++axis)
    {
        total *= ndrange[axis];
    }
    return total;
}
// Runs the sub-group kernel with a single work-group of `local_size` x 1 x 1
// work-items (global size == local size) using `dim` dimensions, and returns
// in `size` the number of sub-groups the kernel reported.
//
// `size` is set to 0 first and is only updated when every call succeeds.
// Returns CL_SUCCESS or the error code of the first failing call.
cl_int get_sub_group_num(cl_command_queue queue, cl_kernel kernel, clMemWrapper& out, size_t& size, size_t local_size, size_t dim)
{
    size_t ndrange[3] = {local_size, 1, 1};
    // The kernel writes a 32-bit int; read it into a variable of exactly that
    // width instead of the first 4 bytes of a size_t, which only yields the
    // right value on little-endian hosts.
    cl_int reported = 0;
    cl_int error = CL_SUCCESS;

    size = 0;

    // Stop at the first failure: adding error codes together produces a
    // meaningless value and keeps issuing commands after something went wrong.
    error = clSetKernelArg(kernel, 0, sizeof(out), &out);
    if (error != CL_SUCCESS)
    {
        return error;
    }

    error = clEnqueueNDRangeKernel(queue, kernel, (cl_uint)dim, NULL, ndrange, ndrange, 0, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        return error;
    }

    error = clEnqueueReadBuffer(queue, out, CL_TRUE, 0, sizeof(reported), &reported, 0, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        return error;
    }

    size = (size_t)reported;
    return CL_SUCCESS;
}
int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
static const size_t gsize0 = 80;
int i, error;
size_t realSize;
size_t kernel_max_subgroup_size, kernel_subgroup_count;
size_t global[] = {1,1,1};
size_t max_local;
cl_platform_id platform;
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper out;
size_t ret_ndrange1d;
size_t ret_ndrange2d[2];
size_t ret_ndrange3d[3];
size_t ret_ndrange2d_flattened;
size_t ret_ndrange3d_flattened;
error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, subgroup_dispatch_kernel, "subgroup_dispatch_kernel", "-cl-std=CL2.0");
if (error != 0)
return error;
out = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(size_t), NULL, &error);
test_error(error, "clCreateBuffer failed");
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_local, NULL);
test_error(error, "clGetDeviceInfo failed");
error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL);
test_error(error, "clDeviceInfo failed for CL_DEVICE_PLATFORM");
// Get the max subgroup size
error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
sizeof(max_local), &max_local, sizeof(kernel_max_subgroup_size), (void *)&kernel_max_subgroup_size, &realSize);
test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE");
log_info("The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE for the kernel is %d.\n", (int)kernel_max_subgroup_size);
if (realSize != sizeof(kernel_max_subgroup_size)) {
log_error( "ERROR: Returned size of max sub group size not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_max_subgroup_size), (int)realSize );
return -1;
}
// Get the number of subgroup for max local size
error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
sizeof(max_local), &max_local, sizeof(kernel_subgroup_count), (void *)&kernel_subgroup_count, &realSize);
test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE");
log_info("The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE for the kernel is %d.\n", (int)kernel_subgroup_count);
if (realSize != sizeof(kernel_subgroup_count)) {
log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize );
return -1;
}
// test CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT
for (size_t i = kernel_subgroup_count; i > 0; --i)
{
// test all 3 different dimention of requested local size
size_t expect_size = kernel_max_subgroup_size * i;
size_t kernel_ret_size = 0;
error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(i), &i, sizeof(ret_ndrange1d), &ret_ndrange1d, &realSize);
test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
if (realSize != sizeof(ret_ndrange1d)) {
log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize );
return -1;
}
if (ret_ndrange1d != expect_size)
{
log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", (int)expect_size, (int)ret_ndrange1d );
return -1;
}
error = get_sub_group_num(queue, kernel, out, kernel_ret_size, ret_ndrange1d, 1);
test_error(error, "Failed to query number of subgroups from kernel");
if (i != kernel_ret_size)
{
log_error( "ERROR: Mismatch between requested number of subgroups and what get_num_sub_groups() in kernel returned! (Expected %d, got %d)\n", (int)i, (int)kernel_ret_size );
return -1;
}
error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(i), &i, sizeof(ret_ndrange2d), ret_ndrange2d, &realSize);
test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
if (realSize != sizeof(ret_ndrange2d)) {
log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize );
return -1;
}
ret_ndrange2d_flattened = flatten_ndrange(ret_ndrange2d, 2);
if (ret_ndrange2d_flattened != expect_size ||
ret_ndrange2d[1] != 1)
{
log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", (int)expect_size, (int)ret_ndrange2d_flattened );
return -1;
}
error = get_sub_group_num(queue, kernel, out, kernel_ret_size, ret_ndrange2d_flattened, 2);
test_error(error, "Failed to query number of subgroups from kernel");
if (i != kernel_ret_size)
{
log_error( "ERROR: Mismatch between requested number of subgroups and what get_num_sub_groups() in kernel returned! (Expected %d, got %d)\n", (int)i, (int)kernel_ret_size );
return -1;
}
error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(i), &i, sizeof(ret_ndrange3d), ret_ndrange3d, &realSize);
test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
if (realSize != sizeof(ret_ndrange3d)) {
log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize );
return -1;
}
ret_ndrange3d_flattened = flatten_ndrange(ret_ndrange3d, 3);
if (ret_ndrange3d_flattened != expect_size ||
ret_ndrange3d[1] != 1 ||
ret_ndrange3d[2] != 1)
{
log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", (int)expect_size, (int)ret_ndrange3d_flattened );
return -1;
}
error = get_sub_group_num(queue, kernel, out, kernel_ret_size, ret_ndrange3d_flattened, 3);
test_error(error, "Failed to query number of subgroups from kernel");
if (i != kernel_ret_size)
{
log_error( "ERROR: Mismatch between requested number of subgroups and what get_num_sub_groups() in kernel returned! (Expected %d, got %d)\n", (int)i, (int)kernel_ret_size );
return -1;
}
}
// test when input subgroup count exceeds max wg size
size_t large_sg_size = kernel_subgroup_count + 1;
error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(size_t), &large_sg_size, sizeof(ret_ndrange1d), &ret_ndrange1d, &realSize);
test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
if (ret_ndrange1d != 0)
{
log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", 0, (int)ret_ndrange1d );
return -1;
}
error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(size_t), &large_sg_size, sizeof(ret_ndrange2d), ret_ndrange2d, &realSize);
test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
if (ret_ndrange2d[0] != 0 ||
ret_ndrange2d[1] != 0)
{
log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT!" );
return -1;
}
error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(size_t), &large_sg_size, sizeof(ret_ndrange3d), ret_ndrange3d, &realSize);
test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
if (ret_ndrange3d[0] != 0 ||
ret_ndrange3d[1] != 0 ||
ret_ndrange3d[2] != 0)
{
log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT!" );
return -1;
}
return 0;
}

View File

@@ -0,0 +1,209 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/conversions.h"
// OpenCL C source for the zero-sized-enqueue tests: each work-item stores
// (int)src[tid] into dst[tid]. The tests enqueue it with ND-ranges that
// contain a zero extent and then check that dst is still all zeros.
const char *zero_sized_enqueue_test_kernel[] = {
"__kernel void foo_kernel(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n" };
// Number of int elements in each of the two test buffers.
const int bufSize = 128;
// Enqueues `kernel` with the given (zero-sized) ND-range, waits for the queue
// to drain and verifies that the first bufSize ints of `buf` are still zero.
//
// Returns CL_SUCCESS when the enqueue is accepted and the buffer is unchanged,
// CL_INVALID_OPERATION when the buffer was modified, or the error code of the
// first failing OpenCL call.
cl_int test_zero_sized_enqueue_and_test_output_buffer(cl_command_queue queue, clKernelWrapper& kernel, clMemWrapper& buf, size_t dim, size_t ndrange[])
{
    cl_int error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, ndrange, NULL, 0, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        return error;
    }

    error = clFinish(queue);
    if (error != CL_SUCCESS)
    {
        return error;
    }

    // check output buffer has not changed.
    int* output = reinterpret_cast<int*>(clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, sizeof(int) * bufSize, 0, NULL, NULL, &error));
    if (error != CL_SUCCESS)
    {
        return error;
    }

    bool changed = false;
    for (int i = 0; i < bufSize; ++i)
    {
        if (output[i] != 0)
        {
            changed = true;
            break;
        }
    }

    // Always unmap, also when the contents are wrong, so the mapping is not
    // left dangling on the failure path.
    error = clEnqueueUnmapMemObject(queue, buf, output, 0, NULL, NULL);

    if (changed)
    {
        log_error( "ERROR: output buffer value has changed.\n" );
        return CL_INVALID_OPERATION;
    }
    return error;
}
// Runs the zero-sized ND-range checks on `queue`:
//   1. every 1D/2D/3D global size containing at least one zero extent must be
//      accepted by clEnqueueNDRangeKernel and must leave the output buffer
//      untouched,
//   2. a zero-sized enqueue that waits on a user event must return an event
//      that is CL_QUEUED while the user event is pending and CL_COMPLETE once
//      the user event has been signalled and the queue has finished.
// Returns 0 on success, an OpenCL error code (via test_error) or -1.
int test_zero_sized_enqueue_helper(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    size_t ndrange1 = 0;
    size_t ndrange20[2] = {0, 0};
    size_t ndrange21[2] = {1, 0};
    size_t ndrange22[2] = {0, 1};
    size_t ndrange30[3] = {0, 0, 0};
    size_t ndrange31[3] = {1, 0, 0};
    size_t ndrange32[3] = {0, 1, 0};
    size_t ndrange33[3] = {0, 0, 1};
    size_t ndrange34[3] = {0, 1, 1};
    size_t ndrange35[3] = {1, 0, 1};
    size_t ndrange36[3] = {1, 1, 0};

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, bufSize * sizeof(int), NULL, &error);
    test_error( error, "clCreateBuffer failed." );
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, bufSize * sizeof(int), NULL, &error);
    test_error( error, "clCreateBuffer failed." );

    // Zero the output buffer. The source data lives on the stack, so no early
    // return below can leak it.
    int zeros[bufSize] = { 0 };
    error = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(zeros), zeros, 0, NULL, NULL);
    test_error( error, "clEnqueueWriteBuffer failed." );

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context, &program, &kernel, 1, zero_sized_enqueue_test_kernel, "foo_kernel" ) != 0 )
    {
        return -1;
    }

    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &streams[0]);
    test_error( error, "clSetKernelArg failed." );
    error = clSetKernelArg(kernel, 1, sizeof(cl_mem), &streams[1]);
    test_error( error, "clSetKernelArg failed." );

    // Simple API return code tests for 1D, 2D and 3D zero sized ND range.
    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 1, &ndrange1);
    test_error( error, "1D zero sized kernel enqueue failed." );

    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 2, ndrange20);
    test_error( error, "2D zero sized kernel enqueue failed." );
    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 2, ndrange21);
    test_error( error, "2D zero sized kernel enqueue failed." );
    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 2, ndrange22);
    test_error( error, "2D zero sized kernel enqueue failed." );

    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange30);
    test_error( error, "3D zero sized kernel enqueue failed." );
    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange31);
    test_error( error, "3D zero sized kernel enqueue failed." );
    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange32);
    test_error( error, "3D zero sized kernel enqueue failed." );
    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange33);
    test_error( error, "3D zero sized kernel enqueue failed." );
    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange34);
    test_error( error, "3D zero sized kernel enqueue failed." );
    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange35);
    test_error( error, "3D zero sized kernel enqueue failed." );
    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange36);
    test_error( error, "3D zero sized kernel enqueue failed." );

    // Verify zero-sized ND range kernel still satisfy event wait list and correct event object
    // is returned
    cl_event ev = NULL;
    clEventWrapper user_ev = clCreateUserEvent(context, &error);
    test_error( error, "user event creation failed." );

    // NOTE(review): work_dim is 1 here although the range array and the error
    // text say 3D; only ndrange30[0] is read. Left as in the original.
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, ndrange30, NULL, 1, &user_ev, &ev);
    test_error( error, "3D zero sized kernel enqueue failed." );

    if (ev == NULL)
    {
        // Signal the user event so the enqueued command is not left blocked.
        clSetUserEventStatus(user_ev, CL_COMPLETE);
        log_error( "ERROR: failed to create an event object\n" );
        return -1;
    }

    // Hand the returned event to a wrapper so it is released on every exit
    // path (assumes clEventWrapper releases its event on destruction, as the
    // other wrappers in this file are used -- TODO confirm in typeWrappers.h).
    clEventWrapper ev_owner = ev;

    cl_int sta = CL_COMPLETE;
    error = clGetEventInfo(ev, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &sta, NULL);
    const bool status_query_ok = (error == CL_SUCCESS);

    // now unblock zero-sized enqueue. This is done before the status recorded
    // above is judged, so that a failure return never leaves a command
    // waiting forever on the user event.
    const cl_int unblock_error = clSetUserEventStatus(user_ev, CL_COMPLETE);

    test_error( error, "Failed to get event status.");
    if (status_query_ok && sta != CL_QUEUED)
    {
        log_error( "ERROR: incorrect zero sized kernel enqueue event status.\n" );
        return -1;
    }
    test_error( unblock_error, "Failed to set user event status.");

    error = clFinish(queue);
    test_error( error, "clFinish failed." );

    // now check zero sized enqueue event status
    error = clGetEventInfo(ev, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &sta, NULL);
    test_error( error, "Failed to get event status.");
    if (sta != CL_COMPLETE)
    {
        log_error( "ERROR: incorrect zero sized kernel enqueue event status.\n" );
        return -1;
    }

    return 0;
}
// Entry point for the zero-sized enqueue test. Runs the checks on the
// harness-provided queue and, if the device reports support for out-of-order
// execution, repeats them on a freshly created out-of-order queue.
// Returns 0 on success, otherwise the first failing result.
int test_zero_sized_enqueue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int res = test_zero_sized_enqueue_helper(deviceID, context, queue, num_elements);
    if (res != 0)
    {
        return res;
    }

    // now test out of order queue
    cl_command_queue_properties props;
    cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &props, NULL);
    test_error( error, "clGetDeviceInfo failed.");

    // Bitwise AND tests whether the capability bit is set. The previous
    // bitwise OR was always non-zero, so the out-of-order path also ran on
    // devices that do not support it.
    if (props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
    {
        // test out of order queue
        cl_queue_properties queue_prop_def[] =
        {
            CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
            0
        };

        clCommandQueueWrapper ooqueue = clCreateCommandQueueWithProperties(context, deviceID, queue_prop_def, &error);
        test_error( error, "clCreateCommandQueueWithProperties failed.");

        res = test_zero_sized_enqueue_helper(deviceID, context, ooqueue, num_elements);
    }
    else
    {
        log_info("Out-of-order queues are not supported by the device; skipping that part of the test.\n");
    }

    return res;
}

View File

@@ -0,0 +1,18 @@
# Conformance-test module for the atomics tests. MODULE_NAME selects the
# variable names that ../CMakeCommon.txt reads (<MODULE_NAME>_SOURCES).
set(MODULE_NAME ATOMICS)

# Location of the shared test-harness sources, relative to this directory.
set(ATOMICS_HARNESS_DIR ../../test_common/harness)

set(${MODULE_NAME}_SOURCES
    # Tests that belong to this module.
    main.c
    test_atomics.cpp
    test_indexed_cases.c
    # Shared harness sources compiled into the test executable.
    ${ATOMICS_HARNESS_DIR}/errorHelpers.c
    ${ATOMICS_HARNESS_DIR}/threadTesting.c
    ${ATOMICS_HARNESS_DIR}/testHarness.c
    ${ATOMICS_HARNESS_DIR}/kernelHelpers.c
    ${ATOMICS_HARNESS_DIR}/mt19937.c
    ${ATOMICS_HARNESS_DIR}/conversions.c
    ${ATOMICS_HARNESS_DIR}/msvc9.c
    ${ATOMICS_HARNESS_DIR}/parseParameters.cpp
)

# Common rules that turn <MODULE_NAME>_SOURCES into the test executable.
include(../CMakeCommon.txt)

View File

@@ -0,0 +1,17 @@
# Boost.Build rules for the atomics conformance test.

# Compile every source as C++ regardless of its file extension.
project
    : requirements
      <toolset>gcc:<cflags>-xc++
      <toolset>msvc:<cflags>"/TP"
    ;

# The atomics source is test_atomics.cpp, matching the CMake source list and
# the Makefile for this directory (it was listed here as test_atomics.c).
exe test_atomics
    : main.c
      test_atomics.cpp
      test_indexed_cases.c
    ;

# Copy the built executable into the per-variant distribution tree.
install dist
    : test_atomics
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/atomics
      <variant>release:<location>$(DIST)/release/tests/test_conformance/atomics
    ;

View File

@@ -0,0 +1,44 @@
# Makefile for the atomics conformance test (links against the OpenCL,
# OpenGL, GLUT and AppKit frameworks).

# Optional ATF support: define BUILD_WITH_ATF to link the framework and
# compile with -DUSE_ATF.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Test sources plus the shared harness sources they depend on.
SRCS = main.c \
	test_atomics.cpp \
	test_indexed_cases.c \
	../../test_common/harness/errorHelpers.c \
	../../test_common/harness/threadTesting.c \
	../../test_common/harness/testHarness.c \
	../../test_common/harness/mt19937.c \
	../../test_common/harness/conversions.c \
	../../test_common/harness/kernelHelpers.c

DEFINES =

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
FRAMEWORK = $(SOURCES)
HEADERS =
TARGET = test_atomics
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
# The C++ driver is used for every source, including the .c files.
CC = c++
CFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Map every .c and .cpp source to its object file.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =

# all and clean name actions, not files; without .PHONY a stray file called
# "all" or "clean" would stop them from running.
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Fallback for targets that have no rule.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,71 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "procs.h"
#include "../../test_common/harness/testHarness.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif
// Table of test entry points handed to runTestHarness() in main().
// It must have the same number of entries as basefn_names (checked by the
// ct_assert that follows the tables).
// NOTE(review): entries appear to be paired by index with basefn_names --
// keep both tables in the same order.
basefn basefn_list[] = {
test_atomic_add,
test_atomic_sub,
test_atomic_xchg,
test_atomic_min,
test_atomic_max,
test_atomic_inc,
test_atomic_dec,
test_atomic_cmpxchg,
test_atomic_and,
test_atomic_or,
test_atomic_xor,
test_atomic_add_index,
test_atomic_add_index_bin
};
// Names of the tests, handed to runTestHarness() alongside basefn_list.
// NOTE(review): presumably these are the names used to select and report
// tests, matched by index with basefn_list -- confirm against the harness.
const char *basefn_names[] = {
"atomic_add",
"atomic_sub",
"atomic_xchg",
"atomic_min",
"atomic_max",
"atomic_inc",
"atomic_dec",
"atomic_cmpxchg",
"atomic_and",
"atomic_or",
"atomic_xor",
"atomic_add_index",
"atomic_add_index_bin",
};
// Compile-time check that the name table and the function table have the same length.
ct_assert((sizeof(basefn_list) / sizeof(basefn_list[0])) == (sizeof(basefn_names) / sizeof(basefn_names[0])));

// Number of registered tests, derived from the name table.
int num_fns = (int)(sizeof(basefn_names) / sizeof(basefn_names[0]));

// Program entry point: forwards the command line and both test tables to the
// shared harness and returns its result as the exit status. The trailing
// false, false, 0 arguments are harness options defined by runTestHarness().
int main(int argc, const char *argv[])
{
    const int harnessResult = runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
    return harnessResult;
}

View File

@@ -0,0 +1,39 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/threadTesting.h"
#include "../../test_common/harness/typeWrappers.h"
// Helper declared for the atomics tests; its definition is not part of this header.
extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret);

// Test entry points registered in main.c's basefn_list. All share the same
// signature: the device, context and command queue to test with, plus an
// element count.
// NOTE(review): the return convention (0 = pass/skip, non-zero = failure) is
// what test_atomic_add_index and test_atomic_add_index_bin implement;
// presumably the others follow it -- confirm.
extern int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

View File

@@ -0,0 +1,31 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _testBase_h
#define _testBase_h
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#endif // _testBase_h

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,380 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/conversions.h"
extern cl_uint gRandomSeed;
// OpenCL C source of the "add_index_test" kernel built by test_atomic_add_index().
// Each work-item reserves a slot with atom_add(counter, 1) and stores its
// global id into counts[] at the slot index it got back.
const char * atomic_index_source =
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
"// Counter keeps track of which index in counts we are using.\n"
"// We get that value, increment it, and then set that index in counts to our thread ID.\n"
"// At the end of this we should have all thread IDs in some random location in counts\n"
"// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
"// will be missing some.\n"
"\n"
"__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
" int tid = get_global_id(0);\n"
" \n"
" int counter_to_use = atom_add(counter, 1);\n"
" counts[counter_to_use] = tid;\n"
"}";
// Verifies atom_add() by letting every work-item atomically claim a slot index
// from a shared counter and write its global id into that slot.
//
// The test passes when every global id in [0, 2048) appears exactly once in
// the result buffer.
//
// deviceID     - device queried for cl_khr_global_int32_base_atomics.
// context      - context used to build the kernel and create the buffers.
// queue        - command queue used for all transfers and the kernel launch.
// num_elements - unused; the work size is fixed at 2048.
//
// Returns 0 on success or when the extension is unavailable (test skipped),
// and a non-zero value on any failure.
int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper counter, counters;
    size_t numGlobalThreads, numLocalThreads;
    int fail = 0;
    cl_int err;

    /* Check if atomics are supported. */
    if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
        log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n");
        return 0;
    }

    //===== add_index test
    // The index test replicates what particles does.
    // It uses one memory location to keep track of the current index and then each thread
    // does an atomic add to it to get its new location. The threads then write to their
    // assigned location. At the end we check to make sure that each thread's ID shows up
    // exactly once in the output.
    numGlobalThreads = 2048;

    if( create_single_kernel_helper( context, &program, &kernel, 1, &atomic_index_source, "add_index_test" ) )
        return -1;

    if( get_max_common_work_group_size( context, kernel, numGlobalThreads, &numLocalThreads ) )
        return -1;

    log_info("Execute global_threads:%d local_threads:%d\n",
             (int)numGlobalThreads, (int)numLocalThreads);

    // Create the counter that will keep track of where each thread writes.
    counter = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                             sizeof(cl_int) * 1, NULL, &err);
    if (err != CL_SUCCESS) {
        log_error("add_index_test FAILED to create the counter buffer: %d\n", err);
        return -1;
    }

    // Create the counters that will hold the results of each thread writing
    // its ID into a (hopefully) unique location.
    counters = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                              sizeof(cl_int) * numGlobalThreads, NULL, &err);
    if (err != CL_SUCCESS) {
        log_error("add_index_test FAILED to create the counters buffer: %d\n", err);
        return -1;
    }

    // Reset all those locations to -1 to indicate they have not been used.
    cl_int *values = (cl_int*) malloc(sizeof(cl_int)*numGlobalThreads);
    if (values == NULL) {
        log_error("add_index_test FAILED to allocate memory for initial values.\n");
        return 1;
    }

    unsigned int i;
    for (i=0; i<numGlobalThreads; i++)
        values[i] = -1;
    cl_int init = 0;

    err = clEnqueueWriteBuffer(queue, counters, true, 0, numGlobalThreads*sizeof(cl_int), values, 0, NULL, NULL);
    err |= clEnqueueWriteBuffer(queue, counter, true, 0, 1*sizeof(cl_int), &init, 0, NULL, NULL);
    if (err) {
        log_error("add_index_test FAILED to write initial values to arrays: %d\n", err);
        fail = 1;
    }

    if (!fail) {
        err = clSetKernelArg(kernel, 0, sizeof(counter), &counter);
        err |= clSetKernelArg(kernel, 1, sizeof(counters), &counters);
        if (err) {
            log_error("add_index_test FAILED to set kernel arguments: %d\n", err);
            fail = 1;
        }
    }

    if (!fail) {
        err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numGlobalThreads, &numLocalThreads, 0, NULL, NULL );
        if (err) {
            log_error("add_index_test FAILED to execute kernel: %d\n", err);
            fail = 1;
        }
    }

    if (!fail) {
        // Blocking read: also waits for the kernel on this queue to finish.
        err = clEnqueueReadBuffer( queue, counters, true, 0, sizeof(cl_int)*numGlobalThreads, values, 0, NULL, NULL );
        if (err) {
            log_error("add_index_test FAILED to read back results: %d\n", err);
            fail = 1;
        }
    }

    if (!fail) {
        // Every global id must show up exactly once among the written slots.
        unsigned int looking_for, index;
        for (looking_for=0; looking_for<numGlobalThreads; looking_for++) {
            int instances_found = 0;
            for (index=0; index<numGlobalThreads; index++) {
                if (values[index]==(int)looking_for)
                    instances_found++;
            }
            if (instances_found != 1) {
                log_error("add_index_test FAILED: wrong number of instances (%d!=1) for counter %d.\n", instances_found, looking_for);
                fail = 1;
            }
        }
    }

    if (!fail) {
        log_info("add_index_test passed. Each thread used exactly one index.\n");
    }

    free(values);
    return fail;
}
// OpenCL C source of the "add_index_bin_test" kernel built by add_index_bin_test().
// Each work-item looks up its bin in bin_assignments, reserves a position in
// that bin with atom_add on the bin's counter, and stores its global id at
// bins[bin * max_counts_per_bin + position].
const char *add_index_bin_kernel[] = {
"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
"// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n"
"// using an atomic add to keep track of the current location to write into in each bin.\n"
"// This is the same as the memory update for the particles demo.\n"
"\n"
"__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
" int tid = get_global_id(0);\n"
"\n"
" int location = bin_assignments[tid];\n"
" int counter = atom_add(&bin_counters[location], 1);\n"
" bins[location*max_counts_per_bin + counter] = tid;\n"
"}" };
// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel
// using an atomic add to keep track of the current location to write into in each bin.
// This is the same as the memory update for the particles demo.
int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_context context, MTdata d)
{
int number_of_items = (int)global_threads[0];
size_t local_threads[1];
int divisor = 12;
int number_of_bins = number_of_items/divisor;
int max_counts_per_bin = divisor*2;
int fail = 0;
int succeed = 0;
int err;
clProgramWrapper program;
clKernelWrapper kernel;
// log_info("add_index_bin_test: %d items, into %d bins, with a max of %d items per bin (bins is %d long).\n",
// number_of_items, number_of_bins, max_counts_per_bin, number_of_bins*max_counts_per_bin);
//===== add_index_bin test
// The index test replicates what particles does.
err = create_single_kernel_helper(context, &program, &kernel, 1, add_index_bin_kernel, "add_index_bin_test" );
test_error( err, "Unable to create testing kernel" );
if( get_max_common_work_group_size( context, kernel, global_threads[0], &local_threads[0] ) )
return -1;
log_info("Execute global_threads:%d local_threads:%d\n",
(int)global_threads[0], (int)local_threads[0]);
// Allocate our storage
cl_mem bin_counters = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
sizeof(cl_int) * number_of_bins, NULL, NULL);
cl_mem bins = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
sizeof(cl_int) * number_of_bins*max_counts_per_bin, NULL, NULL);
cl_mem bin_assignments = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_ONLY),
sizeof(cl_int) * number_of_items, NULL, NULL);
if (bin_counters == NULL) {
log_error("add_index_bin_test FAILED to allocate bin_counters.\n");
return -1;
}
if (bins == NULL) {
log_error("add_index_bin_test FAILED to allocate bins.\n");
return -1;
}
if (bin_assignments == NULL) {
log_error("add_index_bin_test FAILED to allocate bin_assignments.\n");
return -1;
}
// Initialize our storage
cl_int *l_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
if (!l_bin_counts) {
log_error("add_index_bin_test FAILED to allocate initial values for bin_counters.\n");
return -1;
}
int i;
for (i=0; i<number_of_bins; i++)
l_bin_counts[i] = 0;
err = clEnqueueWriteBuffer(queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, l_bin_counts, 0, NULL, NULL);
if (err) {
log_error("add_index_bin_test FAILED to set initial values for bin_counters: %d\n", err);
return -1;
}
cl_int *values = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
if (!values) {
log_error("add_index_bin_test FAILED to allocate initial values for bins.\n");
return -1;
}
for (i=0; i<number_of_bins*max_counts_per_bin; i++)
values[i] = -1;
err = clEnqueueWriteBuffer(queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, values, 0, NULL, NULL);
if (err) {
log_error("add_index_bin_test FAILED to set initial values for bins: %d\n", err);
return -1;
}
free(values);
cl_int *l_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_items);
if (!l_bin_assignments) {
log_error("add_index_bin_test FAILED to allocate initial values for l_bin_assignments.\n");
return -1;
}
for (i=0; i<number_of_items; i++) {
int bin = random_in_range(0, number_of_bins-1, d);
while (l_bin_counts[bin] >= max_counts_per_bin) {
bin = random_in_range(0, number_of_bins-1, d);
}
if (bin >= number_of_bins)
log_error("add_index_bin_test internal error generating bin assignments: bin %d >= number_of_bins %d.\n", bin, number_of_bins);
if (l_bin_counts[bin]+1 > max_counts_per_bin)
log_error("add_index_bin_test internal error generating bin assignments: bin %d has more entries (%d) than max_counts_per_bin (%d).\n", bin, l_bin_counts[bin], max_counts_per_bin);
l_bin_counts[bin]++;
l_bin_assignments[i] = bin;
// log_info("item %d assigned to bin %d (%d items)\n", i, bin, l_bin_counts[bin]);
}
err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int)*number_of_items, l_bin_assignments, 0, NULL, NULL);
if (err) {
log_error("add_index_bin_test FAILED to set initial values for bin_assignments: %d\n", err);
return -1;
}
// Setup the kernel
err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters);
err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins);
err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments);
err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), &max_counts_per_bin);
if (err) {
log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err);
fail=1; succeed=-1;
return -1;
}
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
if (err) {
log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
fail=1; succeed=-1;
}
cl_int *final_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
if (!final_bin_assignments) {
log_error("add_index_bin_test FAILED to allocate initial values for final_bin_assignments.\n");
return -1;
}
err = clEnqueueReadBuffer( queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, final_bin_assignments, 0, NULL, NULL );
if (err) {
log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
fail = 1; succeed=-1;
}
cl_int *final_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
if (!final_bin_counts) {
log_error("add_index_bin_test FAILED to allocate initial values for final_bin_counts.\n");
return -1;
}
err = clEnqueueReadBuffer( queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, final_bin_counts, 0, NULL, NULL );
if (err) {
log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err);
fail = 1; succeed=-1;
}
// Verification.
int errors=0;
int current_bin;
int search;
// Print out all the contents of the bins.
// for (current_bin=0; current_bin<number_of_bins; current_bin++)
// for (search=0; search<max_counts_per_bin; search++)
// log_info("[bin %d, entry %d] = %d\n", current_bin, search, final_bin_assignments[current_bin*max_counts_per_bin+search]);
// First verify that there are the correct number in each bin.
for (current_bin=0; current_bin<number_of_bins; current_bin++) {
int expected_number = l_bin_counts[current_bin];
int actual_number = final_bin_counts[current_bin];
if (expected_number != actual_number) {
log_error("add_index_bin_test FAILED: bin %d reported %d entries when %d were expected.\n", current_bin, actual_number, expected_number);
errors++;
}
for (search=0; search<expected_number; search++) {
if (final_bin_assignments[current_bin*max_counts_per_bin+search] == -1) {
log_error("add_index_bin_test FAILED: bin %d had no entry at position %d when it should have had %d entries.\n", current_bin, search, expected_number);
errors++;
}
}
for (search=expected_number; search<max_counts_per_bin; search++) {
if (final_bin_assignments[current_bin*max_counts_per_bin+search] != -1) {
log_error("add_index_bin_test FAILED: bin %d had an extra entry at position %d when it should have had only %d entries.\n", current_bin, search, expected_number);
errors++;
}
}
}
// Now verify that the correct ones are in each bin
int index;
for (index=0; index<number_of_items; index++) {
int expected_bin = l_bin_assignments[index];
int found_it = 0;
for (search=0; search<l_bin_counts[expected_bin]; search++) {
if (final_bin_assignments[expected_bin*max_counts_per_bin+search] == index) {
found_it = 1;
}
}
if (found_it == 0) {
log_error("add_index_bin_test FAILED: did not find item %d in bin %d.\n", index, expected_bin);
errors++;
}
}
free(l_bin_counts);
free(l_bin_assignments);
free(final_bin_assignments);
free(final_bin_counts);
clReleaseMemObject(bin_counters);
clReleaseMemObject(bins);
clReleaseMemObject(bin_assignments);
if (errors == 0) {
log_info("add_index_bin_test passed. Each item was put in the correct bin in parallel.\n");
return 0;
} else {
log_error("add_index_bin_test FAILED: %d errors.\n", errors);
return -1;
}
}
// Harness entry point for the binned atomic-add test.
// Runs add_index_bin_test() up to ten times, starting at 2048 items and
// doubling the item count after each successful pass; stops at the first
// failing pass. Returns 0 when all passes succeed or when
// cl_khr_global_int32_base_atomics is unavailable (test skipped), 1 otherwise.
int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    //===== add_index_bin test
    const int kPasses = 10;
    size_t itemCount = 2048;
    int result = 0;
    MTdata rng = init_genrand( gRandomSeed );

    /* Check if atomics are supported. */
    if (is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
        int pass = 0;
        while (pass < kPasses && result == 0) {
            log_info("add_index_bin_test with %d elements:\n", (int)itemCount);
            if (add_index_bin_test(&itemCount, queue, context, rng)) {
                result = 1;
            } else {
                itemCount *= 2;
            }
            pass++;
        }
    } else {
        log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n");
    }

    free_mtdata( rng );
    return result;
}

View File

@@ -0,0 +1,83 @@
# Source list for the "basic" conformance test executable.
#
# CMakeCommon.txt (included at the end of this file) reads MODULE_NAME and
# ${MODULE_NAME}_SOURCES to create the executable target, so both variable
# names must be kept as they are.
set(MODULE_NAME BASIC)

# Each source is listed exactly once: test sources first, then the shared
# harness sources.
set(${MODULE_NAME}_SOURCES
    main.c
    test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c
    test_intmath_int.c test_intmath_int2.c test_intmath_int4.c
    test_intmath_long.c test_intmath_long2.c test_intmath_long4.c
    test_hiloeo.c test_local.c test_pointercast.c
    test_if.c test_loop.c
    test_readimage.c test_readimage_int16.c test_readimage_fp32.c
    test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c
    test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c
    test_multireadimageonefmt.c test_multireadimagemultifmt.c
    test_imagedim.c
    test_vloadstore.c
    test_int2float.c test_float2int.c
    test_createkernelsinprogram.c
    test_hostptr.c
    test_explicit_s2v.cpp
    test_constant.c
    test_image_multipass.c
    test_imagereadwrite.c test_imagereadwrite3d.c
    test_image_param.c
    test_imagenpot.c
    test_image_r8.c
    test_barrier.c
    test_basic_parameter_types.c
    test_arrayreadwrite.c
    test_arraycopy.c
    test_imagearraycopy.c
    test_imagearraycopy3d.c
    test_imagecopy.c
    test_imagerandomcopy.c
    test_arrayimagecopy.c
    test_arrayimagecopy3d.c
    test_imagecopy3d.c
    test_enqueue_map.cpp
    test_work_item_functions.cpp
    test_astype.cpp
    test_async_copy.cpp
    test_sizeof.c
    test_vector_creation.cpp
    test_vec_type_hint.c
    test_numeric_constants.cpp
    test_constant_source.cpp
    test_bufferreadwriterect.c
    test_async_strided_copy.cpp
    test_preprocessors.cpp
    test_kernel_memory_alignment.cpp
    test_global_work_offsets.cpp
    test_kernel_call_kernel_function.cpp
    test_local_kernel_scope.cpp
    test_progvar.cpp
    test_wg_barrier.c
    test_global_linear_id.c
    test_local_linear_id.c
    test_enqueued_local_size.c
    test_simple_image_pitch.c
    test_get_linear_ids.cpp
    test_rw_image_access_qualifier.c
    # Shared test harness
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/threadTesting.c
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/typeWrappers.cpp
    ../../test_common/harness/imageHelpers.cpp
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/conversions.c
    ../../test_common/harness/rounding_mode.c
    ../../test_common/harness/msvc9.c
    ../../test_common/harness/parseParameters.cpp
)

# test_queue_priority is only registered in main.c under __APPLE__, so its
# source is only compiled on Apple platforms.
if(APPLE)
    list(APPEND ${MODULE_NAME}_SOURCES test_queue_priority.c)
endif()

include(../CMakeCommon.txt)

View File

@@ -0,0 +1,74 @@
# Project-wide requirements for this Jamfile: pass -xc++ to gcc and /TP to
# msvc as C compiler flags, i.e. ask both toolsets to treat the sources as C++.
project
: requirements
<toolset>gcc:<cflags>-xc++
<toolset>msvc:<cflags>"/TP"
;
# The test_basic executable: test sources followed by the shared harness
# sources; msvc9.c is added only when the target OS is Windows.
# NOTE(review): this list lacks several sources that the CMake source list for
# this module contains (for example test_wg_barrier.c, test_progvar.cpp,
# test_global_linear_id.c, test_local_linear_id.c, test_enqueued_local_size.c,
# test_simple_image_pitch.c, test_rw_image_access_qualifier.c), while main.c
# registers tests with matching names -- confirm whether this Jamfile is still
# maintained before relying on it.
exe test_basic
: main.c
test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c
test_intmath_int.c test_intmath_int2.c test_intmath_int4.c
test_intmath_long.c test_intmath_long2.c test_intmath_long4.c
test_hiloeo.c test_local.c test_pointercast.c
test_if.c test_sizeof.c test_loop.c
test_readimage.c test_readimage_int16.c test_readimage_fp32.c
test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c
test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c
test_multireadimageonefmt.c test_multireadimagemultifmt.c
test_imagedim.c
test_vloadstore.c
test_int2float.c test_float2int.c
test_createkernelsinprogram.c
test_hostptr.c
test_explicit_s2v.cpp
test_constant.c
test_constant_source.cpp
test_image_multipass.c
test_imagereadwrite.c test_imagereadwrite3d.c
test_bufferreadwriterect.c
test_image_param.c
test_imagenpot.c
test_image_r8.c
test_barrier.c
test_arrayreadwrite.c
test_arraycopy.c
test_imagearraycopy.c
test_imagearraycopy3d.c
test_imagecopy.c
test_imagerandomcopy.c
test_arrayimagecopy.c
test_arrayimagecopy3d.c
test_imagecopy3d.c
test_enqueue_map.cpp
test_work_item_functions.cpp
test_astype.cpp
test_async_copy.cpp
test_async_strided_copy.cpp
test_numeric_constants.cpp
test_kernel_call_kernel_function.cpp
test_basic_parameter_types.c
test_vector_creation.cpp
test_vec_type_hint.c
test_preprocessors.cpp
test_kernel_memory_alignment.cpp
test_global_work_offsets.cpp
test_local_kernel_scope.cpp
test_get_linear_ids.cpp
../../test_common/harness/errorHelpers.c
../../test_common/harness/threadTesting.c
../../test_common/harness/testHarness.c
../../test_common/harness/rounding_mode.c
../../test_common/harness/kernelHelpers.c
../../test_common/harness/typeWrappers.cpp
../../test_common/harness/imageHelpers.cpp
../../test_common/harness/mt19937.c
../../test_common/harness/conversions.c
: <target-os>windows:<source>../../test_common/harness/msvc9.c
;
# Install rule: copies test_basic into a variant-specific (debug/release)
# directory underneath $(DIST).
install dist
: test_basic
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/basic
<variant>release:<location>$(DIST)/release/tests/test_conformance/basic
;

View File

@@ -0,0 +1,103 @@
# Optional ATF support: when BUILD_WITH_ATF is defined, the ATF framework is
# added to the link line and -DUSE_ATF to the compile flags.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Sources of the test_basic binary: test sources first, then the shared
# harness sources. test_get_linear_ids is listed with its .cpp suffix, matching
# the CMake and Jam source lists for this module; the derived object name is
# unchanged because both .c and .cpp are mapped to .o.
SRCS = main.c \
    test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c \
    test_intmath_int.c test_intmath_int2.c test_intmath_int4.c \
    test_intmath_long.c test_intmath_long2.c test_intmath_long4.c \
    test_hiloeo.c test_local.c test_local_kernel_scope.cpp test_pointercast.c \
    test_if.c test_sizeof.c test_loop.c \
    test_readimage.c test_readimage_int16.c test_readimage_fp32.c \
    test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c \
    test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c \
    test_multireadimageonefmt.c test_multireadimagemultifmt.c \
    test_imagedim.c \
    test_vloadstore.c \
    test_int2float.c test_float2int.c \
    test_createkernelsinprogram.c \
    test_hostptr.c \
    test_explicit_s2v.cpp \
    test_constant.c \
    test_constant_source.cpp \
    test_image_multipass.c \
    test_imagereadwrite.c test_imagereadwrite3d.c \
    test_bufferreadwriterect.c \
    test_image_param.c \
    test_imagenpot.c \
    test_image_r8.c \
    test_barrier.c \
    test_wg_barrier.c \
    test_arrayreadwrite.c \
    test_arraycopy.c \
    test_imagearraycopy.c \
    test_imagearraycopy3d.c \
    test_imagecopy.c \
    test_imagerandomcopy.c \
    test_arrayimagecopy.c \
    test_arrayimagecopy3d.c \
    test_imagecopy3d.c \
    test_enqueue_map.cpp \
    test_work_item_functions.cpp \
    test_astype.cpp \
    test_async_copy.cpp \
    test_async_strided_copy.cpp \
    test_numeric_constants.cpp \
    test_kernel_call_kernel_function.cpp \
    test_basic_parameter_types.c \
    test_vector_creation.cpp \
    test_vec_type_hint.c \
    test_preprocessors.cpp \
    test_kernel_memory_alignment.cpp \
    test_global_work_offsets.cpp \
    test_simple_image_pitch.c \
    test_queue_priority.c \
    test_global_linear_id.c \
    test_local_linear_id.c \
    test_enqueued_local_size.c \
    test_get_linear_ids.cpp \
    test_progvar.cpp \
    test_rw_image_access_qualifier.c \
    ../../test_common/harness/errorHelpers.c \
    ../../test_common/harness/threadTesting.c \
    ../../test_common/harness/testHarness.c \
    ../../test_common/harness/rounding_mode.c \
    ../../test_common/harness/kernelHelpers.c \
    ../../test_common/harness/typeWrappers.cpp \
    ../../test_common/harness/imageHelpers.cpp \
    ../../test_common/harness/mt19937.c \
    ../../test_common/harness/conversions.c
# Extra preprocessor symbols; each word is turned into -D<word> in CFLAGS/CXXFLAGS.
DEFINES =
# Absolute paths of the sources; OBJECTS below is derived from these.
SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
FRAMEWORK = $(SOURCES)
HEADERS =
TARGET = test_basic
INCLUDE =
COMPILERFLAGS = -c -Wall -g -O0 -Wshorten-64-to-32
# NOTE(review): CC is set to the C++ driver; presumably so the .c sources are
# compiled as C++ by make's built-in rules -- confirm before changing.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
# Object list: replace the .c suffix, then the .cpp suffix, with .o.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}
TARGETOBJECT =
all: $(TARGET)
# Link step; no explicit compile rule is given in this file for the objects.
$(TARGET): $(OBJECTS)
$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
clean:
rm -f $(TARGET) $(OBJECTS)
# Fallback for any requested target that has no rule: print a message.
.DEFAULT:
@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,303 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../../test_common/harness/testHarness.h"
#include "procs.h"
// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h
// (for example, generate_random_image_data()), the tests are required to declare
// the following variables (<rdar://problem/11111245>):
// Both are defined here with fixed defaults; nothing in this file modifies them.
cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
bool gTestRounding = false;
// Table of test entry points handed to runTestHarness() in main().
// It must have the same number of entries as basefn_names (checked by the
// ct_assert that follows the tables); test_queue_priority is only present
// when building for Apple platforms, in both tables.
// NOTE(review): entries appear to be paired by index with basefn_names --
// keep both tables in the same order.
basefn basefn_list[] = {
test_hostptr,
test_fpmath_float,
test_fpmath_float2,
test_fpmath_float4,
test_intmath_int,
test_intmath_int2,
test_intmath_int4,
test_intmath_long,
test_intmath_long2,
test_intmath_long4,
test_hiloeo,
test_if,
test_sizeof,
test_loop,
test_pointer_cast,
test_local_arg_def,
test_local_kernel_def,
test_local_kernel_scope,
test_constant,
test_constant_source,
test_readimage,
test_readimage_int16,
test_readimage_fp32,
test_writeimage,
test_writeimage_int16,
test_writeimage_fp32,
test_multireadimageonefmt,
test_multireadimagemultifmt,
test_image_r8,
test_barrier,
test_wg_barrier,
test_int2float,
test_float2int,
test_imagereadwrite,
test_imagereadwrite3d,
test_readimage3d,
test_readimage3d_int16,
test_readimage3d_fp32,
test_bufferreadwriterect,
test_arrayreadwrite,
test_arraycopy,
test_imagearraycopy,
test_imagearraycopy3d,
test_imagecopy,
test_imagecopy3d,
test_imagerandomcopy,
test_arrayimagecopy,
test_arrayimagecopy3d,
test_imagenpot,
test_vload_global,
test_vload_local,
test_vload_constant,
test_vload_private,
test_vstore_global,
test_vstore_local,
test_vstore_private,
test_createkernelsinprogram,
test_imagedim_pow2,
test_imagedim_non_pow2,
test_image_param,
test_image_multipass_integer_coord,
test_image_multipass_float_coord,
test_explicit_s2v_bool,
test_explicit_s2v_char,
test_explicit_s2v_uchar,
test_explicit_s2v_short,
test_explicit_s2v_ushort,
test_explicit_s2v_int,
test_explicit_s2v_uint,
test_explicit_s2v_long,
test_explicit_s2v_ulong,
test_explicit_s2v_float,
test_explicit_s2v_double,
test_enqueue_map_buffer,
test_enqueue_map_image,
test_work_item_functions,
test_astype,
test_async_copy_global_to_local,
test_async_copy_local_to_global,
test_async_strided_copy_global_to_local,
test_async_strided_copy_local_to_global,
test_prefetch,
test_kernel_call_kernel_function,
test_host_numeric_constants,
test_kernel_numeric_constants,
test_kernel_limit_constants,
test_kernel_preprocessor_macros,
test_basic_parameter_types,
test_vector_creation,
test_vec_type_hint,
test_kernel_memory_alignment_local,
test_kernel_memory_alignment_global,
test_kernel_memory_alignment_constant,
test_kernel_memory_alignment_private,
test_progvar_prog_scope_misc,
test_progvar_prog_scope_uninit,
test_progvar_prog_scope_init,
test_progvar_func_scope,
test_global_work_offsets,
test_get_global_offset,
test_global_linear_id,
test_local_linear_id,
test_enqueued_local_size,
test_simple_read_image_pitch,
test_simple_write_image_pitch,
#if defined( __APPLE__ )
test_queue_priority,
#endif
test_get_linear_ids,
test_rw_image_access_qualifier
};
// Names of the tests, handed to runTestHarness() alongside basefn_list.
// NOTE(review): presumably these are the names used to select and report
// tests, matched by index with basefn_list -- confirm against the harness.
// NOTE(review): the last entry, "test_rw_image_access_qualifier", is the only
// name that carries a "test_" prefix; it looks like an oversight, but renaming
// it would change a user-visible test name -- confirm before changing.
const char *basefn_names[] = {
"hostptr",
"fpmath_float",
"fpmath_float2",
"fpmath_float4",
"intmath_int",
"intmath_int2",
"intmath_int4",
"intmath_long",
"intmath_long2",
"intmath_long4",
"hiloeo",
"if",
"sizeof",
"loop",
"pointer_cast",
"local_arg_def",
"local_kernel_def",
"local_kernel_scope",
"constant",
"constant_source",
"readimage",
"readimage_int16",
"readimage_fp32",
"writeimage",
"writeimage_int16",
"writeimage_fp32",
"mri_one",
"mri_multiple",
"image_r8",
"barrier",
"wg_barrier",
"int2float",
"float2int",
"imagereadwrite",
"imagereadwrite3d",
"readimage3d",
"readimage3d_int16",
"readimage3d_fp32",
"bufferreadwriterect",
"arrayreadwrite",
"arraycopy",
"imagearraycopy",
"imagearraycopy3d",
"imagecopy",
"imagecopy3d",
"imagerandomcopy",
"arrayimagecopy",
"arrayimagecopy3d",
"imagenpot",
"vload_global",
"vload_local",
"vload_constant",
"vload_private",
"vstore_global",
"vstore_local",
"vstore_private",
"createkernelsinprogram",
"imagedim_pow2",
"imagedim_non_pow2",
"image_param",
"image_multipass_integer_coord",
"image_multipass_float_coord",
"explicit_s2v_bool",
"explicit_s2v_char",
"explicit_s2v_uchar",
"explicit_s2v_short",
"explicit_s2v_ushort",
"explicit_s2v_int",
"explicit_s2v_uint",
"explicit_s2v_long",
"explicit_s2v_ulong",
"explicit_s2v_float",
"explicit_s2v_double",
"enqueue_map_buffer",
"enqueue_map_image",
"work_item_functions",
"astype",
"async_copy_global_to_local",
"async_copy_local_to_global",
"async_strided_copy_global_to_local",
"async_strided_copy_local_to_global",
"prefetch",
"kernel_call_kernel_function",
"host_numeric_constants",
"kernel_numeric_constants",
"kernel_limit_constants",
"kernel_preprocessor_macros",
"parameter_types",
"vector_creation",
"vec_type_hint",
"kernel_memory_alignment_local",
"kernel_memory_alignment_global",
"kernel_memory_alignment_constant",
"kernel_memory_alignment_private",
"progvar_prog_scope_misc",
"progvar_prog_scope_uninit",
"progvar_prog_scope_init",
"progvar_func_scope",
"global_work_offsets",
"get_global_offset",
"global_linear_id",
"local_linear_id",
"enqueued_local_size",
"simple_read_image_pitch",
"simple_write_image_pitch",
#if defined( __APPLE__ )
"queue_priority",
#endif
"get_linear_ids",
"test_rw_image_access_qualifier",
};
// Compile-time check that the table of test names and the table of test
// functions contain the same number of entries.
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
// Number of registered tests, derived from the size of the name table.
int num_fns = sizeof(basefn_names) / sizeof(char *);
// Program entry point: hands the command line and both test tables to the
// shared test harness and returns whatever the harness returns.
// NOTE(review): the meaning of the trailing `false, false, 0` arguments is
// defined by runTestHarness, which is not visible here - confirm before changing.
int main(int argc, const char *argv[])
{
return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
}

View File

@@ -0,0 +1,160 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/rounding_mode.h"
extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
extern int test_hostptr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_fpmath_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_fpmath_float2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_fpmath_float4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_int2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_int4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_long2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_long4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_hiloeo(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_if(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_sizeof(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_loop(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_pointer_cast(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_local_arg_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_local_kernel_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_local_kernel_scope(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_constant_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_writeimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_writeimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_writeimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_multireadimageonefmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_multireadimagemultifmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_image_r8(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_simplebarrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_wg_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_int2float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_float2int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagearraycopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagearraycopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagereadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagereadwrite3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage3d_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage3d_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_bufferreadwriterect(cl_device_id device, cl_context context, cl_command_queue queue_, int num_elements);
extern int test_imagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagerandomcopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
extern int test_arrayimagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_arrayimagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagenpot(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_sampler_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_sampler_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_createkernelsinprogram(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_single_large_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_multiple_max_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_arrayreadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagedim_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagedim_non_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_image_param(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_image_multipass_integer_coord(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_image_multipass_float_coord(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vload_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vload_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vload_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vload_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vstore_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vstore_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vstore_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_astype(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_native_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_limit_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_call_kernel_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_progvar_prog_scope_misc(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_progvar_prog_scope_init(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_global_work_offsets(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_get_global_offset(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_global_linear_id(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_local_linear_id(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_simple_read_image_pitch(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements);
extern int test_simple_write_image_pitch(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements);
#if defined( __APPLE__ )
extern int test_queue_priority(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
#endif
extern int test_get_linear_ids(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements);
extern int test_rw_image_access_qualifier(cl_device_id device_id, cl_context context, cl_command_queue commands, int num_elements);

View File

@@ -0,0 +1,3 @@
#!/bin/sh
# Runs the buffer read/write/copy subset of test_basic from the directory
# that contains this script. Any extra arguments are forwarded unchanged.
# Quoting keeps paths and arguments containing spaces intact; abort if the
# directory change fails so we never run a stray ./test_basic.
cd "$(dirname "$0")" || exit 1
./test_basic arrayreadwrite arraycopy bufferreadwriterect "$@"

View File

@@ -0,0 +1,3 @@
#!/bin/sh
# Runs the array <-> image copy subset of test_basic from the directory that
# contains this script. Extra arguments are forwarded to test_basic, matching
# the sibling runner scripts; with no arguments the behavior is unchanged.
cd "$(dirname "$0")" || exit 1
./test_basic arrayimagecopy arrayimagecopy3d imagearraycopy "$@"

View File

@@ -0,0 +1,17 @@
#!/bin/sh
# Runs the image-related subset of test_basic from the directory that contains
# this script. Any extra arguments are forwarded unchanged.
# Quoting keeps paths and arguments containing spaces intact; abort if the
# directory change fails so we never run a stray ./test_basic.
cd "$(dirname "$0")" || exit 1
./test_basic \
imagecopy imagerandomcopy \
imagearraycopy imagearraycopy3d \
image_r8 \
readimage readimage_int16 readimage_fp32 \
writeimage writeimage_int16 writeimage_fp32 \
imagenpot \
image_param \
image_multipass_integer_coord \
readimage3d \
readimage3d_int16 \
readimage3d_fp32 \
imagereadwrite3d \
imagereadwrite \
"$@"

View File

@@ -0,0 +1,4 @@
#!/bin/sh
# Runs the multi-read-image (mri_*) subset of test_basic from the directory
# that contains this script. Extra arguments are forwarded to test_basic,
# matching the sibling runner scripts; with no arguments the behavior is unchanged.
cd "$(dirname "$0")" || exit 1
./test_basic mri_one mri_multiple "$@"

View File

@@ -0,0 +1,201 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the kernel used by test_arraycopy: each work-item copies
// one unsigned int from src to dst at the index given by get_global_id(0).
const char *copy_kernel_code =
"__kernel void test_copy(__global unsigned int *src, __global unsigned int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src[tid];\n"
"}\n";
/*
 * Copies a 128K-element cl_uint array into a device buffer in three ways and
 * verifies each result by reading the destination buffer back:
 *   1. clEnqueueCopyBuffer from a CL_MEM_USE_HOST_PTR buffer,
 *   2. clEnqueueWriteBuffer followed by clEnqueueCopyBuffer,
 *   3. a copy kernel reading from a CL_MEM_USE_HOST_PTR buffer.
 *
 * device and n_elems are not used; the element count is fixed inside the test.
 *
 * Returns 0 when all three sub-tests pass, otherwise a non-zero value (the
 * number of failed sub-tests, or the error propagated by test_error).
 */
int
test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    cl_uint     *input_ptr, *output_ptr;
    cl_mem      streams[4], results;
    cl_program  program;
    cl_kernel   kernel;
    unsigned    num_elements = 128 * 1024;
    cl_uint     num_copies = 1;
    size_t      delta_offset;
    unsigned    i;
    cl_int      err;
    MTdata      d;
    int         error_count = 0;

    input_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
    output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
    if (input_ptr == NULL || output_ptr == NULL)
    {
        // Without both host arrays nothing below can run; free(NULL) is a no-op.
        log_error("Unable to allocate host memory for the array copy test\n");
        free(input_ptr);
        free(output_ptr);
        return -1;
    }

    // results
    results = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err);
    test_error(err, "clCreateBuffer failed");

/*****************************************************************************************************************************************/
#pragma mark client backing

    log_info("Testing CL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer\n");
    // randomize data
    d = init_genrand( gRandomSeed );
    for (i=0; i<num_elements; i++)
        input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);

    // client backing
    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
    test_error(err, "clCreateBuffer failed");

    // Copy the buffer in num_copies equally sized slices.
    delta_offset = num_elements * sizeof(cl_uint) / num_copies;
    for (i=0; i<num_copies; i++)
    {
        size_t offset = i * delta_offset;
        err = clEnqueueCopyBuffer(queue, streams[0], results, offset, offset, delta_offset, 0, NULL, NULL);
        test_error(err, "clEnqueueCopyBuffer failed");
    }

    // Try upload from client backing
    err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
    test_error(err, "clEnqueueReadBuffer failed");

    for (i=0; i<num_elements; i++)
    {
        if (input_ptr[i] != output_ptr[i])
        {
            err = -1;
            error_count++;
            // One mismatch is enough to fail this sub-test; stopping here keeps
            // error_count a count of failed sub-tests, like the loops below.
            break;
        }
    }

    if (err)
        log_error("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer FAILED\n");
    else
        log_info("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer passed\n");

#pragma mark framework backing (no client data)

    log_info("Testing with clEnqueueWriteBuffer and clEnqueueCopyBuffer\n");
    // randomize data
    for (i=0; i<num_elements; i++)
        input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);

    // no backing
    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE) , sizeof(cl_uint) * num_elements, NULL, &err);
    test_error(err, "clCreateBuffer failed");

    for (i=0; i<num_copies; i++)
    {
        size_t offset = i * delta_offset;

        // Copy the array up from host ptr
        err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_uint)*num_elements, input_ptr, 0, NULL, NULL);
        test_error(err, "clEnqueueWriteBuffer failed");

        err = clEnqueueCopyBuffer(queue, streams[2], results, offset, offset, delta_offset, 0, NULL, NULL);
        test_error(err, "clEnqueueCopyBuffer failed");
    }

    err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
    test_error(err, "clEnqueueReadBuffer failed");

    for (i=0; i<num_elements; i++)
    {
        if (input_ptr[i] != output_ptr[i])
        {
            err = -1;
            error_count++;
            break;
        }
    }

    if (err)
        log_error("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer FAILED\n");
    else
        log_info("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer passed\n");

/*****************************************************************************************************************************************/
#pragma mark kernel copy test

    log_info("Testing CL_MEM_USE_HOST_PTR buffer with kernel copy\n");
    // randomize data
    for (i=0; i<num_elements; i++)
        input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
    free_mtdata(d); d= NULL;

    // client backing
    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
    test_error(err, "clCreateBuffer failed");

    err = create_single_kernel_helper(context, &program, &kernel, 1, &copy_kernel_code, "test_copy" );
    test_error(err, "create_single_kernel_helper failed");

    err = clSetKernelArg(kernel, 0, sizeof streams[3], &streams[3]);
    err |= clSetKernelArg(kernel, 1, sizeof results, &results);
    test_error(err, "clSetKernelArg failed");

    // Only the first entry is consumed because work_dim is 1.
    size_t threads[3] = {num_elements, 0, 0};

    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    test_error(err, "clEnqueueNDRangeKernel failed");

    err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
    test_error(err, "clEnqueueReadBuffer failed");

    for (i=0; i<num_elements; i++)
    {
        if (input_ptr[i] != output_ptr[i])
        {
            err = -1;
            error_count++;
            break;
        }
    }

    // Report this sub-test from its own result, before folding in failures
    // from the earlier sub-tests, so the message is not misleading.
    if (err)
        log_error("\tCL_MEM_USE_HOST_PTR buffer with kernel copy FAILED\n");
    else
        log_info("\tCL_MEM_USE_HOST_PTR buffer with kernel copy passed\n");

    // Keep track of multiple errors.
    if (error_count != 0)
        err = error_count;

    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseMemObject(results);
    clReleaseMemObject(streams[0]);
    clReleaseMemObject(streams[2]);
    clReleaseMemObject(streams[3]);

    free(input_ptr);
    free(output_ptr);

    return err;
}

View File

@@ -0,0 +1,143 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
int test_arrayimagecopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
{
cl_uchar *bufptr, *imgptr;
clMemWrapper buffer, image;
int img_width = 512;
int img_height = 512;
size_t elem_size;
size_t buffer_size;
int i;
cl_int err;
MTdata d;
cl_event copyevent;
log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
test_error(err, "clGetImageInfo failed");
buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;
buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err);
test_error(err, "clCreateBuffer failed");
d = init_genrand( gRandomSeed );
bufptr = (cl_uchar*)malloc(buffer_size);
for (i=0; i<(int)buffer_size; i++) {
bufptr[i] = (cl_uchar)genrand_int32(d);
}
free_mtdata(d); d = NULL;
size_t origin[3]={0,0,0}, region[3]={img_width,img_height,1};
err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
test_error(err, "clEnqueueCopyImageToBuffer failed");
imgptr = (cl_uchar*)malloc(buffer_size);
err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
test_error(err, "clEnqueueReadBuffer failed");
if (memcmp(bufptr, imgptr, buffer_size) != 0) {
log_error( "ERROR: Results did not validate!\n" );
unsigned char * inchar = (unsigned char*)bufptr;
unsigned char * outchar = (unsigned char*)imgptr;
int failuresPrinted = 0;
int i;
for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
int failed = 0;
int j;
for (j=0; j<(int)elem_size; j++)
if (inchar[i+j] != outchar[i+j])
failed = 1;
char values[4096];
values[0] = 0;
if (failed) {
sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
int j;
for (j=0; j<(int)elem_size; j++)
sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
sprintf(values + strlen(values), "] != expected [");
for (j=0; j<(int)elem_size; j++)
sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
sprintf(values + strlen(values), "]");
log_error("%s\n", values);
failuresPrinted++;
}
if (failuresPrinted > 5) {
log_error("Not printing further failures...\n");
break;
}
}
err = -1;
}
free(bufptr);
free(imgptr);
if (err)
log_error("ARRAY to IMAGE copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
(unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
return err;
}
/*
 * Runs the buffer -> 2D image copy test once for every image format reported
 * for CL_MEM_READ_WRITE / CL_MEM_OBJECT_IMAGE2D. num_elements is not used.
 *
 * Returns 0 when every format passes, otherwise a non-zero value (the OR of
 * the per-format results, or the error code of a failing query).
 */
int test_arrayimagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int err;
    cl_image_format *formats;
    cl_uint num_formats;
    cl_uint i;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // First query only the number of formats, then fetch the list itself.
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats);
    test_error(err, "clGetSupportedImageFormats failed");

    formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
    // malloc(0) may legitimately return NULL, so only treat NULL as a failure
    // when formats were actually requested.
    if (formats == NULL && num_formats != 0) {
        log_error("Unable to allocate memory for %u image formats\n", (unsigned)num_formats);
        return -1;
    }

    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL);
    // test_error leaves the function on failure, so release the list first.
    if (err != CL_SUCCESS)
        free(formats);
    test_error(err, "clGetSupportedImageFormats failed");

    for (i = 0; i < num_formats; i++) {
        err |= test_arrayimagecopy_single_format(device, context, queue, &formats[i]);
    }

    free(formats);

    if (err)
        log_error("ARRAY to IMAGE copy test failed\n");
    else
        log_info("ARRAY to IMAGE copy test passed\n");

    return err;
}

View File

@@ -0,0 +1,144 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
int test_arrayimagecopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
{
cl_uchar *bufptr, *imgptr;
clMemWrapper buffer, image;
int img_width = 128;
int img_height = 128;
int img_depth = 32;
size_t elem_size;
size_t buffer_size;
int i;
cl_int err;
MTdata d;
cl_event copyevent;
log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err);
test_error(err, "create_image_3d failed");
err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
test_error(err, "clGetImageInfo failed");
buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err);
test_error(err, "clCreateBuffer failed");
d = init_genrand( gRandomSeed );
bufptr = (cl_uchar*)malloc(buffer_size);
for (i=0; i<(int)buffer_size; i++) {
bufptr[i] = (cl_uchar)genrand_int32(d);
}
free_mtdata(d); d = NULL;
size_t origin[3]={0,0,0}, region[3]={img_width,img_height,img_depth};
err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
test_error(err, "clEnqueueCopyImageToBuffer failed");
imgptr = (cl_uchar*)malloc(buffer_size);
err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
test_error(err, "clEnqueueReadBuffer failed");
if (memcmp(bufptr, imgptr, buffer_size) != 0) {
log_error( "ERROR: Results did not validate!\n" );
unsigned char * inchar = (unsigned char*)bufptr;
unsigned char * outchar = (unsigned char*)imgptr;
int failuresPrinted = 0;
int i;
for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
int failed = 0;
int j;
for (j=0; j<(int)elem_size; j++)
if (inchar[i+j] != outchar[i+j])
failed = 1;
char values[4096];
values[0] = 0;
if (failed) {
sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
int j;
for (j=0; j<(int)elem_size; j++)
sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
sprintf(values + strlen(values), "] != expected [");
for (j=0; j<(int)elem_size; j++)
sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
sprintf(values + strlen(values), "]");
log_error("%s\n", values);
failuresPrinted++;
}
if (failuresPrinted > 5) {
log_error("Not printing further failures...\n");
break;
}
}
err = -1;
}
free(bufptr);
free(imgptr);
if (err)
log_error("ARRAY to IMAGE3D copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
(unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
return err;
}
/*
 * Runs the buffer -> 3D image copy test once for every image format reported
 * for CL_MEM_READ_WRITE / CL_MEM_OBJECT_IMAGE3D. num_elements is not used.
 *
 * Returns 0 when every format passes, otherwise a non-zero value (the OR of
 * the per-format results, or the error code of a failing query).
 */
int test_arrayimagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int err;
    cl_image_format *formats;
    cl_uint num_formats;
    cl_uint i;

    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )

    // First query only the number of formats, then fetch the list itself.
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
    test_error(err, "clGetSupportedImageFormats failed");

    formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
    // malloc(0) may legitimately return NULL, so only treat NULL as a failure
    // when formats were actually requested.
    if (formats == NULL && num_formats != 0) {
        log_error("Unable to allocate memory for %u image formats\n", (unsigned)num_formats);
        return -1;
    }

    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL);
    // test_error leaves the function on failure, so release the list first.
    if (err != CL_SUCCESS)
        free(formats);
    test_error(err, "clGetSupportedImageFormats failed");

    for (i = 0; i < num_formats; i++) {
        err |= test_arrayimagecopy3d_single_format(device, context, queue, &formats[i]);
    }

    free(formats);

    if (err)
        log_error("ARRAY to IMAGE3D copy test failed\n");
    else
        log_info("ARRAY to IMAGE3D copy test passed\n");

    return err;
}

View File

@@ -0,0 +1,95 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
int
test_arrayreadwrite(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_uint *inptr, *outptr;
cl_mem streams[1];
int num_tries = 400;
num_elements = 1024 * 1024 * 4;
int i, j, err;
MTdata d;
inptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
outptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
// randomize data
d = init_genrand( gRandomSeed );
for (i=0; i<num_elements; i++)
inptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err);
test_error(err, "clCreateBuffer failed");
for (i=0; i<num_tries; i++)
{
int offset;
int cb;
do {
offset = (int)(genrand_int32(d) & 0x7FFFFFFF);
if (offset > 0 && offset < num_elements)
break;
} while (1);
cb = (int)(genrand_int32(d) & 0x7FFFFFFF);
if (cb > (num_elements - offset))
cb = num_elements - offset;
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), sizeof(cl_uint)*cb,&inptr[offset], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), cb*sizeof(cl_uint), &outptr[offset], 0, NULL, NULL );
test_error(err, "clEnqueueReadBuffer failed");
for (j=offset; j<offset+cb; j++)
{
if (inptr[j] != outptr[j])
{
log_error("ARRAY read, write test failed\n");
err = -1;
break;
}
}
if (err)
break;
}
free_mtdata(d);
clReleaseMemObject(streams[0]);
free(inptr);
free(outptr);
if (!err)
log_info("ARRAY read, write test passed\n");
return err;
}

View File

@@ -0,0 +1,288 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/typeWrappers.h"
// OpenCL C source templates for the as_type kernels. test_astype_set fills
// them in with sprintf. In every template the first %s receives either the
// "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" line or an empty string.
//
// General template (neither side is a 3-component vector). Each "%s%s" pair is
// a type name followed by its vector-size suffix: source type, destination
// type, then the destination type twice more for the temporary and as_<type>.
static const char *astype_kernel_pattern =
"%s\n"
"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( src[ tid ] );\n"
" dst[ tid ] = tmp;\n"
"}\n";
// Template for a 3-component source AND a 3-component destination: both
// buffers are addressed as scalar pointers through vload3/vstore3.
// Compared with the sprintf call for astype_kernel_pattern, drop the third and
// fifth arguments (each of which would be the "3" size suffix).
static const char *astype_kernel_pattern_V3srcV3dst =
"%s\n"
"__kernel void test_fn( __global %s *src, __global %s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
" vstore3(tmp,tid,dst);\n"
"}\n";
// Template for a 3-component destination only: the result is written with
// vstore3 through a scalar pointer, and the "3" suffix is hard-coded in the
// temporary and the as_<type>3 call.
// Compared with the sprintf call for astype_kernel_pattern, drop the fifth
// argument (the destination size suffix, which would be "3") and the size
// suffixes of the last two pairs.
static const char *astype_kernel_pattern_V3dst =
"%s\n"
"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s3 tmp = as_%s3( src[ tid ] );\n"
" vstore3(tmp,tid,dst);\n"
"}\n";
// Template for a 3-component source only: the input is read with vload3
// through a scalar pointer, and the destination keeps its vector type.
// Compared with the sprintf call for astype_kernel_pattern, drop the third
// argument (the source size suffix, which would be "3").
static const char *astype_kernel_pattern_V3src =
"%s\n"
"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
" dst[ tid ] = tmp;\n"
"}\n";
namespace {
// Owns one malloc'd host buffer and frees it when the enclosing scope exits,
// so the early returns performed by test_error() on failure cannot leak it.
struct AstypeHostBuffer
{
    char *data;
    explicit AstypeHostBuffer( size_t bytes ) : data( (char *)malloc( bytes ) ) {}
    ~AstypeHostBuffer() { free( data ); }
private:
    AstypeHostBuffer( const AstypeHostBuffer & );            // non-copyable
    AstypeHostBuffer &operator=( const AstypeHostBuffer & ); // non-assignable
};
} // anonymous namespace

// Builds and runs one as_<outVecType><outVecSize>( <inVecType><vecSize> )
// kernel over numElements random input vectors and, where the result is
// well defined, checks that the output bytes equal the input bytes.
//
//   inVecType / vecSize     - element type and component count of the source
//   outVecType / outVecSize - element type and component count of the result
//   numElements             - number of vectors processed (one work-item each)
//
// Returns 0 on success (or when the combination is only required to compile
// and run), an OpenCL error code if an API call fails, -1 if host memory
// cannot be allocated, and 1 if a result does not validate.
int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType,
                     unsigned int vecSize, unsigned int outVecSize,
                     int numElements )
{
    int error;

    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 2 ];

    char programSrc[ 10240 ];
    size_t threads[ 1 ], localThreads[ 1 ];
    size_t typeSize = get_explicit_type_size( inVecType );
    size_t outTypeSize = get_explicit_type_size(outVecType);
    // Vector-size suffix indexed by component count ("" for scalars).
    char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
    MTdata d;

    // Create program. 3-component vectors are accessed through vload3/vstore3
    // on scalar pointers, so they need their own source templates.
    const char *fp64Pragma =
        (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "";

    if(outVecSize == 3 && vecSize == 3) {
        // astype_kernel_pattern_V3srcV3dst
        sprintf( programSrc, astype_kernel_pattern_V3srcV3dst,
                 fp64Pragma,
                 get_explicit_type_name( inVecType ),
                 get_explicit_type_name( outVecType ),
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] );
    } else if(outVecSize == 3) {
        // astype_kernel_pattern_V3dst
        sprintf( programSrc, astype_kernel_pattern_V3dst,
                 fp64Pragma,
                 get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
                 get_explicit_type_name( outVecType ),
                 get_explicit_type_name( outVecType ),
                 get_explicit_type_name( outVecType ));
    } else if(vecSize == 3) {
        // astype_kernel_pattern_V3src
        sprintf( programSrc, astype_kernel_pattern_V3src,
                 fp64Pragma,
                 get_explicit_type_name( inVecType ),
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
    } else {
        sprintf( programSrc, astype_kernel_pattern,
                 fp64Pragma,
                 get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
                 get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
    }

    const char *ptr = programSrc;
    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
    test_error( error, "Unable to create testing kernel" );

    // Create some input values
    size_t inBufferSize = sizeof(char)* numElements * typeSize * vecSize;
    size_t outBufferSize = sizeof(char)* numElements * outTypeSize * outVecSize;
    AstypeHostBuffer inBuffer( inBufferSize );
    AstypeHostBuffer outBuffer( outBufferSize );
    if( inBuffer.data == NULL || outBuffer.data == NULL )
    {
        log_error( "ERROR: Unable to allocate host buffers for the astype test\n" );
        return -1;
    }

    d = init_genrand( gRandomSeed );
    generate_random_data( inVecType, numElements * vecSize,
                          d, inBuffer.data );
    free_mtdata(d); d = NULL;

    // Create I/O streams and set arguments
    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer.data, &error );
    test_error( error, "Unable to create I/O stream" );
    streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error );
    test_error( error, "Unable to create I/O stream" );

    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
    test_error( error, "Unable to set kernel argument" );

    // Run the kernel
    threads[ 0 ] = numElements;
    error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
    test_error( error, "Unable to get group size to run with" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Unable to run kernel" );

    // Get the results and compare
    // The beauty is that astype is supposed to return the bit pattern as a different type, which means
    // the output should have the exact same bit pattern as the input. No interpretation necessary!
    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer.data, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    char *expected = inBuffer.data;
    char *actual = outBuffer.data;

    // Compare only the bytes both representations have in common.
    size_t compSize = typeSize*vecSize;
    if(outTypeSize*outVecSize < compSize) {
        compSize = outTypeSize*outVecSize;
    }

    if(outVecSize == 4 && vecSize == 3)
    {
        // as_type4(vec3) should compile but produce undefined results??
        return 0;
    }

    if(outVecSize != 3 && vecSize != 3 && outVecSize != vecSize)
    {
        // as_typen(vecm) should compile and run but produce
        // implementation-defined results for m != n
        // and n*sizeof(type) = sizeof(vecm)
        return 0;
    }

    for( int i = 0; i < numElements; i++ )
    {
        if( memcmp( expected, actual, compSize ) != 0 )
        {
            char expectedString[ 1024 ], actualString[ 1024 ];
            // Describe and format the actual data with the OUTPUT geometry.
            // Using the input geometry here would mislabel the conversion and,
            // for as_type3(vec4), read past the end of the output element.
            log_error( "ERROR: Data sample %d of %d for as_%s%d( %s%d ) did not validate (expected {%s}, got {%s})\n",
                       (int)i, (int)numElements, get_explicit_type_name( outVecType ), (int)outVecSize, get_explicit_type_name( inVecType ), (int)vecSize,
                       GetDataVectorString( expected, typeSize, vecSize, expectedString ),
                       GetDataVectorString( actual, outTypeSize, outVecSize, actualString ) );
            log_error("Src is :\n%s\n----\n%d threads %d localthreads\n",
                      programSrc, (int)threads[0],(int) localThreads[0]);
            return 1;
        }
        expected += typeSize * vecSize;
        actual += outTypeSize * outVecSize;
    }

    return 0;
}
// Runs test_astype_set for every ordered pair of distinct element types and
// every pair of vector sizes whose total byte sizes match, plus the 3<->4
// component cases for equally sized element types. Returns the sum of the
// values returned by test_astype_set (0 when everything passed).
int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    // Note: although casting to different vector element sizes that match the same size (i.e. short2 -> char4) is
    // legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way
    // for us to verify what is "valid". So the only thing we can test are types that match in size independent
    // of the element count (char -> uchar, etc)
    ExplicitType typeList[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int widthList[] = { 1, 2, 3, 4, 8, 16, 0 };
    int failures = 0;

    for( unsigned int src = 0; typeList[ src ] != kNumExplicitTypes; src++ )
    {
        const ExplicitType srcType = typeList[ src ];
        const size_t srcBytes = get_explicit_type_size( srcType );

        // Skip source types the device cannot handle.
        if( srcType == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
            continue;
        if( !gHasLong && ( srcType == kLong || srcType == kULong ) )
            continue;

        for( unsigned int dst = 0; typeList[ dst ] != kNumExplicitTypes; dst++ )
        {
            const ExplicitType dstType = typeList[ dst ];
            const size_t dstBytes = get_explicit_type_size( dstType );

            // Skip destination types the device cannot handle.
            if( dstType == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
                continue;
            if( !gHasLong && ( dstType == kLong || dstType == kULong ) )
                continue;

            // change this check
            if( src == dst )
                continue;

            log_info( " (%s->%s)\n", get_explicit_type_name( srcType ), get_explicit_type_name( dstType ) );
            fflush( stdout );

            // Every size pairing whose total byte size is identical on both sides.
            for( unsigned int a = 0; widthList[ a ] != 0; a++ )
            {
                for( unsigned int b = 0; widthList[ b ] != 0; b++ )
                {
                    if( widthList[ a ] * srcBytes == widthList[ b ] * dstBytes )
                    {
                        failures += test_astype_set( device, context, queue, srcType, dstType, widthList[ a ], widthList[ b ], n_elems );
                    }
                }
            }

            if( srcBytes == dstBytes )
            {
                // as_type3(vec4) allowed, as_type4(vec3) not allowed
                failures += test_astype_set( device, context, queue, srcType, dstType, 3, 4, n_elems );
                failures += test_astype_set( device, context, queue, srcType, dstType, 4, 3, n_elems );
            }
        }
    }

    return failures;
}

View File

@@ -0,0 +1,279 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
// OpenCL C source templates used by test_copy. All three take the same eight
// sprintf string arguments, in this order: the optional fp64 pragma, the
// vector type name four times, the scalar element type name, then the vector
// type name twice more.

// Global -> local: each work-group zeroes its local buffer, copies
// copiesPerWorkgroup elements from src with async_work_group_copy, waits for
// the event, and then each work-item writes its share of the local buffer to
// dst.
static const char *async_global_to_local_kernel =
"%s\n" // optional pragma string
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n"
"{\n"
" int i;\n"
// Zero the local storage first
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_copy( (__local %s*)localBuffer, (__global const %s*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 );\n"
// Wait for the copy to complete, then verify by manually copying to the dest
" wait_group_events( 1, &event );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];\n"
"}\n" ;
// Local -> global: each work-item copies its share of src into the local
// buffer by hand, then the work-group writes the whole local buffer to dst
// with async_work_group_copy and waits for the event.
static const char *async_local_to_global_kernel =
"%s\n" // optional pragma string
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n"
"{\n"
" int i;\n"
// Zero the local storage first
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem+i ];\n"
// Do this to verify all kernels are done copying to the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_copy((__global %s*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const %s*)localBuffer, (size_t)copiesPerWorkgroup, 0 );\n"
" wait_group_events( 1, &event );\n"
"}\n" ;
// Prefetch: each work-item prefetches its share of src and then copies it to
// dst directly; the local buffer argument is not used by the kernel body.
// The "Ignore this" comment line in the generated source only exists to
// consume three of the shared sprintf arguments that this kernel has no other
// use for.
static const char *prefetch_kernel =
"%s\n" // optional pragma string
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n"
"{\n"
" // Ignore this: %s%s%s\n"
" int i;\n"
" prefetch( (const __global %s*)(src+copiesPerWorkItem*get_global_id(0)), copiesPerWorkItem);\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem+i ];\n"
"}\n" ;
namespace {
// Owns one malloc'd host buffer and frees it when the enclosing scope exits,
// so the early returns performed by test_error() on failure cannot leak it.
struct AsyncCopyHostBuffer
{
    void *data;
    explicit AsyncCopyHostBuffer( size_t bytes ) : data( malloc( bytes ) ) {}
    ~AsyncCopyHostBuffer() { free( data ); }
private:
    AsyncCopyHostBuffer( const AsyncCopyHostBuffer & );            // non-copyable
    AsyncCopyHostBuffer &operator=( const AsyncCopyHostBuffer & ); // non-assignable
};
} // anonymous namespace

// Builds the kernel described by the kernelCode template for the vector type
// <vecType><vecSize>, runs it over 1111 work-groups of random data and checks
// that the destination buffer matches the source.
//
//   kernelCode - sprintf template taking the fp64 pragma, the vector type name
//                four times, the scalar type name, and the vector type name
//                twice more
//   vecType    - element type
//   vecSize    - component count (3-component vectors occupy 4 elements)
//
// Returns 0 on success, an OpenCL error code if an API call fails, and -1 on
// a host allocation failure or a data mismatch.
int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode,
              ExplicitType vecType, int vecSize
              )
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 2 ];
    size_t threads[ 1 ], localThreads[ 1 ];
    MTdata d;

    char vecNameString[64]; vecNameString[0] = 0;
    if (vecSize == 1)
        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
    else
        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType), vecSize);

    log_info("Testing %s\n", vecNameString);

    cl_long max_local_mem_size;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_local_mem_size), &max_local_mem_size, NULL);
    test_error( error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");

    // The value is not used below; only the success of the query is checked.
    unsigned int num_of_compute_devices;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(num_of_compute_devices), &num_of_compute_devices, NULL);
    test_error( error, "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");

    char programSource[4096]; programSource[0]=0;
    char *programPtr;

    sprintf(programSource, kernelCode,
            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
            vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
    //log_info("program: %s\n", programSource);
    programPtr = programSource;

    error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test_fn" );
    test_error( error, "Unable to create testing kernel" );

    size_t max_workgroup_size;
    error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size), &max_workgroup_size, NULL);
    test_error (error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");

    size_t max_local_workgroup_size[3];
    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");

    // Pick the minimum of the device and the kernel
    if (max_workgroup_size > max_local_workgroup_size[0])
        max_workgroup_size = max_local_workgroup_size[0];

    size_t numberOfCopiesPerWorkitem = 13;
    // A 3-component vector occupies the storage of a 4-component one.
    size_t elementSize = get_explicit_type_size(vecType)* ((vecSize == 3) ? 4 : vecSize);
    size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize;
    // Use at most half of the local memory. Halve before narrowing so a large
    // reported size is not truncated by an intermediate int.
    size_t maxLocalWorkgroupSize = ((size_t)(max_local_mem_size/2))/localStorageSpacePerWorkitem;

    // Calculation can return 0 on embedded devices due to 1KB local mem limit
    if(maxLocalWorkgroupSize == 0)
    {
        maxLocalWorkgroupSize = 1;
    }

    size_t localWorkgroupSize = maxLocalWorkgroupSize;
    if (maxLocalWorkgroupSize > max_workgroup_size)
        localWorkgroupSize = max_workgroup_size;

    size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem;
    size_t numberOfLocalWorkgroups = 1111;
    size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize;
    size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;

    AsyncCopyHostBuffer inBuffer( globalBufferSize );
    AsyncCopyHostBuffer outBuffer( globalBufferSize );
    if( inBuffer.data == NULL || outBuffer.data == NULL )
    {
        log_error( "ERROR: Unable to allocate host buffers for the copy test\n" );
        return -1;
    }
    memset(outBuffer.data, 0, globalBufferSize);

    cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
    copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
    copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize);

    log_info("Global: %d, local %d, local buffer %db, global buffer %db, each work group will copy %d elements and each work item item will copy %d elements.\n",
             (int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, copiesPerWorkgroup, copiesPerWorkItemInt);

    threads[0] = globalWorkgroupSize;
    localThreads[0] = localWorkgroupSize;

    d = init_genrand( gRandomSeed );
    generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer.data );
    free_mtdata(d); d = NULL;

    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer.data, &error );
    test_error( error, "Unable to create input buffer" );
    streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer.data, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 2, localBufferSize, NULL );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt );
    test_error( error, "Unable to set kernel argument" );

    // Enqueue
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Unable to queue kernel" );

    // Read
    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer.data, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    // Verify
    int failuresPrinted = 0;
    if( memcmp( inBuffer.data, outBuffer.data, globalBufferSize ) != 0 )
    {
        // Compare only the meaningful bytes of each element; the padding
        // component of a 3-component vector is excluded.
        size_t typeSize = get_explicit_type_size(vecType)* vecSize;
        unsigned char * inchar = (unsigned char*)inBuffer.data;
        unsigned char * outchar = (unsigned char*)outBuffer.data;
        for (int i=0; i< (int)globalBufferSize; i+=(int)elementSize) {
            if (memcmp( ((char *)inchar)+i, ((char *)outchar)+i, typeSize) != 0 )
            {
                char values[4096];
                values[0] = 0;
                if ( failuresPrinted == 0 ) {
                    // Print first failure message
                    log_error( "ERROR: Results of copy did not validate!\n" );
                }
                sprintf(values + strlen( values), "%d -> [", i);
                for (int j=0; j<(int)elementSize; j++)
                    sprintf(values + strlen( values), "%2x ", inchar[i+j]);
                sprintf(values + strlen(values), "] != [");
                for (int j=0; j<(int)elementSize; j++)
                    sprintf(values + strlen( values), "%2x ", outchar[i+j]);
                sprintf(values + strlen(values), "]");
                log_error("%s\n", values);
                failuresPrinted++;
            }

            if (failuresPrinted > 5) {
                log_error("Not printing further failures...\n");
                break;
            }
        }
    }

    return failuresPrinted ? -1 : 0;
}
// Runs test_copy with the given kernel template for every supported element
// type and every vector size. Returns 0 if all combinations passed and -1 if
// at least one of them failed.
int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) {
    ExplicitType typeList[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int widthList[] = { 1, 2, 3, 4, 8, 16, 0 };
    bool anyFailed = false;

    for( unsigned int t = 0; typeList[ t ] != kNumExplicitTypes; t++ )
    {
        const ExplicitType current = typeList[ t ];

        // Skip types the device cannot handle.
        if( current == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
            continue;
        if( !gHasLong && ( current == kLong || current == kULong ) )
            continue;

        // Keep going after a failure so that every combination is exercised.
        for( unsigned int w = 0; widthList[ w ] != 0; w++ )
        {
            if( test_copy( deviceID, context, queue, kernelCode, current, widthList[ w ] ) != 0 )
                anyFailed = true;
        }
    }

    return anyFailed ? -1 : 0;
}
// Test entry point: async_work_group_copy from global to local memory, for
// all element types and vector sizes. num_elements is not used.
int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const char *kernelTemplate = async_global_to_local_kernel;
    const int result = test_copy_all_types( deviceID, context, queue, kernelTemplate );
    return result;
}
// Test entry point: async_work_group_copy from local to global memory, for
// all element types and vector sizes. num_elements is not used.
int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const char *kernelTemplate = async_local_to_global_kernel;
    const int result = test_copy_all_types( deviceID, context, queue, kernelTemplate );
    return result;
}
// Test entry point: prefetch() followed by a direct global-to-global copy,
// for all element types and vector sizes. num_elements is not used.
int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const char *kernelTemplate = prefetch_kernel;
    const int result = test_copy_all_types( deviceID, context, queue, kernelTemplate );
    return result;
}

View File

@@ -0,0 +1,274 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
// OpenCL C source templates used by test_strided_copy. Both take the same
// nine sprintf string arguments, in this order: the optional fp64 pragma, a
// prefix emitted directly before "__kernel" (test_strided_copy passes an empty
// string), the vector type name four times, the scalar element type name, then
// the vector type name twice more.

// Strided global -> local: each work-group zeroes its local buffer, gathers
// copiesPerWorkgroup elements spaced `stride` apart in src with
// async_work_group_strided_copy, waits for the event, and then each work-item
// writes its share back to the same strided positions in dst.
static const char *async_strided_global_to_local_kernel =
"%s\n" // optional pragma string
"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
"{\n"
" int i;\n"
// Zero the local storage first
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_strided_copy( (__local %s*)localBuffer, (__global const %s*)(src+copiesPerWorkgroup*stride*get_group_id(0)), (size_t)copiesPerWorkgroup, (size_t)stride, 0 );\n"
// Wait for the copy to complete, then verify by manually copying to the dest
" wait_group_events( 1, &event );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" dst[ get_global_id( 0 )*copiesPerWorkItem*stride+i*stride ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];\n"
"}\n" ;
// Strided local -> global: each work-item loads its strided share of src into
// the local buffer by hand, then the work-group scatters the local buffer to
// dst with async_work_group_strided_copy and waits for the event.
static const char *async_strided_local_to_global_kernel =
"%s\n" // optional pragma string
"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
"{\n"
" int i;\n"
// Zero the local storage first
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem*stride+i*stride ];\n"
// Do this to verify all kernels are done copying to the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_strided_copy((__global %s*)(dst+copiesPerWorkgroup*stride*get_group_id(0)), (__local const %s*)localBuffer, (size_t)copiesPerWorkgroup, (size_t)stride, 0 );\n"
" wait_group_events( 1, &event );\n"
"}\n" ;
namespace {
// Owns one malloc'd host buffer and frees it when the enclosing scope exits,
// so neither the early returns performed by test_error() on failure nor the
// mismatch return can leak it.
struct StridedCopyHostBuffer
{
    void *data;
    explicit StridedCopyHostBuffer( size_t bytes ) : data( malloc( bytes ) ) {}
    ~StridedCopyHostBuffer() { free( data ); }
private:
    StridedCopyHostBuffer( const StridedCopyHostBuffer & );            // non-copyable
    StridedCopyHostBuffer &operator=( const StridedCopyHostBuffer & ); // non-assignable
};
} // anonymous namespace

// Builds the kernel described by the kernelCode template for the vector type
// <vecType><vecSize>, runs it with the given element stride over random data
// and checks that every strided element of the destination matches the source.
//
//   kernelCode - sprintf template taking the fp64 pragma, a kernel prefix, the
//                vector type name four times, the scalar type name, and the
//                vector type name twice more
//   vecType    - element type
//   vecSize    - component count (3-component vectors occupy 4 elements)
//   stride     - distance, in elements, between consecutive copied elements
//
// Returns 0 on success, an OpenCL error code if an API call fails, and -1 on
// a host allocation failure or a data mismatch.
int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, ExplicitType vecType, int vecSize, int stride)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 2 ];
    size_t threads[ 1 ], localThreads[ 1 ];
    MTdata d;

    char vecNameString[64]; vecNameString[0] = 0;
    if (vecSize == 1)
        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
    else
        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType), vecSize);

    log_info("Testing %s\n", vecNameString);

    cl_long max_local_mem_size;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_local_mem_size), &max_local_mem_size, NULL);
    test_error( error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");

    // The value is not used below; only the success of the query is checked.
    unsigned int num_of_compute_devices;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(num_of_compute_devices), &num_of_compute_devices, NULL);
    test_error( error, "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");

    char programSource[4096]; programSource[0]=0;
    char *programPtr;

    sprintf(programSource, kernelCode,
            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
            "",
            vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
    //log_info("program: %s\n", programSource);
    programPtr = programSource;

    error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test_fn" );
    test_error( error, "Unable to create testing kernel" );

    size_t max_workgroup_size;
    error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size), &max_workgroup_size, NULL);
    test_error (error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");

    size_t max_local_workgroup_size[3];
    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");

    // Pick the minimum of the device and the kernel
    if (max_workgroup_size > max_local_workgroup_size[0])
        max_workgroup_size = max_local_workgroup_size[0];

    // A 3-component vector occupies the storage of a 4-component one.
    size_t elementSize = get_explicit_type_size(vecType)* ((vecSize == 3) ? 4 : vecSize);

    cl_ulong max_global_mem_size;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(max_global_mem_size), &max_global_mem_size, NULL);
    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");

    if (max_global_mem_size > (cl_ulong)SIZE_MAX) {
        max_global_mem_size = (cl_ulong)SIZE_MAX;
    }

    cl_bool unified_mem;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(unified_mem), &unified_mem, NULL);
    test_error (error, "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY");

    int number_of_global_mem_buffers = (unified_mem) ? 4 : 2;

    size_t numberOfCopiesPerWorkitem = 3;
    size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize;
    // Use at most half of the local memory. Halve before narrowing so a large
    // reported size is not truncated by an intermediate int.
    size_t maxLocalWorkgroupSize = ((size_t)(max_local_mem_size/2))/localStorageSpacePerWorkitem;

    // The calculation can yield 0 when local memory is very small. Without
    // this guard the work-group and local buffer sizes become 0 and the limit
    // computation below divides by zero.
    if (maxLocalWorkgroupSize == 0)
    {
        maxLocalWorkgroupSize = 1;
    }

    size_t localWorkgroupSize = maxLocalWorkgroupSize;
    if (maxLocalWorkgroupSize > max_workgroup_size)
        localWorkgroupSize = max_workgroup_size;

    size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem;
    size_t numberOfLocalWorkgroups = 579;//1111;

    // Reduce the numberOfLocalWorkgroups so that no more than 1/2 of CL_DEVICE_GLOBAL_MEM_SIZE is consumed
    // by the allocated buffer. This is done to avoid resource errors resulting from address space fragmentation.
    size_t numberOfLocalWorkgroupsLimit = max_global_mem_size / (2 * number_of_global_mem_buffers * localBufferSize * stride);
    if (numberOfLocalWorkgroups > numberOfLocalWorkgroupsLimit) numberOfLocalWorkgroups = numberOfLocalWorkgroupsLimit;

    size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize*stride;
    size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;

    StridedCopyHostBuffer inBuffer( globalBufferSize );
    StridedCopyHostBuffer outBuffer( globalBufferSize );
    if( inBuffer.data == NULL || outBuffer.data == NULL )
    {
        log_error( "ERROR: Unable to allocate host buffers for the strided copy test\n" );
        return -1;
    }
    memset(outBuffer.data, 0, globalBufferSize);

    cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
    copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
    copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize);

    log_info("Global: %d, local %d, local buffer %db, global buffer %db, copy stride %d, each work group will copy %d elements and each work item item will copy %d elements.\n",
             (int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, (int)stride, copiesPerWorkgroup, copiesPerWorkItemInt);

    threads[0] = globalWorkgroupSize;
    localThreads[0] = localWorkgroupSize;

    d = init_genrand( gRandomSeed );
    generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer.data );
    free_mtdata(d); d = NULL;

    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer.data, &error );
    test_error( error, "Unable to create input buffer" );
    streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer.data, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 2, localBufferSize, NULL );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 5, sizeof(stride), &stride );
    test_error( error, "Unable to set kernel argument" );

    // Enqueue
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Unable to queue kernel" );

    // Read
    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer.data, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    // Verify: only every stride-th element is copied by the kernel, and only
    // the meaningful bytes of each element are compared (the padding
    // component of a 3-component vector is excluded).
    size_t typeSize = get_explicit_type_size(vecType)* vecSize;
    for (int i=0; i<(int)globalBufferSize; i+=(int)elementSize*(int)stride)
    {
        if (memcmp( ((char *)inBuffer.data)+i, ((char *)outBuffer.data)+i, typeSize) != 0 )
        {
            // Both pointers already address the failing element at byte
            // offset i, so the bytes to print are simply [0, elementSize).
            unsigned char * inchar = (unsigned char*)inBuffer.data + i;
            unsigned char * outchar = (unsigned char*)outBuffer.data + i;
            char values[4096];
            values[0] = 0;

            log_error( "ERROR: Results of copy did not validate!\n" );
            sprintf(values + strlen( values), "%d -> [", i);
            for (int j=0; j<(int)elementSize; j++)
                sprintf(values + strlen( values), "%2x ", inchar[j]);
            sprintf(values + strlen(values), "] != [");
            for (int j=0; j<(int)elementSize; j++)
                sprintf(values + strlen( values), "%2x ", outchar[j]);
            sprintf(values + strlen(values), "]");
            log_error("%s\n", values);

            return -1;
        }
    }

    return 0;
}
// Runs test_strided_copy for every combination of element type, vector size
// and stride using the supplied kernel source template.
//
// kDouble is skipped when the device does not report cl_khr_fp64, and
// kLong/kULong are skipped when gHasLong is false.
//
// Returns 0 when every combination passed, -1 if at least one failed.
int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode)
{
    static const ExplicitType kTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
    static const unsigned int kVecSizes[] = { 1, 2, 3, 4, 8, 16 };
    static const unsigned int kStrides[] = { 1, 3, 4, 5 };

    const size_t numTypes = sizeof( kTypes ) / sizeof( kTypes[ 0 ] );
    const size_t numVecSizes = sizeof( kVecSizes ) / sizeof( kVecSizes[ 0 ] );
    const size_t numStrides = sizeof( kStrides ) / sizeof( kStrides[ 0 ] );

    int failures = 0;

    for( size_t t = 0; t < numTypes; ++t )
    {
        const ExplicitType type = kTypes[ t ];
        const bool is64BitInt = ( type == kLong || type == kULong );

        // Skip types the device cannot handle.
        if( type == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
            continue;
        if( is64BitInt && !gHasLong )
            continue;

        for( size_t v = 0; v < numVecSizes; ++v )
        {
            for( size_t s = 0; s < numStrides; ++s )
            {
                // Keep going after a failure so every combination gets exercised.
                if( test_strided_copy( deviceID, context, queue, kernelCode, type, kVecSizes[ v ], kStrides[ s ] ) != 0 )
                    ++failures;
            }
        }
    }

    return ( failures != 0 ) ? -1 : 0;
}
// Test entry point: runs the strided-copy matrix with the
// async_strided_global_to_local_kernel source. num_elements is not used.
int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int status = test_strided_copy_all_types( deviceID, context, queue, async_strided_global_to_local_kernel );
    return status;
}
// Test entry point: runs the strided-copy matrix with the
// async_strided_local_to_global_kernel source. num_elements is not used.
int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int status = test_strided_copy_all_types( deviceID, context, queue, async_strided_local_to_global_kernel );
    return status;
}

View File

@@ -0,0 +1,159 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the "compute_sum" kernel used by test_barrier.
//
// Each work-item first accumulates a strided partial sum of a[0..n) into
// tmp_sum[tid]. The partial sums are then folded together in a loop that
// roughly halves the active range each pass (hadd(x,1) rounds the half up),
// with a barrier(CLK_GLOBAL_MEM_FENCE) at the top of every pass. Finally
// work-item 0 stores tmp_sum[0] to *sum.
//
// The kernel only uses local ids/sizes, so the host is expected to launch it
// as a single work-group; tmp_sum needs one int per work-item.
const char *barrier_kernel_code =
"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n"
"{\n"
" int tid = get_local_id(0);\n"
" int lsize = get_local_size(0);\n"
" int i;\n"
"\n"
" tmp_sum[tid] = 0;\n"
" for (i=tid; i<n; i+=lsize)\n"
" tmp_sum[tid] += a[i];\n"
" \n"
" // updated to work for any workgroup size \n"
" for (i=hadd(lsize,1); lsize>1; i = hadd(i,1))\n"
" {\n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
" if (tid + i < lsize)\n"
" tmp_sum[tid] += tmp_sum[tid + i];\n"
" lsize = i; \n"
" }\n"
"\n"
" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n"
" if (tid == 0)\n"
" *sum = tmp_sum[0];\n"
"}\n";
// Recomputes the sum of the first n ints of inptr on the host and compares it
// with the device result stored in outptr[0].
// Logs the outcome; returns 0 on a match and -1 on a mismatch.
static int
verify_sum(int *inptr, int *outptr, int n)
{
    int total = 0;
    int idx = 0;

    while (idx < n)
    {
        total += inptr[idx];
        ++idx;
    }

    if (total == outptr[0])
    {
        log_info("BARRIER test passed\n");
        return 0;
    }

    log_error("BARRIER test failed\n");
    return -1;
}
int
test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem streams[3];
cl_int *input_ptr = NULL, *output_ptr = NULL;
cl_program program;
cl_kernel kernel;
size_t global_threads[3];
size_t local_threads[3];
int err;
int i;
size_t max_local_workgroup_size[3];
size_t max_threadgroup_size = 0;
MTdata d;
err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_kernel_code, "compute_sum" );
test_error(err, "Failed to build kernel/program.");
err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
sizeof(max_threadgroup_size), &max_threadgroup_size, NULL);
test_error(err, "clGetKernelWorkgroupInfo failed.");
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
// Pick the minimum of the device and the kernel
if (max_threadgroup_size > max_local_workgroup_size[0])
max_threadgroup_size = max_local_workgroup_size[0];
// work group size must divide evenly into the global size
while( num_elements % max_threadgroup_size )
max_threadgroup_size--;
input_ptr = (int*)malloc(sizeof(int) * num_elements);
output_ptr = (int*)malloc(sizeof(int));
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err);
test_error(err, "clCreateBuffer failed.");
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int), NULL, &err);
test_error(err, "clCreateBuffer failed.");
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * max_threadgroup_size, NULL, &err);
test_error(err, "clCreateBuffer failed.");
d = init_genrand( gRandomSeed );
for (i=0; i<num_elements; i++)
input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d);
free_mtdata(d); d = NULL;
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed.");
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
test_error(err, "clSetKernelArg failed.");
global_threads[0] = max_threadgroup_size;
local_threads[0] = max_threadgroup_size;
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
test_error(err, "clEnqueueNDRangeKernel failed.");
err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int), (void *)output_ptr, 0, NULL, NULL );
test_error(err, "clEnqueueReadBuffer failed.");
err = verify_sum(input_ptr, output_ptr, num_elements);
// cleanup
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[1]);
clReleaseMemObject(streams[2]);
clReleaseKernel(kernel);
clReleaseProgram(program);
free(input_ptr);
free(output_ptr);
return err;
}

View File

@@ -0,0 +1,303 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// printf-style template for the "test_kernel" source built by
// test_basic_parameter_types. It contains 14 "%s" slots:
//   - the first 8 receive the vector-width suffix ("", "2", "4", ...) for the
//     seven by-value arguments and the float result pointer;
//   - the last 6 receive the conversion call placed in front of each
//     non-float argument (e.g. "convert_float4", or a blank for scalars).
// The kernel writes each argument, converted to float, into result[0..6].
const char *kernel_code =
"__kernel void test_kernel(\n"
"char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f,\n"
"__global float%s *result)\n"
"{\n"
" result[0] = %s(c);\n"
" result[1] = %s(uc);\n"
" result[2] = %s(s);\n"
" result[3] = %s(us);\n"
" result[4] = %s(i);\n"
" result[5] = %s(ui);\n"
" result[6] = f;\n"
"}\n";
// printf-style template for the "test_kernel_long" source built by
// test_basic_parameter_types_long. It contains 5 "%s" slots: three
// vector-width suffixes (long, ulong, float result pointer) followed by two
// conversion calls. The kernel writes both arguments, converted to float,
// into result[0] and result[1].
const char *kernel_code_long =
"__kernel void test_kernel_long(\n"
"long%s l, ulong%s ul,\n"
"__global float%s *result)\n"
"{\n"
" result[0] = %s(l);\n"
" result[1] = %s(ul);\n"
"}\n";
// Checks that long/ulong scalars and vectors (widths 1, 2, 4, 8, 16) can be
// passed by value as kernel arguments.
//
// For each width a kernel is generated from kernel_code_long, both arguments
// are set, the kernel is run with a single work-item, and the float results
// are compared with the host values converted to float. Widths whose total
// argument size exceeds CL_DEVICE_MAX_PARAMETER_SIZE are skipped.
//
// Returns the number of mismatching elements, or a CL error code if an API
// call fails. num_elements is not used.
int
test_basic_parameter_types_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kNumArgs = 2, kMaxLanes = 16, kNumWidths = 5 };

    static const int lane_counts[kNumWidths] = {1, 2, 4, 8, 16};
    static const char *const lane_suffix[kNumWidths] = {"", "2", "4", "8", "16"};
    static const char *const arg_type_names[kNumArgs] = { "long", "ulong" };

    // The actual values are unimportant; only the argument passing is tested.
    cl_long l[kMaxLanes]={-21,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_ulong ul[kMaxLanes]={22,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};

    // Per-argument host data and element size, indexed like the kernel args.
    const void *arg_data[kNumArgs] = { l, ul };
    const size_t arg_elem_size[kNumArgs] = { sizeof(cl_long), sizeof(cl_ulong) };

    // Expected float value for every argument lane.
    float reference[kNumArgs][kMaxLanes];
    for (int lane = 0; lane < kMaxLanes; ++lane) {
        reference[0][lane] = (float)l[lane];
        reference[1][lane] = (float)ul[lane];
    }

    // Bytes of by-value argument data per vector lane.
    const size_t parameter_size = sizeof(cl_long) + sizeof(cl_ulong);

    clMemWrapper results;
    float results_back[kNumArgs*kMaxLanes];
    char kernel_string[8192];
    char convert_string[1024];
    size_t max_parameter_size;
    int total_errors = 0;
    int error;

    kernel_string[0] = '\0';
    convert_string[0] = '\0';

    // Upper bound on the total size of kernel arguments for this device.
    error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
    test_error( error, "Unable to get max parameter size from device" );

    // One result buffer, large enough for the widest vectors, reused per width.
    results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*2*16, NULL, &error);
    test_error(error, "clCreateBuffer failed");

    for (int w = 0; w < kNumWidths; ++w) {
        const int lanes = lane_counts[w];
        const char *suffix = lane_suffix[w];
        const size_t global[3] = {1, 1, 1};
        clProgramWrapper program;
        clKernelWrapper kernel;

        const size_t total_parameter_size = parameter_size*lanes + sizeof(cl_mem);
        if (total_parameter_size > max_parameter_size) {
            log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
                     (int)lanes, (int)total_parameter_size, (int)max_parameter_size);
            continue;
        }

        log_info("Testing vector size %d\n", lanes);

        // Vectors need an explicit convert_floatN call; scalars convert implicitly.
        if (lanes > 1)
            sprintf(convert_string, "convert_float%s", suffix);
        else
            sprintf(convert_string, " ");

        // Instantiate the kernel template: 3 width suffixes, 2 conversions.
        sprintf(kernel_string, kernel_code_long,
                suffix, suffix, suffix,
                convert_string, convert_string);

        const char *source = kernel_string;
        error = create_single_kernel_helper(context, &program, &kernel, 1, &source, "test_kernel_long");
        test_error(error, "create single kernel failed");

        // By-value arguments first, then the result buffer.
        for (int arg = 0; arg < kNumArgs; ++arg) {
            error = clSetKernelArg(kernel, arg, arg_elem_size[arg]*lanes, arg_data[arg]);
            if (error)
                log_error("Setting kernel arg %d %s%s: ", arg, arg_type_names[arg], suffix);
            test_error(error, "clSetKernelArgs failed");
        }
        error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results);
        test_error(error, "clSetKernelArgs failed");

        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
        test_error(error, "clEnqueueNDRangeKernel failed");

        error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*2*16, results_back, 0, NULL, NULL);
        test_error(error, "clEnqueueReadBuffer failed");

        // result[arg] is a floatN, so argument `arg` occupies lanes
        // [arg*lanes, arg*lanes + lanes) of the read-back array.
        for (int arg = 0; arg < kNumArgs; ++arg) {
            for (int lane = 0; lane < lanes; ++lane) {
                const float got = results_back[arg*lanes + lane];
                const float want = reference[arg][lane];
                if (got != want) {
                    total_errors++;
                    log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", arg_type_names[arg], suffix,
                              lane, got, want);
                }
            }
        }
    }

    return total_errors;
}
// Checks that char/uchar/short/ushort/int/uint/float scalars and vectors
// (widths 1, 2, 4, 8, 16) can be passed by value as kernel arguments, then
// runs the long/ulong variant when gHasLong is set.
//
// For each width a kernel is generated from kernel_code, all seven arguments
// are set, the kernel is run with a single work-item, and the float results
// are compared with the host values converted to float. Widths whose total
// argument size exceeds CL_DEVICE_MAX_PARAMETER_SIZE are skipped.
//
// Returns the number of mismatching elements (including those reported by
// test_basic_parameter_types_long), or a CL error code if an API call fails.
int
test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kNumArgs = 7, kMaxLanes = 16, kNumWidths = 5 };

    static const int lane_counts[kNumWidths] = {1, 2, 4, 8, 16};
    static const char *const lane_suffix[kNumWidths] = {"", "2", "4", "8", "16"};
    static const char *const arg_type_names[kNumArgs] = {"char", "uchar", "short", "ushort", "int", "uint", "float"};

    // The actual values are unimportant; only the argument passing is tested.
    cl_char c[kMaxLanes]={0,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_uchar uc[kMaxLanes]={16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    cl_short s[kMaxLanes]={-17,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_ushort us[kMaxLanes]={18,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    cl_int i[kMaxLanes]={-19,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_uint ui[kMaxLanes]={20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    cl_float f[kMaxLanes]={-23,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};

    // Per-argument host data and element size, indexed like the kernel args.
    const void *arg_data[kNumArgs] = { c, uc, s, us, i, ui, f };
    const size_t arg_elem_size[kNumArgs] = {
        sizeof(cl_char), sizeof(cl_uchar), sizeof(cl_short), sizeof(cl_ushort),
        sizeof(cl_int), sizeof(cl_uint), sizeof(cl_float)
    };

    // Expected float value for every argument lane.
    float reference[kNumArgs][kMaxLanes];
    for (int lane = 0; lane < kMaxLanes; ++lane) {
        reference[0][lane] = (float)c[lane];
        reference[1][lane] = (float)uc[lane];
        reference[2][lane] = (float)s[lane];
        reference[3][lane] = (float)us[lane];
        reference[4][lane] = (float)i[lane];
        reference[5][lane] = (float)ui[lane];
        reference[6][lane] = (float)f[lane];
    }

    // Bytes of by-value argument data per vector lane.
    const size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar) +
                                  sizeof(cl_short) + sizeof(cl_ushort) +
                                  sizeof(cl_int) + sizeof(cl_uint) +
                                  sizeof(cl_float);

    clMemWrapper results;
    float results_back[kNumArgs*kMaxLanes];
    char kernel_string[8192];
    char convert_string[1024];
    size_t max_parameter_size;
    int total_errors = 0;
    int error;

    kernel_string[0] = '\0';
    convert_string[0] = '\0';

    // Upper bound on the total size of kernel arguments for this device.
    error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
    test_error( error, "Unable to get max parameter size from device" );

    // One result buffer, large enough for the widest vectors, reused per width.
    results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*7*16, NULL, &error);
    test_error(error, "clCreateBuffer failed");

    for (int w = 0; w < kNumWidths; ++w) {
        const int lanes = lane_counts[w];
        const char *suffix = lane_suffix[w];
        const size_t global[3] = {1, 1, 1};
        clProgramWrapper program;
        clKernelWrapper kernel;

        const size_t total_parameter_size = parameter_size*lanes + sizeof(cl_mem);
        if (total_parameter_size > max_parameter_size) {
            log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
                     (int)lanes, (int)total_parameter_size, (int)max_parameter_size);
            continue;
        }

        log_info("Testing vector size %d\n", lanes);

        // Vectors need an explicit convert_floatN call; scalars convert implicitly.
        if (lanes > 1)
            sprintf(convert_string, "convert_float%s", suffix);
        else
            sprintf(convert_string, " ");

        // Instantiate the kernel template: 8 width suffixes, 6 conversions.
        sprintf(kernel_string, kernel_code,
                suffix, suffix, suffix, suffix,
                suffix, suffix, suffix, suffix,
                convert_string, convert_string, convert_string,
                convert_string, convert_string, convert_string);

        const char *source = kernel_string;
        error = create_single_kernel_helper(context, &program, &kernel, 1, &source, "test_kernel");
        test_error(error, "create single kernel failed");

        // By-value arguments first, then the result buffer.
        for (int arg = 0; arg < kNumArgs; ++arg) {
            error = clSetKernelArg(kernel, arg, arg_elem_size[arg]*lanes, arg_data[arg]);
            if (error)
                log_error("Setting kernel arg %d %s%s: ", arg, arg_type_names[arg], suffix);
            test_error(error, "clSetKernelArgs failed");
        }
        error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results);
        test_error(error, "clSetKernelArgs failed");

        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
        test_error(error, "clEnqueueNDRangeKernel failed");

        error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*7*16, results_back, 0, NULL, NULL);
        test_error(error, "clEnqueueReadBuffer failed");

        // result[arg] is a floatN, so argument `arg` occupies lanes
        // [arg*lanes, arg*lanes + lanes) of the read-back array.
        for (int arg = 0; arg < kNumArgs; ++arg) {
            for (int lane = 0; lane < lanes; ++lane) {
                const float got = results_back[arg*lanes + lane];
                const float want = reference[arg][lane];
                if (got != want) {
                    total_errors++;
                    log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", arg_type_names[arg], suffix,
                              lane, got, want);
                }
            }
        }
    }

    if (!gHasLong) {
        log_info("Longs unsupported, skipping.");
        return total_errors;
    }

    log_info("Testing long types...\n");
    return total_errors + test_basic_parameter_types_long( device, context, queue, num_elements );
}

View File

@@ -0,0 +1,564 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// CL_EXIT_ERROR(cmd, format, ...)
//
// Evaluates `cmd` once and, if the result is not CL_SUCCESS, logs the source
// file and line followed by the printf-style message built from `format` and
// the remaining arguments.
//
// Despite the name, the macro does NOT exit or return: the abort() call is
// commented out, so execution continues after the log output. Callers that
// need to stop on failure must check the error value themselves.
//
// The expansion is a bare brace block (not do { } while (0)), so avoid using
// it as the unbraced body of an if that has an else branch.
#define CL_EXIT_ERROR(cmd,format,...) \
{ \
if ((cmd) != CL_SUCCESS) { \
log_error("CL ERROR: %s %u: ", __FILE__,__LINE__); \
log_error(format,## __VA_ARGS__ ); \
log_error("\n"); \
/*abort();*/ \
} \
}
// Element type of every test buffer (one byte per element).
typedef unsigned char BufferType;
// Globals for test
// Command queue shared by the helper functions; set by the main test function.
cl_command_queue queue;
// Number of test buffers.
enum { TotalImages = 8 };
// Width, height and depth (in elements) of each test buffer.
size_t width [TotalImages];
size_t height [TotalImages];
size_t depth [TotalImages];
// Per-buffer storage:
//   buffer  - the cl buffer object,
//   verify  - host-side mirror on which every operation is replayed,
//   backing - host allocation handed to clCreateBuffer for the cl buffer.
cl_mem buffer [TotalImages];
BufferType* verify[TotalImages];
BufferType* backing[TotalImages];
// Temporary buffer used for read and write operations, and its size in bytes.
BufferType* tmp_buffer;
size_t tmp_buffer_size;
size_t num_tries = 50; // Number of randomly selected operations to perform.
size_t alloc_scale = 2; // Scale term applied to the buffer allocation size.
// Random number generator state shared by the helpers.
MTdata mt;
// Fills a host buffer of w*h*d elements with pseudo-random bytes drawn from
// the given generator. Each byte is genrand_int32(mt) % 0xff, i.e. in the
// range 0x00..0xfe.
static void initialize_image(BufferType* ptr, size_t w, size_t h, size_t d, MTdata mt)
{
    const size_t bytes_per_element = sizeof(BufferType) / sizeof(unsigned char);
    size_t remaining = w * h * d * bytes_per_element;
    unsigned char* out = (unsigned char*)ptr;

    while (remaining != 0) {
        *out = (unsigned char)(genrand_int32(mt) % 0xff);
        ++out;
        --remaining;
    }
}
// Dumps a w x h x d buffer through log_error as hex bytes, one row per line
// and one blank-line-separated block per slice. Debugging aid.
//
// size_t values are cast to unsigned long before being passed to "%lu":
// passing a size_t directly is undefined where size_t and unsigned long
// differ in width (e.g. 64-bit Windows). Loop counters are size_t so they
// can never wrap before reaching the size_t bounds.
void print_buffer(BufferType* buf, size_t w, size_t h, size_t d) {
    log_error("Size = %lux%lux%lu (%lu total)\n",
              (unsigned long)w, (unsigned long)h, (unsigned long)d, (unsigned long)(w*h*d));
    for (size_t k = 0; k != d; ++k) {
        log_error("Slice: %u\n", (unsigned)k);
        const BufferType* slice = buf + k*(w*h);
        for (size_t j = 0; j != h; ++j) {
            const BufferType* row = slice + j*w;
            for (size_t i = 0; i != w; ++i) {
                log_error("%02x", row[i]);
            }
            log_error("\n");
        }
        log_error("\n");
    }
}
// Reports whether a source and a destination region of identical extent
// overlap inside a buffer laid out with the given row and slice pitch.
//
// Two checks are made:
//   1. a per-axis box intersection test on the (x, y, z) ranges;
//   2. if the boxes do not intersect, a linear address-range test that is
//      applied only when a region runs past the end of a row (x) or, for
//      multi-slice regions, past the end of a slice (y) and could therefore
//      wrap into the other region.
bool check_overlap_rect(size_t src_offset[3],
                        size_t dst_offset[3],
                        size_t region[3],
                        size_t row_pitch,
                        size_t slice_pitch)
{
    // 1. Box test: the regions intersect only if they intersect on every axis.
    bool boxes_intersect = true;
    for (int axis = 0; axis < 3 && boxes_intersect; ++axis)
    {
        const size_t src_lo = src_offset[axis];
        const size_t src_hi = src_offset[axis] + region[axis];
        const size_t dst_lo = dst_offset[axis];
        const size_t dst_hi = dst_offset[axis] + region[axis];
        if (!(src_lo < dst_hi) || !(src_hi > dst_lo))
            boxes_intersect = false;
    }
    if (boxes_intersect)
        return true;

    // 2. Linear address ranges covered by each region.
    const size_t span = region[2] * slice_pitch + region[1] * row_pitch + region[0];
    const size_t dst_start = dst_offset[2] * slice_pitch + dst_offset[1] * row_pitch + dst_offset[0];
    const size_t dst_end = dst_start + span;
    const size_t src_start = src_offset[2] * slice_pitch + src_offset[1] * row_pitch + src_offset[0];
    const size_t src_end = src_start + span;
    const bool ranges_intersect =
        (src_start <= dst_start && dst_start < src_end) ||
        (dst_start <= src_start && src_start < dst_end);

    bool wrapped_overlap = false;

    // Amount by which each region runs past the end of a row.
    size_t src_x_excess = 0;
    if (src_offset[0] + region[0] > row_pitch)
        src_x_excess = src_offset[0] + region[0] - row_pitch;
    size_t dst_x_excess = 0;
    if (dst_offset[0] + region[0] > row_pitch)
        dst_x_excess = dst_offset[0] + region[0] - row_pitch;

    const bool x_wraps_into_other =
        (src_x_excess > 0 && src_x_excess > dst_offset[0]) ||
        (dst_x_excess > 0 && dst_x_excess > src_offset[0]);
    if (x_wraps_into_other && ranges_intersect)
        wrapped_overlap = true;

    if (region[2] > 1)
    {
        // Rows per slice; only meaningful (and only computed) for 3D regions.
        const size_t rows_per_slice = slice_pitch / row_pitch;

        // Amount by which each region runs past the end of a slice.
        size_t src_y_excess = 0;
        if (src_offset[1] + region[1] > rows_per_slice)
            src_y_excess = src_offset[1] + region[1] - rows_per_slice;
        size_t dst_y_excess = 0;
        if (dst_offset[1] + region[1] > rows_per_slice)
            dst_y_excess = dst_offset[1] + region[1] - rows_per_slice;

        const bool y_wraps_into_other =
            (src_y_excess > 0 && src_y_excess > dst_offset[1]) ||
            (dst_y_excess > 0 && dst_y_excess > src_offset[1]);
        if (y_wraps_into_other && ranges_intersect)
            wrapped_overlap = true;
    }

    return wrapped_overlap;
}
// Copies a rectangular region from cl buffer `src` to cl buffer `dst` with
// clEnqueueCopyBufferRect and then replays the same copy on the host-side
// verify buffers.
//
//   src, dst  - indices into the global buffer/verify/width/height arrays
//   soffset   - (x, y, z) origin of the region in the source buffer
//   sregion   - (width, height, depth) of the region
//   doffset   - (x, y, z) origin of the region in the destination buffer
//   dregion   - unused; the destination extent is taken to equal sregion
//
// If check_overlap_rect reports an overlap the copy is skipped entirely and
// CL_SUCCESS is returned. If the enqueue fails, the error is logged and
// returned WITHOUT updating the verify buffer, so the host mirror never
// records a copy the device did not perform.
//
// Returns CL_SUCCESS (0) on success or the CL error code on failure.
int copy_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) {
    (void)dregion;

    // Host-side pitches, in elements.
    const size_t spitch = width[src];
    const size_t sslice = width[src]*height[src];
    const size_t dpitch = width[dst];
    const size_t dslice = width[dst]*height[dst];

    // Slice pitches handed to CL; a value of 0 lets the runtime derive the
    // pitch, which is used when a slice is a single element.
    const size_t src_slice_pitch = (sslice != 1) ? sslice : 0;
    const size_t dst_slice_pitch = (dslice != 1) ? dslice : 0;

    if (check_overlap_rect(soffset,doffset,sregion,spitch, src_slice_pitch)) {
        log_info( "Copy overlap reported, skipping copy buffer rect\n" );
        return CL_SUCCESS;
    }

    // Copy between cl buffers.
    cl_int err = clEnqueueCopyBufferRect(queue,
                                         buffer[src],buffer[dst],
                                         soffset, doffset,
                                         sregion,
                                         spitch, src_slice_pitch,
                                         dpitch, dst_slice_pitch,
                                         0, NULL, NULL);
    if (err != CL_SUCCESS) {
        CL_EXIT_ERROR(err, "clEnqueueCopyBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst);
        return err;
    }

    // Copy between host buffers, visiting elements in z-major, x-minor order.
    for (size_t z = 0; z != sregion[2]; ++z) {
        for (size_t y = 0; y != sregion[1]; ++y) {
            const size_t s_row = (soffset[2]+z)*sslice + (soffset[1]+y)*spitch + soffset[0];
            const size_t d_row = (doffset[2]+z)*dslice + (doffset[1]+y)*dpitch + doffset[0];
            for (size_t x = 0; x != sregion[0]; ++x) {
                verify[dst][d_row + x] = verify[src][s_row + x];
            }
        }
    }

    return 0;
}
// Compares a region of `device` (addressed with the destination buffer's
// pitches at doffset) against the same-sized region of verify[src] (addressed
// with the source buffer's pitches at soffset).
//
//   device   - host-visible data to check (read-back or mapped buffer)
//   src, dst - indices selecting the source/destination buffer dimensions
//   soffset  - (x, y, z) origin of the region in verify[src]
//   sregion  - (width, height, depth) of the region
//   doffset  - (x, y, z) origin of the region in `device`
//
// Returns 0 if every element matches; logs the first mismatch and returns -1
// otherwise.
//
// size_t values are cast to unsigned long before being passed to "%lu":
// passing a size_t directly is undefined where the two types differ in width.
int verify_region(BufferType* device, size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3]) {
    const size_t spitch = width[src];
    const size_t sslice = width[src]*height[src];
    const size_t dpitch = width[dst];
    const size_t dslice = width[dst]*height[dst];

    const size_t rslice = sregion[0]*sregion[1];   // elements per region slice
    const size_t total = rslice * sregion[2];

    for (size_t i = 0; i != total; ++i) {
        // Compute the coordinates of the element within the region.
        const size_t sz = i / rslice;
        const size_t sy = (i % rslice) / sregion[0];
        const size_t sx = (i % rslice) % sregion[0];

        // Compute the element offsets in the source and destination buffers.
        const size_t s_idx = (soffset[2]+sz)*sslice + (soffset[1]+sy)*spitch + soffset[0]+sx;
        const size_t d_idx = (doffset[2]+sz)*dslice + (doffset[1]+sy)*dpitch + doffset[0]+sx;

        if (device[d_idx] != verify[src][s_idx]) {
            log_error("Verify failed on comparsion %lu: coordinate (%lu, %lu, %lu) of region\n",
                      (unsigned long)i, (unsigned long)sx, (unsigned long)sy, (unsigned long)sz);
            log_error("0x%02x != 0x%02x\n", device[d_idx], verify[src][s_idx]);
#if 0
            // Enable this section to print buffers.
            log_error("Device (copy): [%lu]\n",(unsigned long)dst);
            print_buffer(device,width[dst],height[dst],depth[dst]);
            log_error("\n");
            log_error("Verify: [%lu]\n",(unsigned long)src);
            print_buffer(verify[src],width[src],height[src],depth[src]);
            log_error("\n");
            abort();
#endif
            return -1;
        }
    }

    return 0;
}
// This function invokes ReadBufferRect to read a region from the
// specified source buffer into a temporary destination buffer. The
// contents of the temporary buffer are then compared to the source
// region of the corresponding verify buffer.
int read_verify_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) {
// Clear the temporary destination host buffer.
memset(tmp_buffer, 0xff, tmp_buffer_size);
size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0;
size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0;
// Copy the source region of the cl buffer, to the destination region of the temporary buffer.
CL_EXIT_ERROR(clEnqueueReadBufferRect(queue,
buffer[src],
CL_TRUE,
soffset,doffset,
sregion,
width[src], src_slice_pitch,
width[dst], dst_slice_pitch,
tmp_buffer,
0, NULL, NULL), "clEnqueueCopyBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst);
return verify_region(tmp_buffer,src,soffset,sregion,dst,doffset);
}
// This function performs the same verification check as
// read_verify_region, except a MapBuffer command is used to access the
// device buffer data instead of a ReadBufferRect, and the whole
// buffer is checked.
int map_verify_region(size_t src) {
size_t size_bytes = width[src]*height[src]*depth[src]*sizeof(BufferType);
// Copy the source region of the cl buffer, to the destination region of the temporary buffer.
cl_int err;
BufferType* mapped = (BufferType*)clEnqueueMapBuffer(queue,buffer[src],CL_TRUE,CL_MAP_READ,0,size_bytes,0,NULL,NULL,&err);
CL_EXIT_ERROR(err, "clEnqueueMapBuffer failed for buffer %u",(unsigned)src);
size_t soffset[] = { 0, 0, 0 };
size_t sregion[] = { width[src], height[src], depth[src] };
int ret = verify_region(mapped,src,soffset,sregion,src,soffset);
CL_EXIT_ERROR(clEnqueueUnmapMemObject(queue,buffer[src],mapped,0,NULL,NULL),
"clEnqueueUnmapMemObject failed for buffer %u",(unsigned)src);
return ret;
}
// This function generates a new temporary buffer and then writes a
// region of it to a region in the specified destination buffer.
int write_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) {
initialize_image(tmp_buffer, tmp_buffer_size, 1, 1, mt);
// memset(tmp_buffer, 0xf0, tmp_buffer_size);
size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0;
size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0;
// Copy the source region of the cl buffer, to the destination region of the temporary buffer.
CL_EXIT_ERROR(clEnqueueWriteBufferRect(queue,
buffer[dst],
CL_TRUE,
doffset,soffset,
/*sregion,*/dregion,
width[dst], dst_slice_pitch,
width[src], src_slice_pitch,
tmp_buffer,
0, NULL, NULL), "clEnqueueWriteBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst);
// Copy from the temporary buffer to the host buffer.
size_t spitch = width[src];
size_t sslice = width[src]*height[src];
size_t dpitch = width[dst];
size_t dslice = width[dst]*height[dst];
size_t total = sregion[0] * sregion[1] * sregion[2];
for (size_t i = 0; i != total; ++i) {
// Compute the coordinates of the element within the source and destination regions.
size_t rslice = sregion[0]*sregion[1];
size_t sz = i / rslice;
size_t sy = (i % rslice) / sregion[0];
size_t sx = (i % rslice) % sregion[0];
size_t dz = sz;
size_t dy = sy;
size_t dx = sx;
// Compute the offset in bytes of the source and destination.
size_t s_idx = (soffset[2]+sz)*sslice + (soffset[1]+sy)*spitch + soffset[0]+sx;
size_t d_idx = (doffset[2]+dz)*dslice + (doffset[1]+dy)*dpitch + doffset[0]+dx;
verify[dst][d_idx] = tmp_buffer[s_idx];
}
return 0;
}
// Memory-object destructor callback: releases the host allocation passed as
// user data with free(). The memory object handle itself is not used.
void CL_CALLBACK mem_obj_destructor_callback( cl_mem memobj, void *data )
{
    (void)memobj;
    free( data );
}
// This is the main test function for the conformance test.
int
test_bufferreadwriterect(cl_device_id device, cl_context context, cl_command_queue queue_, int num_elements)
{
queue = queue_;
cl_int err;
// Initialize the random number generator.
mt = init_genrand( gRandomSeed );
// Compute a maximum buffer size based on the number of test images and the device maximum.
cl_ulong max_mem_alloc_size = 0;
CL_EXIT_ERROR(clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_mem_alloc_size, NULL),"Could not get device info");
log_info("CL_DEVICE_MAX_MEM_ALLOC_SIZE = %llu bytes.\n", max_mem_alloc_size);
// Confirm that the maximum allocation size is not zero.
if (max_mem_alloc_size == 0) {
log_error("Error: CL_DEVICE_MAX_MEM_ALLOC_SIZE is zero bytes\n");
return -1;
}
// Guess at a reasonable maximum dimension.
size_t max_mem_alloc_dim = (size_t)cbrt((double)(max_mem_alloc_size/sizeof(BufferType)))/alloc_scale;
if (max_mem_alloc_dim == 0) {
max_mem_alloc_dim = max_mem_alloc_size;
}
log_info("Using maximum dimension = %lu.\n", max_mem_alloc_dim);
// Create pairs of cl buffers and host buffers on which operations will be mirrored.
log_info("Creating %u pairs of random sized host and cl buffers.\n", TotalImages);
size_t max_size = 0;
size_t total_bytes = 0;
for (unsigned i=0; i != TotalImages; ++i) {
// Determine a width and height for this buffer.
size_t size_bytes;
size_t tries = 0;
size_t max_tries = 1048576;
do {
width[i] = get_random_size_t(1, max_mem_alloc_dim, mt);
height[i] = get_random_size_t(1, max_mem_alloc_dim, mt);
depth[i] = get_random_size_t(1, max_mem_alloc_dim, mt);
++tries;
} while ((tries < max_tries) && (size_bytes = width[i]*height[i]*depth[i]*sizeof(BufferType)) > max_mem_alloc_size);
// Check to see if adequately sized buffers were found.
if (tries >= max_tries) {
log_error("Error: Could not find random buffer sized less than %llu bytes in %lu tries.\n",
max_mem_alloc_size, max_tries);
return -1;
}
// Keep track of the dimensions of the largest buffer.
max_size = (size_bytes > max_size) ? size_bytes : max_size;
total_bytes += size_bytes;
log_info("Buffer[%u] is (%lu,%lu,%lu) = %lu MB (truncated)\n",i,width[i],height[i],depth[i],(size_bytes)/1048576);
}
log_info( "Total size: %lu MB (truncated)\n", total_bytes/1048576 );
// Allocate a temporary buffer for read and write operations.
tmp_buffer_size = max_size;
tmp_buffer = (BufferType*)malloc(tmp_buffer_size);
// Initialize cl buffers
log_info( "Initializing buffers\n" );
for (unsigned i=0; i != TotalImages; ++i) {
size_t size_bytes = width[i]*height[i]*depth[i]*sizeof(BufferType);
// Allocate a host copy of the buffer for verification.
verify[i] = (BufferType*)malloc(size_bytes);
CL_EXIT_ERROR(verify[i] ? CL_SUCCESS : -1, "malloc of host buffer failed for buffer %u", i);
// Allocate the buffer in host memory.
backing[i] = (BufferType*)malloc(size_bytes);
CL_EXIT_ERROR(backing[i] ? CL_SUCCESS : -1, "malloc of backing buffer failed for buffer %u", i);
// Generate a random buffer.
log_info( "Initializing buffer %u\n", i );
initialize_image(verify[i], width[i], height[i], depth[i], mt);
// Copy the image into a buffer which will passed to CL.
memcpy(backing[i], verify[i], size_bytes);
// Create the CL buffer.
buffer[i] = clCreateBuffer (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, size_bytes, backing[i], &err);
CL_EXIT_ERROR(err,"clCreateBuffer failed for buffer %u", i);
// Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
err = clSetMemObjectDestructorCallback( buffer[i], mem_obj_destructor_callback, backing[i] );
CL_EXIT_ERROR(err, "Unable to set mem object destructor callback" );
}
// Main test loop, run num_tries times.
log_info( "Executing %u test operations selected at random.\n", (unsigned)num_tries );
for (size_t iter = 0; iter < num_tries; ++iter) {
// Determine a source and a destination.
size_t src = get_random_size_t(0,TotalImages,mt);
size_t dst = get_random_size_t(0,TotalImages,mt);
// Determine the minimum dimensions.
size_t min_width = width[src] < width[dst] ? width[src] : width[dst];
size_t min_height = height[src] < height[dst] ? height[src] : height[dst];
size_t min_depth = depth[src] < depth[dst] ? depth[src] : depth[dst];
// Generate a random source rectangle within the minimum dimensions.
size_t mx = get_random_size_t(0, min_width-1, mt);
size_t my = get_random_size_t(0, min_height-1, mt);
size_t mz = get_random_size_t(0, min_depth-1, mt);
size_t sw = get_random_size_t(1, (min_width - mx), mt);
size_t sh = get_random_size_t(1, (min_height - my), mt);
size_t sd = get_random_size_t(1, (min_depth - mz), mt);
size_t sx = get_random_size_t(0, width[src]-sw, mt);
size_t sy = get_random_size_t(0, height[src]-sh, mt);
size_t sz = get_random_size_t(0, depth[src]-sd, mt);
size_t soffset[] = { sx, sy, sz };
size_t sregion[] = { sw, sh, sd };
// Generate a destination rectangle of the same size.
size_t dw = sw;
size_t dh = sh;
size_t dd = sd;
// Generate a random destination offset within the buffer.
size_t dx = get_random_size_t(0, (width[dst] - dw), mt);
size_t dy = get_random_size_t(0, (height[dst] - dh), mt);
size_t dz = get_random_size_t(0, (depth[dst] - dd), mt);
size_t doffset[] = { dx, dy, dz };
size_t dregion[] = { dw, dh, dd };
// Execute one of three operations:
// - Copy: Copies between src and dst within each set of host, buffer, and images.
// - Read & verify: Reads src region from buffer and image, and compares to host.
// - Write: Generates new buffer with src dimensions, and writes to cl buffer and image.
enum { TotalOperations = 3 };
size_t operation = get_random_size_t(0,TotalOperations,mt);
switch (operation) {
case 0:
log_info("%lu Copy %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n",
iter,
src, soffset[0], soffset[1], soffset[2],
dst, doffset[0], doffset[1], doffset[2],
sregion[0], sregion[1], sregion[2],
sregion[0]*sregion[1]*sregion[2]);
if ((err = copy_region(src, soffset, sregion, dst, doffset, dregion)))
return err;
break;
case 1:
log_info("%lu Read %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n",
iter,
src, soffset[0], soffset[1], soffset[2],
dst, doffset[0], doffset[1], doffset[2],
sregion[0], sregion[1], sregion[2],
sregion[0]*sregion[1]*sregion[2]);
if ((err = read_verify_region(src, soffset, sregion, dst, doffset, dregion)))
return err;
break;
case 2:
log_info("%lu Write %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n",
iter,
src, soffset[0], soffset[1], soffset[2],
dst, doffset[0], doffset[1], doffset[2],
sregion[0], sregion[1], sregion[2],
sregion[0]*sregion[1]*sregion[2]);
if ((err = write_region(src, soffset, sregion, dst, doffset, dregion)))
return err;
break;
}
#if 0
// Uncomment this section to verify each operation.
// If commented out, verification won't occur until the end of the
// test, and it will not be possible to determine which operation failed.
log_info("Verify src %lu offset (%u,%u,%u) region (%lux%lux%lu)\n", src, 0, 0, 0, width[src], height[src], depth[src]);
if (err = map_verify_region(src))
return err;
log_info("Verify dst %lu offset (%u,%u,%u) region (%lux%lux%lu)\n", dst, 0, 0, 0, width[dst], height[dst], depth[dst]);
if (err = map_verify_region(dst))
return err;
#endif
} // end main for loop.
for (unsigned i=0;i<TotalImages;++i) {
log_info("Verify %u offset (%u,%u,%u) region (%lux%lux%lu)\n", i, 0, 0, 0, width[i], height[i], depth[i]);
if ((err = map_verify_region(i)))
return err;
}
// Clean-up.
free_mtdata(mt);
for (unsigned i=0;i<TotalImages;++i) {
free( verify[i] );
clReleaseMemObject( buffer[i] );
}
free( tmp_buffer );
if (!err) {
log_info("RECT read, write test passed\n");
}
return err;
}

View File

@@ -0,0 +1,262 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the first phase of test_constant. Each work-item reads
// one float from the __constant buffer tmpF and one int from the __constant
// buffer tmpI (the int is converted to float by the assignment to Itmp) and
// stores their product at its global id in the __global output buffer.
const char *constant_kernel_code =
"__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" float ftmp = tmpF[tid]; \n"
" float Itmp = tmpI[tid]; \n"
" out[tid] = ftmp * Itmp; \n"
"}\n";
// OpenCL C source for the second phase of test_constant. Every work-item sums
// the elements i_pos[0], i_pos[3], ..., i_pos[(num-1)*3] of a constant buffer
// inside a loop and writes that same sum to its own slot of the output buffer.
// Note the stride of 3: the constant buffer must hold at least (num-1)*3+1
// elements.
const char *loop_constant_kernel_code =
"kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n"
"{\n"
" int tid = get_global_id(0);\n"
" float sum = 0;\n"
" for (int i = 0; i < num; i++) {\n"
" float pos = i_pos[i*3];\n"
" sum += pos;\n"
" }\n"
" out[tid] = sum;\n"
"}\n";
/*
 * Host-side check for constant_kernel: element k of `out` must be exactly
 * equal to tmpF[k] * tmpI[k] (the int operand is converted to float, and the
 * product is rounded to float before comparing).
 *
 * Returns 0 when all n elements match, -1 on the first mismatch.
 */
static int
verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n)
{
    int idx = 0;

    while (idx < n)
    {
        // Keep the product in a float variable so it is compared at float precision.
        float expected = tmpF[idx] * tmpI[idx];

        if (out[idx] != expected)
        {
            log_error("CONSTANT test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("CONSTANT test passed\n");
    return 0;
}
static int
verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n)
{
int i;
cl_int j;
for (i=0; i < n; i++)
{
float sum = 0;
for (j=0; j < l; ++j)
sum += tmp[j*3];
if( out[i] != sum )
{
log_error("loop CONSTANT test failed\n");
return -1;
}
}
log_info("loop CONSTANT test passed\n");
return 0;
}
/*
 * Exercises buffers passed to kernels through the __constant address space.
 *
 * Phase 1 runs constant_kernel, which multiplies a constant float buffer by a
 * constant int buffer element-wise, and checks the result with verify().
 * Phase 2 runs loop_constant_kernel, which sums strided elements of the
 * constant float buffer in a loop, and checks it with verify_loop_constant().
 *
 * The buffers are sized from CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE / 4 so that the
 * two constant arguments together stay below the device limit.
 *
 * Returns 0 only if BOTH phases pass; a non-zero value otherwise. (Previously
 * the phase 1 verification result was overwritten and therefore ignored.)
 * num_elements is unused.
 */
int
test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[3] = { NULL, NULL, NULL };
    cl_int *tmpI = NULL;
    cl_float *tmpF = NULL, *out = NULL;
    cl_program program = NULL, loop_program = NULL;
    cl_kernel kernel = NULL, loop_kernel = NULL;
    cl_int limit = 2;
    size_t global_threads[3];
    int err;
    int constant_result, loop_result;
    int result = -1;    /* pessimistic default; only set to 0 once both phases passed */
    unsigned int i;
    cl_ulong maxSize;
    size_t num_floats, num_ints, constant_values;
    MTdata d;
    RoundingMode oldRoundMode;
    int isRTZ = 0;

    /* Verify our test buffer won't be bigger than allowed */
    err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
    test_error( err, "Unable to get max constant buffer size" );

    log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", maxSize);
    maxSize/=4;
    num_ints = (size_t)maxSize/sizeof(cl_int);
    num_floats = (size_t)maxSize/sizeof(cl_float);
    constant_values = (num_ints >= num_floats) ? num_floats : num_ints;

    log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n",
             constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float), constant_values*sizeof(cl_int), constant_values*sizeof(cl_float));

    tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values);
    tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values);
    out = (cl_float*)malloc(sizeof(cl_float) * constant_values);
    if (!tmpI || !tmpF || !out)
    {
        log_error("malloc of host buffers failed\n");
        goto cleanup;
    }

    /* streams[0]: output, streams[1]: constant floats, streams[2]: constant ints */
    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * constant_values, NULL, NULL);
    if (!streams[2])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    d = init_genrand( gRandomSeed );
    for (i=0; i<constant_values; i++) {
        tmpI[i] = (int)get_random_float(-0x02000000, 0x02000000, d);
        tmpF[i] = get_random_float(-0x02000000, 0x02000000, d);
    }
    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)tmpF, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clWriteArray failed\n");
        goto cleanup;
    }
    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*constant_values, (void *)tmpI, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clWriteArray failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &constant_kernel_code, "constant_kernel" );
    if (err) {
        log_error("Failed to create kernel and program: %d\n", err);
        /* What the helper leaves in program/kernel on failure is not visible
           here, so do not risk releasing them. */
        program = NULL;
        kernel = NULL;
        goto cleanup;
    }

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    global_threads[0] = constant_values;
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed: %d\n", err);
        goto cleanup;
    }
    err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    //If we only support rtz mode
    if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
    {
        oldRoundMode = set_round(kRoundTowardZero, kfloat);
        isRTZ = 1;
    }

    /* Keep this result in its own variable: `err` is reused below. */
    constant_result = verify(tmpF, tmpI, out, (int)constant_values);

    if (isRTZ)
        (void)set_round(oldRoundMode, kfloat);

    // Loop constant buffer test
    memset(out, 0, sizeof(cl_float) * constant_values);
    err = create_single_kernel_helper(context, &loop_program, &loop_kernel, 1,
                                      &loop_constant_kernel_code, "loop_constant_kernel" );
    if (err) {
        log_error("Failed to create loop kernel and program: %d\n", err);
        loop_program = NULL;
        loop_kernel = NULL;
        goto cleanup;
    }

    err = clSetKernelArg(loop_kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(loop_kernel, 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(loop_kernel, 2, sizeof(limit), &limit);
    if (err != CL_SUCCESS) {
        log_error("clSetKernelArgs for loop kernel failed\n");
        goto cleanup;
    }

    err = clEnqueueNDRangeKernel( queue, loop_kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS) {
        log_error("clEnqueueNDRangeKernel failed: %d\n", err);
        goto cleanup;
    }
    err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
    if (err != CL_SUCCESS) {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    loop_result = verify_loop_constant(tmpF, out, limit, (int)constant_values);

    /* The test passes only if both verification phases passed. */
    result = (constant_result != 0 || loop_result != 0) ? -1 : 0;

cleanup:
    /* Shared exit path: release only what was actually created. */
    for (i = 0; i < 3; i++)
    {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    if (loop_kernel)
        clReleaseKernel(loop_kernel);
    if (loop_program)
        clReleaseProgram(loop_program);
    free(tmpI);
    free(tmpF);
    free(out);

    return result;
}

View File

@@ -0,0 +1,101 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for test_constant_source. It declares three program-scope
// __constant variables (a scalar value, a scalar index and a 16-entry table)
// directly in the kernel source. Work-item 0 writes the scalar to out[0] and
// the table entry selected by the constant index to out[1]; every other
// work-item `tid` writes outValues[tid] to out[tid + 1], so the output buffer
// needs one more element than the number of work-items.
const char *constant_source_kernel_code[] = {
"__constant int outVal = 42;\n"
"__constant int outIndex = 7;\n"
"__constant int outValues[ 16 ] = { 17, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };\n"
"\n"
"__kernel void constant_kernel( __global int *out )\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" if( tid == 0 )\n"
" {\n"
" out[ 0 ] = outVal;\n"
" out[ 1 ] = outValues[ outIndex ];\n"
" }\n"
" else\n"
" {\n"
" out[ tid + 1 ] = outValues[ tid ];\n"
" }\n"
"}\n" };
/*
 * Checks that __constant variables declared at program scope in kernel source
 * are readable from a kernel: as a plain scalar (slot 0), as an array indexed
 * by another constant (slot 1) and as an array indexed by the work-item id
 * (slots 2..16).
 *
 * Returns 0 on success, -1 on the first mismatching slot, or the CL error
 * code propagated by test_error. device and num_elements are unused.
 */
int test_constant_source(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kResultCount = 17, kWorkItems = 16 };

    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper outStream;
    cl_int results[ kResultCount ];
    cl_int reference[ kResultCount ] = { 42, 1985, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };
    size_t globalSize[ 1 ] = { kWorkItems };
    cl_int status;

    // Build the program and fetch the kernel.
    status = create_single_kernel_helper( context, &program, &kernel, 1, constant_source_kernel_code, "constant_kernel" );
    test_error( status, "Unable to create testing kernel" );

    // One output slot more than there are work-items (work-item 0 writes two).
    outStream = clCreateBuffer( context, CL_MEM_WRITE_ONLY, sizeof( results ), NULL, &status );
    test_error( status, "Unable to create output buffer" );

    status = clSetKernelArg( kernel, 0, sizeof( outStream ), &outStream );
    test_error( status, "Unable to set kernel argument" );

    // Launch, then fetch the results with a blocking read.
    status = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, NULL, 0, NULL, NULL );
    test_error( status, "Unable to enqueue kernel" );

    status = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof( results ), results, 0, NULL, NULL );
    test_error( status, "Unable to read results" );

    // Compare slot by slot; the message names which access pattern failed.
    for( int slot = 0; slot < kResultCount; ++slot )
    {
        if( reference[ slot ] == results[ slot ] )
            continue;

        switch( slot )
        {
            case 0:
                log_error( "ERROR: Output value %d from constant source global did not validate! (Expected %d, got %d)\n", slot, reference[ slot ], results[ slot ] );
                break;
            case 1:
                log_error( "ERROR: Output value %d from constant-indexed constant array did not validate! (Expected %d, got %d)\n", slot, reference[ slot ], results[ slot ] );
                break;
            default:
                log_error( "ERROR: Output value %d from variable-indexed constant array did not validate! (Expected %d, got %d)\n", slot, reference[ slot ], results[ slot ] );
                break;
        }
        return -1;
    }

    return 0;
}

View File

@@ -0,0 +1,105 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Program source containing exactly one kernel (sample_test), which converts
// each float of src to int. Used by test_createkernelsinprogram for the
// one-kernel case; the kernel is only created there, never enqueued.
const char *sample_single_kernel = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"};
// Program source containing two kernels with identical bodies (sample_test and
// sample_test2). Used by test_createkernelsinprogram for the two-kernel case.
const char *sample_double_kernel = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"
"__kernel void sample_test2(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"};
/*
 * Verifies clCreateKernelsInProgram: builds a program with one kernel and a
 * program with two kernels and checks that the call succeeds and reports the
 * matching kernel count each time.
 *
 * Returns 0 on success, -1 on any failure. The program (and any kernels that
 * were successfully created) are released on the failure paths as well as on
 * success. device, queue and num_elements are unused.
 */
int
test_createkernelsinprogram(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_program program = NULL;
    cl_kernel kernel[2] = { NULL, NULL };
    unsigned int num_kernels = 0;
    int err;

    /* --- program with a single kernel --- */
    err = create_single_kernel_helper(context, &program, NULL, 1, &sample_single_kernel, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("create_single_kernel_helper failed\n");
        return -1;
    }

    err = clCreateKernelsInProgram(program, 1, kernel, &num_kernels);
    if ( (err != CL_SUCCESS) || (num_kernels != 1) )
    {
        log_error("clCreateKernelsInProgram test failed for a single kernel\n");
        /* Only trust the kernel array if the call itself succeeded. */
        if (err == CL_SUCCESS && kernel[0])
            clReleaseKernel(kernel[0]);
        clReleaseProgram(program);
        return -1;
    }

    clReleaseKernel(kernel[0]);
    clReleaseProgram(program);
    kernel[0] = NULL;
    program = NULL;
    num_kernels = 0;

    /* --- program with two kernels --- */
    err = create_single_kernel_helper(context, &program, NULL, 1, &sample_double_kernel, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("create_single_kernel_helper failed\n");
        return -1;
    }

    err = clCreateKernelsInProgram(program, 2, kernel, &num_kernels);
    if ( (err != CL_SUCCESS) || (num_kernels != 2) )
    {
        log_error("clCreateKernelsInProgram test failed for two kernels\n");
        if (err == CL_SUCCESS)
        {
            if (kernel[0])
                clReleaseKernel(kernel[0]);
            if (kernel[1])
                clReleaseKernel(kernel[1]);
        }
        clReleaseProgram(program);
        return -1;
    }

    log_info("clCreateKernelsInProgram test passed\n");

    clReleaseKernel(kernel[0]);
    clReleaseKernel(kernel[1]);
    clReleaseProgram(program);

    return err;
}

View File

@@ -0,0 +1,254 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/typeWrappers.h"
// Host-pointer related cl_mem_flags combinations that the map tests iterate
// over. Must stay index-aligned with flag_set_names, which is used for logging.
const cl_mem_flags flag_set[] = {
CL_MEM_ALLOC_HOST_PTR,
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_USE_HOST_PTR,
CL_MEM_COPY_HOST_PTR,
0
};
// Printable names for the entries of flag_set, in the same order; indexed with
// the same loop variable when logging which combination is under test.
const char* flag_set_names[] = {
"CL_MEM_ALLOC_HOST_PTR",
"CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR",
"CL_MEM_USE_HOST_PTR",
"CL_MEM_COPY_HOST_PTR",
"0"
};
int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
const size_t bufferSize = 256*256;
int src_flag_id;
MTdata d = init_genrand( gRandomSeed );
cl_char *initialData = (cl_char*)malloc(bufferSize);
cl_char *finalData = (cl_char*)malloc(bufferSize);
for (src_flag_id=0; src_flag_id < sizeof(flag_set)/sizeof(flag_set[0]); src_flag_id++)
{
clMemWrapper memObject;
log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
generate_random_data( kChar, (unsigned int)bufferSize, d, initialData );
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), initialData, &error);
else
memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), NULL, &error);
test_error( error, "Unable to create testing buffer" );
if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
{
error = clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize * sizeof( cl_char ), initialData, 0, NULL, NULL);
test_error( error, "clEnqueueWriteBuffer failed");
}
for( int i = 0; i < 128; i++ )
{
size_t offset = (size_t)random_in_range( 0, (int)bufferSize - 1, d );
size_t length = (size_t)random_in_range( 1, (int)( bufferSize - offset ), d );
cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
offset, length, 0, NULL, NULL, &error );
if( error != CL_SUCCESS )
{
print_error( error, "clEnqueueMapBuffer call failed" );
log_error( "\tOffset: %d Length: %d\n", (int)offset, (int)length );
free( initialData );
free( finalData );
free_mtdata(d);
return -1;
}
// Write into the region
for( size_t j = 0; j < length; j++ )
{
cl_char spin = (cl_char)genrand_int32( d );
// Test read AND write in one swipe
cl_char value = mappedRegion[ j ];
value = spin - value;
mappedRegion[ j ] = value;
// Also update the initial data array
value = initialData[ offset + j ];
value = spin - value;
initialData[ offset + j ] = value;
}
// Unmap
error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
test_error( error, "Unable to unmap buffer" );
}
// Final validation: read actual values of buffer and compare against our reference
error = clEnqueueReadBuffer( queue, memObject, CL_TRUE, 0, sizeof( cl_char ) * bufferSize, finalData, 0, NULL, NULL );
test_error( error, "Unable to read results" );
for( size_t q = 0; q < bufferSize; q++ )
{
if( initialData[ q ] != finalData[ q ] )
{
log_error( "ERROR: Sample %d did not validate! Got %d, expected %d\n", (int)q, (int)finalData[ q ], (int)initialData[ q ] );
free( initialData );
free( finalData );
free_mtdata(d);
return -1;
}
}
} // cl_mem flags
free( initialData );
free( finalData );
free_mtdata(d);
return 0;
}
int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 };
const size_t imageSize = 256;
int src_flag_id;
cl_uint *initialData;
cl_uint *finalData;
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
initialData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint));
finalData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint));
if( !is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &format ) )
{
log_error( "ERROR: Test requires basic OpenCL 1.0 format CL_RGBA:CL_UNSIGNED_INT32, which is unsupported by this device!\n" );
free(initialData);
free(finalData);
return -1;
}
d = init_genrand( gRandomSeed );
for (src_flag_id=0; src_flag_id < sizeof(flag_set)/sizeof(flag_set[0]); src_flag_id++) {
clMemWrapper memObject;
log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
generate_random_data( kUInt, (unsigned int)( imageSize * imageSize ), d, initialData );
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format,
imageSize, imageSize, 0, initialData, &error );
else
memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format,
imageSize, imageSize, 0, NULL, &error );
test_error( error, "Unable to create testing buffer" );
if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
size_t write_origin[3]={0,0,0}, write_region[3]={imageSize, imageSize, 1};
error = clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, write_region, NULL, NULL, initialData, 0, NULL, NULL);
test_error( error, "Unable to write to testing buffer" );
}
for( int i = 0; i < 128; i++ )
{
size_t offset[3], region[3];
size_t rowPitch;
offset[ 0 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
region[ 0 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 0 ] - 1), d );
offset[ 1 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
region[ 1 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 1 ] - 1), d );
offset[ 2 ] = 0;
region[ 2 ] = 1;
cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
offset, region, &rowPitch, NULL, 0, NULL, NULL, &error );
if( error != CL_SUCCESS )
{
print_error( error, "clEnqueueMapImage call failed" );
log_error( "\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], (int)offset[1], (int)region[0], (int)region[1] );
free(initialData);
free(finalData);
free_mtdata(d);
return -1;
}
// Write into the region
cl_uint *mappedPtr = mappedRegion;
for( size_t y = 0; y < region[ 1 ]; y++ )
{
for( size_t x = 0; x < region[ 0 ] * 4; x++ )
{
cl_int spin = (cl_int)random_in_range( 16, 1024, d );
cl_int value;
// Test read AND write in one swipe
value = mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ];
value = spin - value;
mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ] = value;
// Also update the initial data array
value = initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ];
value = spin - value;
initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ] = value;
}
}
// Unmap
error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
test_error( error, "Unable to unmap buffer" );
}
// Final validation: read actual values of buffer and compare against our reference
size_t finalOrigin[3] = { 0, 0, 0 }, finalRegion[3] = { imageSize, imageSize, 1 };
error = clEnqueueReadImage( queue, memObject, CL_TRUE, finalOrigin, finalRegion, 0, 0, finalData, 0, NULL, NULL );
test_error( error, "Unable to read results" );
for( size_t q = 0; q < imageSize * imageSize * 4; q++ )
{
if( initialData[ q ] != finalData[ q ] )
{
log_error( "ERROR: Sample %d (coord %d,%d) did not validate! Got %d, expected %d\n", (int)q, (int)( ( q / 4 ) % imageSize ), (int)( ( q / 4 ) / imageSize ),
(int)finalData[ q ], (int)initialData[ q ] );
free(initialData);
free(finalData);
free_mtdata(d);
return -1;
}
}
} // cl_mem_flags
free(initialData);
free(finalData);
free_mtdata(d);
return 0;
}

View File

@@ -0,0 +1,123 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "../../test_common/harness/rounding_mode.h"
#include "procs.h"
// OpenCL C source for the 2D variant: only the work-item with global id (0,0)
// writes, storing get_enqueued_local_size(0) in dst[0] and
// get_enqueued_local_size(1) in dst[1].
// Note: in the two assignment lines the "\n" precedes the ';' inside the
// string literal, so the statement terminator lands at the start of the next
// source line. This is harmless to the OpenCL compiler.
static const char *enqueued_local_size_2d_code =
"__kernel void test_enqueued_local_size_2d(global int *dst)\n"
"{\n"
" if ((get_global_id(0) == 0) && (get_global_id(1) == 0))\n"
" {\n"
" dst[0] = (int)get_enqueued_local_size(0)\n;"
" dst[1] = (int)get_enqueued_local_size(1)\n;"
" }\n"
"}\n";
// OpenCL C source for the 1D variant: only the work-item with global id 0
// writes, storing get_enqueued_local_size(0) in dst[0] (tid_x is 0 inside the
// guarded branch).
static const char *enqueued_local_size_1d_code =
"__kernel void test_enqueued_local_size_1d(global int *dst)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" if (get_global_id(0) == 0)\n"
" {\n"
" dst[tid_x] = (int)get_enqueued_local_size(0)\n;"
" }\n"
"}\n";
/*
 * Compares the first n values the kernel reported (result) with the local
 * work sizes that were passed to clEnqueueNDRangeKernel (expected, narrowed
 * to int for the comparison).
 *
 * Returns 0 when all n values match, -1 on the first mismatch.
 */
static int
verify_enqueued_local_size(int *result, size_t *expected, int n)
{
    int dim = 0;

    while (dim < n)
    {
        const int wanted = (int)expected[dim];

        if (result[dim] != wanted)
        {
            log_error("get_enqueued_local_size failed\n");
            return -1;
        }
        ++dim;
    }

    log_info("get_enqueued_local_size passed\n");
    return 0;
}
/*
 * Tests the OpenCL 2.0 built-in get_enqueued_local_size().
 *
 * A 2D launch with local size 16x11 runs the 2D kernel and expects both
 * dimensions to be reported; a 1D launch with local size 9 then runs the 1D
 * kernel and expects that single value. Both kernels are built with
 * -cl-std=CL2.0.
 *
 * Returns 0 only if BOTH verifications pass, -1 if either fails, or the CL
 * error code propagated by test_error. (Previously the 2D verification result
 * was overwritten and ignored, and the 1D kernel was built but never enqueued
 * because the 2D kernel was launched a second time.)
 */
int
test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams;
    cl_program program[2];
    cl_kernel kernel[2];
    int output[2] = { 0, 0 };   /* two ints fit on the stack; nothing to leak on early return */
    size_t globalsize[2];
    size_t localsize[2];
    int err;
    int result_2d, result_1d;

    streams = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), 2*sizeof(int), NULL, &err);
    test_error( err, "clCreateBuffer failed.");

    /* program/kernel [0] is the 1D variant, [1] the 2D variant */
    err = create_single_kernel_helper_with_build_options(context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code, "test_enqueued_local_size_1d", "-cl-std=CL2.0");
    test_error( err, "create_single_kernel_helper failed");
    err = create_single_kernel_helper_with_build_options(context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code, "test_enqueued_local_size_2d", "-cl-std=CL2.0");
    test_error( err, "create_single_kernel_helper failed");

    err = clSetKernelArg(kernel[0], 0, sizeof streams, &streams);
    test_error( err, "clSetKernelArgs failed.");
    err = clSetKernelArg(kernel[1], 0, sizeof streams, &streams);
    test_error( err, "clSetKernelArgs failed.");

    /* 2D launch */
    globalsize[0] = (size_t)num_elements;
    globalsize[1] = (size_t)num_elements;
    localsize[0] = 16;
    localsize[1] = 11;

    err = clEnqueueNDRangeKernel(queue, kernel[1], 2, NULL, globalsize, localsize, 0, NULL, NULL);
    test_error( err, "clEnqueueNDRangeKernel failed.");

    err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output, 0, NULL, NULL);
    test_error( err, "clEnqueueReadBuffer failed.");

    /* Keep this result in its own variable: `err` is reused below. */
    result_2d = verify_enqueued_local_size(output, localsize, 2);

    /* 1D launch, using the 1D kernel */
    globalsize[0] = (size_t)num_elements;
    localsize[0] = 9;

    err = clEnqueueNDRangeKernel(queue, kernel[0], 1, NULL, globalsize, localsize, 0, NULL, NULL);
    test_error( err, "clEnqueueNDRangeKernel failed.");

    err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output, 0, NULL, NULL);
    test_error( err, "clEnqueueReadBuffer failed.");

    result_1d = verify_enqueued_local_size(output, localsize, 1);

    // cleanup
    clReleaseMemObject(streams);
    clReleaseKernel(kernel[0]);
    clReleaseKernel(kernel[1]);
    clReleaseProgram(program[0]);
    clReleaseProgram(program[1]);

    return (result_2d != 0 || result_1d != 0) ? -1 : 0;
}

View File

@@ -0,0 +1,385 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/typeWrappers.h"
// Expands to the OpenCL C source (one concatenated string literal) of a kernel
// named test_conversion that reads a scalar of type `srctype` per work-item
// and stores it, explicitly cast to the vector type `dsttype``size`, in
// destValues. `srctype` and `dsttype` must already be string literals; `size`
// is a bare token (2, 4, 8, 16) that is stringized here.
#define DECLARE_S2V_IDENT_KERNEL(srctype,dsttype,size) \
"__kernel void test_conversion(__global " srctype " *sourceValues, __global " dsttype #size " *destValues )\n" \
"{\n" \
" int tid = get_global_id(0);\n" \
" " srctype " src = sourceValues[tid];\n" \
"\n" \
" destValues[tid] = (" dsttype #size ")src;\n" \
"\n" \
"}\n"
// Expands to a brace-enclosed initializer holding the four kernel sources for
// one (srctype, dsttype) pair, one per vector width 2, 4, 8 and 16. `srctype`
// is passed through as a string literal; `dsttype` is a bare type name that is
// stringized here. Only four entries are given, so when used to initialize a
// 5-element row the last slot is implicitly NULL.
#define DECLARE_S2V_IDENT_KERNELS(srctype,dsttype) \
{ \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,2), \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,4), \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,8), \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,16) \
}
// Initializer for a (source type, destination type) pair that has no kernels:
// all five vector-size slots are NULL. test_explicit_s2v_function returns
// early when handed a NULL program source.
#define DECLARE_EMPTY { NULL, NULL, NULL, NULL, NULL }
/* Note: the next four arrays all must match in order and size to the ExplicitTypes enum in conversions.h!!! */
// Expands to the initializer for one source type: one DECLARE_S2V_IDENT_KERNELS
// entry per destination type, in the order listed below, followed by a single
// empty entry. `srctype` is a bare type name that is stringized here.
// NOTE(review): the trailing DECLARE_EMPTY presumably corresponds to the last
// ExplicitTypes enumerator (double) - confirm against conversions.h.
#define DECLARE_S2V_IDENT_KERNELS_SET(srctype) \
{ \
DECLARE_S2V_IDENT_KERNELS(#srctype,bool), \
DECLARE_S2V_IDENT_KERNELS(#srctype,char), \
DECLARE_S2V_IDENT_KERNELS(#srctype,uchar), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned char), \
DECLARE_S2V_IDENT_KERNELS(#srctype,short), \
DECLARE_S2V_IDENT_KERNELS(#srctype,ushort), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned short), \
DECLARE_S2V_IDENT_KERNELS(#srctype,int), \
DECLARE_S2V_IDENT_KERNELS(#srctype,uint), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned int), \
DECLARE_S2V_IDENT_KERNELS(#srctype,long), \
DECLARE_S2V_IDENT_KERNELS(#srctype,ulong), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned long), \
DECLARE_S2V_IDENT_KERNELS(#srctype,float), \
DECLARE_EMPTY \
}
#define DECLARE_EMPTY_SET \
{ \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY \
}
/* The overall array of kernel sources, indexed as [source type][destination type][vector-size slot].
   The slot order is the order of sizes 2, 4, 8, 16 produced by DECLARE_S2V_IDENT_KERNELS.
   A NULL entry means "no kernel for this combination"; the final source-type set is entirely empty. */
const char * kernel_explicit_s2v_set[kNumExplicitTypes][kNumExplicitTypes][5] = {
DECLARE_S2V_IDENT_KERNELS_SET(bool),
DECLARE_S2V_IDENT_KERNELS_SET(char),
DECLARE_S2V_IDENT_KERNELS_SET(uchar),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned char),
DECLARE_S2V_IDENT_KERNELS_SET(short),
DECLARE_S2V_IDENT_KERNELS_SET(ushort),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned short),
DECLARE_S2V_IDENT_KERNELS_SET(int),
DECLARE_S2V_IDENT_KERNELS_SET(uint),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned int),
DECLARE_S2V_IDENT_KERNELS_SET(long),
DECLARE_S2V_IDENT_KERNELS_SET(ulong),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned long),
DECLARE_S2V_IDENT_KERNELS_SET(float),
DECLARE_EMPTY_SET
};
/*
 * Builds and runs one scalar-to-vector explicit-cast kernel and validates its output.
 *
 * programSrc  kernel source to build; when NULL there is nothing to test and 0 is returned.
 * srcType     type of the scalar elements in inputData.
 * count       number of scalar input elements (one work-item per element).
 * destType    element type of the destination vector.
 * vecSize     number of elements in the destination vector.
 * inputData   host array of `count` elements of srcType.
 *
 * Returns 0 on success (or when skipped) and non-zero on any build, runtime or validation failure.
 * The host result buffer is released on every exit path.
 */
int test_explicit_s2v_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *programSrc,
                               ExplicitType srcType, unsigned int count, ExplicitType destType, unsigned int vecSize, void *inputData )
{
    /* Owns the host result buffer so that it is freed on every return, including the
       early returns that the test_error() checks below may take. */
    struct HostBuffer
    {
        void *ptr;
        HostBuffer() : ptr( NULL ) {}
        ~HostBuffer() { free( ptr ); }
    };

    HostBuffer outData;
    clProgramWrapper program;
    clKernelWrapper kernel;
    int error;
    clMemWrapper streams[2];
    unsigned char convertedData[ 8 ];    /* Max type size is 8 bytes */
    size_t threadSize[3], groupSize[3];
    unsigned int i, s;
    unsigned char *inPtr, *outPtr;
    size_t paramSize, destTypeSize;
    const char* finalProgramSrc[2] = {
        "", // optional pragma
        programSrc
    };

    /* No kernel is defined for this type combination. */
    if( programSrc == NULL )
        return 0;

    if (srcType == kDouble || destType == kDouble) {
        finalProgramSrc[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
    }

    paramSize = get_explicit_type_size( srcType );
    destTypeSize = get_explicit_type_size( destType );

    size_t destStride = destTypeSize * vecSize;

    outData.ptr = malloc( destStride * count );
    if( outData.ptr == NULL )
    {
        log_error( "ERROR: Unable to allocate host memory for the output values!\n" );
        return -1;
    }

    if( create_single_kernel_helper( context, &program, &kernel, 2, finalProgramSrc, "test_conversion" ) )
    {
        log_info( "****** %s%s *******\n", finalProgramSrc[0], finalProgramSrc[1] );
        return -1;
    }

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), paramSize * count, inputData, &error);
    test_error( error, "clCreateBuffer failed");
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), destStride * count, NULL, &error);
    test_error( error, "clCreateBuffer failed");

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] );
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Run the kernel */
    threadSize[0] = count;

    error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    /* Now verify the results. Each converted value should appear vecSize times in its output vector,
       and a memcmp is enough, so the actual data type does not matter here. */
    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, destStride * count, outData.ptr, 0, NULL, NULL );
    test_error( error, "Unable to read output values!" );

    inPtr = (unsigned char *)inputData;
    outPtr = (unsigned char *)outData.ptr;
    for( i = 0; i < count; i++ )
    {
        /* Convert the input data element to our output data type to compare against */
        convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType );

        /* Now compare every element of the vector */
        for( s = 0; s < vecSize; s++ )
        {
            if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 )
            {
                /* Copy only bytes that really exist: the element may be narrower than an
                   unsigned int, and fewer than 16 bytes may remain in the output buffer.
                   As before, the input value prints naturally on little-endian hosts. */
                unsigned long long inBits = 0;
                unsigned int actual[ 4 ] = { 0, 0, 0, 0 };
                size_t inBytes = paramSize < sizeof( inBits ) ? paramSize : sizeof( inBits );
                size_t remaining = destStride * ( count - i );
                size_t outBytes = remaining < sizeof( actual ) ? remaining : sizeof( actual );

                memcpy( &inBits, inPtr, inBytes );
                memcpy( actual, outPtr, outBytes );

                log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize );
                log_error( " Input: 0x%0*llx\n", (int)( paramSize * 2 ), inBits );
                log_error( " Actual: 0x%08x 0x%08x 0x%08x 0x%08x\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
                return -1;
            }
        }
        inPtr += paramSize;
        outPtr += destStride;
    }

    return 0;
}
/*
 * Runs the scalar-to-vector explicit cast test for one source type across the vector sizes
 * 2, 4, 8 and 16. Only the destination type equal to srcType is exercised, and type names
 * containing a space are skipped. After the first failing size the remaining sizes for that
 * destination type are skipped.
 *
 * Returns 0 when every executed test passed, -1 otherwise.
 */
int test_explicit_s2v_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, ExplicitType srcType,
                                   unsigned int count, void *inputData )
{
    static const unsigned int vectorSizes[] = { 2, 4, 8, 16 };
    const int numSizes = (int)( sizeof( vectorSizes ) / sizeof( vectorSizes[ 0 ] ) );
    int result = 0;

    for( int dst = kBool; dst < kNumExplicitTypes; dst++ )
    {
        /* Destination types the device cannot handle are skipped outright. */
        if( dst == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
            continue;

        if( ( dst == kLong || dst == kULong ) && !gHasLong )
            continue;

        /* Only the identity pairing (destination element type == source type) is tested. */
        if( dst != srcType )
            continue;

        for( int slot = 0; slot < numSizes; slot++ )
        {
            const bool srcHasSpace = strchr( get_explicit_type_name( (ExplicitType)srcType ), ' ' ) != NULL;
            if( srcHasSpace || strchr( get_explicit_type_name( (ExplicitType)dst ), ' ' ) != NULL )
                continue;

            const int status = test_explicit_s2v_function( deviceID, context, queue, kernel_explicit_s2v_set[ srcType ][ dst ][ slot ],
                                                           srcType, count, (ExplicitType)dst, vectorSizes[ slot ], inputData );
            if( status == 0 )
                continue;

            log_error( "ERROR: Explicit cast of scalar %s to vector %s%d FAILED; skipping other %s vector tests\n",
                       get_explicit_type_name(srcType), get_explicit_type_name((ExplicitType)dst), vectorSizes[ slot ], get_explicit_type_name((ExplicitType)dst) );
            result = -1;
            break;
        }
    }

    return result;
}
/* Test entry point for bool sources. Nothing is executed: the function logs a note
   and reports success. None of the parameters are used. */
int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
log_info( "NOTE: Boolean vectors not defined in OpenCL 1.0. Skipping test.\n" );
return 0;
/* The block below is compiled out; it sits after the return and is kept only as a record of the intended test. */
#if 0
bool data[128];
generate_random_data( kBool, 128, data );
return test_explicit_s2v_function_set( deviceID, context, queue, kBool, 128, data );
#endif
}
/* Runs the scalar-to-vector explicit cast tests on 128 random char values.
   Returns whatever test_explicit_s2v_function_set() reports. */
int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int sampleCount = 128;
    char values[ sampleCount ];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kChar, sampleCount, seed, values );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kChar, sampleCount, values );
    return status;
}
/* Runs the scalar-to-vector explicit cast tests on 128 random unsigned char values, first as
   kUChar and then, only if that passed, as kUnsignedChar. Returns 0 on success, -1 on failure. */
int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int sampleCount = 128;
    unsigned char values[ sampleCount ];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kUChar, sampleCount, seed, values );

    /* Short-circuit evaluation keeps the second run from starting after a failure. */
    const bool failed =
        test_explicit_s2v_function_set( deviceID, context, queue, kUChar, sampleCount, values ) != 0 ||
        test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedChar, sampleCount, values ) != 0;

    return failed ? -1 : 0;
}
/* Runs the scalar-to-vector explicit cast tests on 128 random short values.
   Returns 0 on success, -1 on failure. */
int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int sampleCount = 128;
    short values[ sampleCount ];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kShort, sampleCount, seed, values );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kShort, sampleCount, values );
    return ( status != 0 ) ? -1 : 0;
}
/* Runs the scalar-to-vector explicit cast tests on 128 random unsigned short values, first as
   kUShort and then, only if that passed, as kUnsignedShort. Returns 0 on success, -1 on failure. */
int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int sampleCount = 128;
    unsigned short values[ sampleCount ];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kUShort, sampleCount, seed, values );

    /* Short-circuit evaluation keeps the second run from starting after a failure. */
    const bool failed =
        test_explicit_s2v_function_set( deviceID, context, queue, kUShort, sampleCount, values ) != 0 ||
        test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedShort, sampleCount, values ) != 0;

    return failed ? -1 : 0;
}
/* Runs the scalar-to-vector explicit cast tests on 128 random int values.
   Returns 0 on success, -1 on failure. */
int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int sampleCount = 128;
    int values[ sampleCount ];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kInt, sampleCount, seed, values );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kInt, sampleCount, values );
    return ( status != 0 ) ? -1 : 0;
}
/* Runs the scalar-to-vector explicit cast tests on 128 random unsigned int values, first as
   kUInt and then, only if that passed, as kUnsignedInt. Returns 0 on success, -1 on failure. */
int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int sampleCount = 128;
    unsigned int values[ sampleCount ];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kUInt, sampleCount, seed, values );

    /* Short-circuit evaluation keeps the second run from starting after a failure. */
    const bool failed =
        test_explicit_s2v_function_set( deviceID, context, queue, kUInt, sampleCount, values ) != 0 ||
        test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedInt, sampleCount, values ) != 0;

    return failed ? -1 : 0;
}
/* Runs the scalar-to-vector explicit cast tests on 128 random cl_long values.
   Returns 0 on success, -1 on failure. */
int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int sampleCount = 128;
    cl_long values[ sampleCount ];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kLong, sampleCount, seed, values );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kLong, sampleCount, values );
    return ( status != 0 ) ? -1 : 0;
}
/* Runs the scalar-to-vector explicit cast tests on 128 random cl_ulong values, first as
   kULong and then, only if that passed, as kUnsignedLong. Returns 0 on success, -1 on failure. */
int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int sampleCount = 128;
    cl_ulong values[ sampleCount ];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kULong, sampleCount, seed, values );

    /* Short-circuit evaluation keeps the second run from starting after a failure. */
    const bool failed =
        test_explicit_s2v_function_set( deviceID, context, queue, kULong, sampleCount, values ) != 0 ||
        test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedLong, sampleCount, values ) != 0;

    return failed ? -1 : 0;
}
/* Runs the scalar-to-vector explicit cast tests on 128 random float values.
   Returns 0 on success, -1 on failure. */
int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int sampleCount = 128;
    float values[ sampleCount ];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kFloat, sampleCount, seed, values );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kFloat, sampleCount, values );
    return ( status != 0 ) ? -1 : 0;
}
/* Runs the scalar-to-vector explicit cast tests on 128 random double values when the device
   reports cl_khr_fp64; otherwise logs a note and reports success.
   Returns 0 on success or skip, -1 on failure. */
int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const int sampleCount = 128;
    double values[ sampleCount ];
    RandomSeed seed(gRandomSeed);

    if( is_extension_available( deviceID, "cl_khr_fp64" ) )
    {
        generate_random_data( kDouble, sampleCount, seed, values );

        const int status = test_explicit_s2v_function_set( deviceID, context, queue, kDouble, sampleCount, values );
        return ( status != 0 ) ? -1 : 0;
    }

    log_info("Extension cl_khr_fp64 not supported. Skipping test.\n");
    return 0;
}

View File

@@ -0,0 +1,145 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/* OpenCL C source of the test kernel: each work-item converts src[tid] to int with a
   C-style cast and stores the result in dst[tid]. */
const char *float2int_kernel_code =
"__kernel void test_float2int(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n";
/*
 * Compares the device results against a host-side (int) cast of every input.
 *
 * inptr   the n float inputs that were given to the kernel.
 * outptr  the n int results read back from the device.
 *
 * Logs the outcome and returns 0 when all n values agree, -1 at the first mismatch.
 */
int
verify_float2int(cl_float *inptr, cl_int *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        if ((int)inptr[idx] != outptr[idx])
        {
            log_error("FLOAT2INT test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("FLOAT2INT test passed\n");
    return 0;
}
/*
 * Tests the float -> int conversion performed by the test_float2int kernel.
 *
 * Fills a buffer with num_elements random floats, runs the kernel over it, reads the
 * results back and checks them with verify_float2int().
 *
 * Returns 0 on success and -1 on any failure. All host allocations and OpenCL objects
 * created here are released on every exit path.
 */
int
test_float2int(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem      streams[2] = { NULL, NULL };
    cl_float    *input_ptr = NULL;
    cl_int      *output_ptr = NULL;
    cl_program  program = NULL;
    cl_kernel   kernel = NULL;
    size_t      threads[1];
    int         err;
    int         result = -1;    /* pessimistic default; only a completed verification changes it */
    int         i;
    MTdata      d;

    input_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
    output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
    if (!input_ptr || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    /* Random inputs between -2^31 and 2^31. */
    d = init_genrand( gRandomSeed );
    for (i=0; i<num_elements; i++)
        input_ptr[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*num_elements, (void *)input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clWriteArray failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &float2int_kernel_code, "test_float2int");
    if (err != CL_SUCCESS)
    {
        log_error("create_single_kernel_helper failed\n");
        goto cleanup;
    }

    /* Accumulate both results so that a failure of the first call is not overwritten by the second. */
    err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (size_t)num_elements;
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    /* Blocking read, so output_ptr is complete when the call returns. */
    err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_float2int(input_ptr, output_ptr, num_elements);

cleanup:
    /* Release only what was actually created. */
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return result;
}

View File

@@ -0,0 +1,271 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "../../test_common/harness/rounding_mode.h"
#include "procs.h"
/* OpenCL C source: each work-item stores srcA[tid] + srcB[tid] into dst[tid]. */
static const char *fpadd_kernel_code =
"__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
/* OpenCL C source: each work-item stores srcA[tid] - srcB[tid] into dst[tid]. */
static const char *fpsub_kernel_code =
"__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
/* OpenCL C source: each work-item stores srcA[tid] * srcB[tid] into dst[tid]. */
static const char *fpmul_kernel_code =
"__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
/* NOTE(review): not referenced by the verification code visible in this file, which compares for exact equality. */
static const float MAX_ERR = 1e-5f;
/*
 * Checks that outptr[i] equals the host-computed sum inptrA[i] + inptrB[i] for all n elements.
 * Logs the outcome; returns 0 when every element matches exactly, -1 at the first mismatch.
 */
static int
verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx;

    for (idx = 0; idx < n; ++idx)
    {
        /* The host result is stored in a float before it is compared, as in the reference computation. */
        float expected;

        expected = inptrA[idx] + inptrB[idx];
        if (expected == outptr[idx])
            continue;

        log_error("FP_ADD float test failed\n");
        return -1;
    }

    log_info("FP_ADD float test passed\n");
    return 0;
}
/*
 * Checks that outptr[i] equals the host-computed difference inptrA[i] - inptrB[i] for all n elements.
 * Logs the outcome; returns 0 when every element matches exactly, -1 at the first mismatch.
 */
static int
verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx;

    for (idx = 0; idx < n; ++idx)
    {
        /* The host result is stored in a float before it is compared, as in the reference computation. */
        float expected;

        expected = inptrA[idx] - inptrB[idx];
        if (expected == outptr[idx])
            continue;

        log_error("FP_SUB float test failed\n");
        return -1;
    }

    log_info("FP_SUB float test passed\n");
    return 0;
}
/*
 * Checks that outptr[i] equals the host-computed product inptrA[i] * inptrB[i] for all n elements.
 * Logs the outcome; returns 0 when every element matches exactly, -1 at the first mismatch.
 */
static int
verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx;

    for (idx = 0; idx < n; ++idx)
    {
        /* The host result is stored in a float before it is compared, as in the reference computation. */
        float expected;

        expected = inptrA[idx] * inptrB[idx];
        if (expected == outptr[idx])
            continue;

        log_error("FP_MUL float test failed\n");
        return -1;
    }

    log_info("FP_MUL float test passed\n");
    return 0;
}
int
test_fpmath_float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem streams[4];
cl_program program[3];
cl_kernel kernel[3];
float *input_ptr[3], *output_ptr, *p;
size_t threads[1];
int err, i;
MTdata d = init_genrand( gRandomSeed );
size_t length = sizeof(cl_float) * num_elements;
int isRTZ = 0;
RoundingMode oldMode = kDefaultRoundingMode;
// check for floating point capabilities
cl_device_fp_config single_config = 0;
err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
if (err) {
log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
test_finish();
return -1;
}
//If we only support rtz mode
if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
{
//Check to make sure we are an embedded device
char profile[32];
err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
if( err )
{
log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
test_finish();
return -1;
}
if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
{
log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
test_finish();
return -1;
}
isRTZ = 1;
oldMode = get_round();
}
input_ptr[0] = (cl_float*)malloc(length);
input_ptr[1] = (cl_float*)malloc(length);
input_ptr[2] = (cl_float*)malloc(length);
output_ptr = (cl_float*)malloc(length);
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
p = input_ptr[0];
for (i=0; i<num_elements; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
p = input_ptr[1];
for (i=0; i<num_elements; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
p = input_ptr[2];
for (i=0; i<num_elements; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
test_error( err, "clEnqueueWriteBuffer failed.");
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
test_error( err, "clEnqueueWriteBuffer failed.");
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
test_error( err, "clEnqueueWriteBuffer failed.");
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd_kernel_code, "test_fpadd");
test_error( err, "create_single_kernel_helper failed");
err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub_kernel_code, "test_fpsub");
test_error( err, "create_single_kernel_helper failed");
err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul_kernel_code, "test_fpmul");
test_error( err, "create_single_kernel_helper failed");
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
threads[0] = (unsigned int)num_elements;
for (i=0; i<3; i++)
{
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
test_error( err, "clEnqueueNDRangeKernel failed.");
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
test_error( err, "clEnqueueReadBuffer failed.");
if( isRTZ )
set_round( kRoundTowardZero, kfloat );
switch (i)
{
case 0:
err = verify_fpadd(input_ptr[0], input_ptr[1], output_ptr, num_elements);
break;
case 1:
err = verify_fpsub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
break;
case 2:
err = verify_fpmul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
break;
}
if( isRTZ )
set_round( oldMode, kfloat );
if (err)
break;
}
// cleanup
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[1]);
clReleaseMemObject(streams[2]);
clReleaseMemObject(streams[3]);
for (i=0; i<3; i++)
{
clReleaseKernel(kernel[i]);
clReleaseProgram(program[i]);
}
free(input_ptr[0]);
free(input_ptr[1]);
free(input_ptr[2]);
free(output_ptr);
free_mtdata( d );
return err;
}

View File

@@ -0,0 +1,269 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "../../test_common/harness/rounding_mode.h"
#include "procs.h"
/* OpenCL C source: each work-item stores the float2 sum srcA[tid] + srcB[tid] into dst[tid]. */
const char *fpadd2_kernel_code =
"__kernel void test_fpadd2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
/* OpenCL C source: each work-item stores the float2 difference srcA[tid] - srcB[tid] into dst[tid]. */
const char *fpsub2_kernel_code =
"__kernel void test_fpsub2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
/* OpenCL C source: each work-item stores the float2 product srcA[tid] * srcB[tid] into dst[tid]. */
const char *fpmul2_kernel_code =
"__kernel void test_fpmul2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
/*
 * Checks that outptr[i] equals the host-computed sum inptrA[i] + inptrB[i] for all n floats
 * (n counts individual float components). Logs the outcome; returns 0 when every element
 * matches exactly, -1 at the first mismatch.
 */
int
verify_fpadd2(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx;

    for (idx = 0; idx < n; ++idx)
    {
        /* The host result is stored in a float before it is compared, as in the reference computation. */
        float expected;

        expected = inptrA[idx] + inptrB[idx];
        if (expected == outptr[idx])
            continue;

        log_error("FP_ADD float2 test failed\n");
        return -1;
    }

    log_info("FP_ADD float2 test passed\n");
    return 0;
}
/*
 * Checks that outptr[i] equals the host-computed difference inptrA[i] - inptrB[i] for all n floats
 * (n counts individual float components). Logs the outcome; returns 0 when every element
 * matches exactly, -1 at the first mismatch.
 */
int
verify_fpsub2(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx;

    for (idx = 0; idx < n; ++idx)
    {
        /* The host result is stored in a float before it is compared, as in the reference computation. */
        float expected;

        expected = inptrA[idx] - inptrB[idx];
        if (expected == outptr[idx])
            continue;

        log_error("FP_SUB float2 test failed\n");
        return -1;
    }

    log_info("FP_SUB float2 test passed\n");
    return 0;
}
/*
 * Checks that outptr[i] equals the host-computed product inptrA[i] * inptrB[i] for all n floats
 * (n counts individual float components). Logs the outcome; returns 0 when every element
 * matches exactly, -1 at the first mismatch.
 */
int
verify_fpmul2(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx;

    for (idx = 0; idx < n; ++idx)
    {
        /* The host result is stored in a float before it is compared, as in the reference computation. */
        float expected;

        expected = inptrA[idx] * inptrB[idx];
        if (expected == outptr[idx])
            continue;

        log_error("FP_MUL float2 test failed\n");
        return -1;
    }

    log_info("FP_MUL float2 test passed\n");
    return 0;
}
int
test_fpmath_float2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem streams[4];
cl_program program[3];
cl_kernel kernel[3];
cl_float *input_ptr[3], *output_ptr, *p;
size_t threads[1];
int err, i;
MTdata d = init_genrand( gRandomSeed );
size_t length = sizeof(cl_float) * 2 * num_elements;
int isRTZ = 0;
RoundingMode oldMode = kDefaultRoundingMode;
// check for floating point capabilities
cl_device_fp_config single_config = 0;
err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
if (err) {
log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
test_finish();
return -1;
}
//If we only support rtz mode
if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
{
//Check to make sure we are an embedded device
char profile[32];
err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
if( err )
{
log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
test_finish();
return -1;
}
if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
{
log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
test_finish();
return -1;
}
isRTZ = 1;
oldMode = get_round();
}
input_ptr[0] = (cl_float*)malloc(length);
input_ptr[1] = (cl_float*)malloc(length);
input_ptr[2] = (cl_float*)malloc(length);
output_ptr = (cl_float*)malloc(length);
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
p = input_ptr[0];
for (i=0; i<num_elements*2; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
p = input_ptr[1];
for (i=0; i<num_elements*2; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
p = input_ptr[2];
for (i=0; i<num_elements*2; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd2_kernel_code, "test_fpadd2");
test_error( err, "create_single_kernel_helper failed");
err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub2_kernel_code, "test_fpsub2");
test_error( err, "create_single_kernel_helper failed");
err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul2_kernel_code, "test_fpmul2");
test_error( err, "create_single_kernel_helper failed");
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
free_mtdata(d);
d = NULL;
threads[0] = (unsigned int)num_elements;
for (i=0; i<3; i++)
{
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
test_error( err, "clEnqueueNDRangeKernel failed.");
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
test_error( err, "clEnqueueReadBuffer failed.");
if( isRTZ )
set_round( kRoundTowardZero, kfloat );
switch (i)
{
case 0:
err = verify_fpadd2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
break;
case 1:
err = verify_fpsub2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
break;
case 2:
err = verify_fpmul2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
break;
}
if( isRTZ )
set_round( oldMode, kfloat );
if (err)
break;
}
// cleanup
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[1]);
clReleaseMemObject(streams[2]);
clReleaseMemObject(streams[3]);
for (i=0; i<3; i++)
{
clReleaseKernel(kernel[i]);
clReleaseProgram(program[i]);
}
free(input_ptr[0]);
free(input_ptr[1]);
free(input_ptr[2]);
free(output_ptr);
return err;
}

View File

@@ -0,0 +1,270 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/rounding_mode.h"
/* OpenCL C source: each work-item stores the float4 sum srcA[tid] + srcB[tid] into dst[tid]. */
const char *fpadd4_kernel_code =
"__kernel void test_fpadd4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
/* OpenCL C source: each work-item stores the float4 difference srcA[tid] - srcB[tid] into dst[tid]. */
const char *fpsub4_kernel_code =
"__kernel void test_fpsub4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
/* OpenCL C source: each work-item stores the float4 product srcA[tid] * srcB[tid] into dst[tid]. */
const char *fpmul4_kernel_code =
"__kernel void test_fpmul4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
/*
 * Checks that outptr[i] equals the host-computed sum inptrA[i] + inptrB[i] for all n floats.
 * Logs the outcome; returns 0 when every element matches exactly, -1 at the first mismatch.
 */
int
verify_fpadd4(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx;

    for (idx = 0; idx < n; ++idx)
    {
        /* The host result is stored in a float before it is compared, as in the reference computation. */
        float expected;

        expected = inptrA[idx] + inptrB[idx];
        if (expected == outptr[idx])
            continue;

        log_error("FP_ADD float4 test failed\n");
        return -1;
    }

    log_info("FP_ADD float4 test passed\n");
    return 0;
}
/*
 * Checks that outptr[i] equals the host-computed difference inptrA[i] - inptrB[i] for all n floats.
 * Logs the outcome; returns 0 when every element matches exactly, -1 at the first mismatch.
 */
int
verify_fpsub4(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx;

    for (idx = 0; idx < n; ++idx)
    {
        /* The host result is stored in a float before it is compared, as in the reference computation. */
        float expected;

        expected = inptrA[idx] - inptrB[idx];
        if (expected == outptr[idx])
            continue;

        log_error("FP_SUB float4 test failed\n");
        return -1;
    }

    log_info("FP_SUB float4 test passed\n");
    return 0;
}
/*
 * Checks that outptr[i] equals the host-computed product inptrA[i] * inptrB[i] for all n floats.
 * Logs the outcome; returns 0 when every element matches exactly, -1 at the first mismatch.
 */
int
verify_fpmul4(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx;

    for (idx = 0; idx < n; ++idx)
    {
        /* The host result is stored in a float before it is compared, as in the reference computation. */
        float expected;

        expected = inptrA[idx] * inptrB[idx];
        if (expected == outptr[idx])
            continue;

        log_error("FP_MUL float4 test failed\n");
        return -1;
    }

    log_info("FP_MUL float4 test passed\n");
    return 0;
}
int
test_fpmath_float4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem streams[4];
cl_program program[3];
cl_kernel kernel[3];
cl_float *input_ptr[3], *output_ptr, *p;
size_t threads[1];
int err, i;
MTdata d = init_genrand( gRandomSeed );
size_t length = sizeof(cl_float) * 4 * num_elements;
int isRTZ = 0;
RoundingMode oldMode = kDefaultRoundingMode;
// check for floating point capabilities
cl_device_fp_config single_config = 0;
err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
if (err) {
log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
test_finish();
return -1;
}
//If we only support rtz mode
if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
{
//Check to make sure we are an embedded device
char profile[32];
err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
if( err )
{
log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
test_finish();
return -1;
}
if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
{
log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
test_finish();
return -1;
}
isRTZ = 1;
oldMode = get_round();
}
input_ptr[0] = (cl_float*)malloc(length);
input_ptr[1] = (cl_float*)malloc(length);
input_ptr[2] = (cl_float*)malloc(length);
output_ptr = (cl_float*)malloc(length);
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
test_error( err, "clCreateBuffer failed.");
p = input_ptr[0];
for (i=0; i<num_elements*4; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
p = input_ptr[1];
for (i=0; i<num_elements*4; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
p = input_ptr[2];
for (i=0; i<num_elements*4; i++)
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
free_mtdata(d);
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd4_kernel_code, "test_fpadd4");
test_error( err, "create_single_kernel_helper failed");
err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub4_kernel_code, "test_fpsub4");
test_error( err, "create_single_kernel_helper failed");
err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul4_kernel_code, "test_fpmul4");
test_error( err, "create_single_kernel_helper failed");
err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
test_error( err, "clSetKernelArgs failed.");
threads[0] = (unsigned int)num_elements;
for (i=0; i<3; i++)
{
err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
test_error( err, "clEnqueueNDRangeKernel failed.");
err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
test_error( err, "clEnqueueReadBuffer failed.");
if( isRTZ )
set_round( kRoundTowardZero, kfloat );
switch (i)
{
case 0:
err = verify_fpadd4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
break;
case 1:
err = verify_fpsub4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
break;
case 2:
err = verify_fpmul4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
break;
}
if( isRTZ )
set_round( oldMode, kfloat );
if (err)
break;
}
// cleanup
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[1]);
clReleaseMemObject(streams[2]);
clReleaseMemObject(streams[3]);
for (i=0; i<3; i++)
{
clReleaseKernel(kernel[i]);
clReleaseProgram(program[i]);
}
free(input_ptr[0]);
free(input_ptr[1]);
free(input_ptr[2]);
free(output_ptr);
return err;
}

View File

@@ -0,0 +1,191 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"
#include <ctype.h>
// OpenCL C source for the linear-id test kernel.
//
// Each work-item derives its own linear global id (with the global offset
// subtracted) and linear local id from the per-dimension id/size queries,
// for 1, 2 or 3 work dimensions.  It then stores, at out[gid], whether those
// values equal get_global_linear_id() (in .x) and get_local_linear_id()
// (in .y); a stored 1 means the built-in agreed with the manual computation.
static const char *linear_ids_source[1] = {
"__kernel void test_linear_ids(__global int2 *out)\n"
"{\n"
" size_t lid, gid;\n"
" uint d = get_work_dim();\n"
" if (d == 1U) {\n"
" gid = get_global_id(0) - get_global_offset(0);\n"
" lid = get_local_id(0);\n"
" } else if (d == 2U) {\n"
" gid = (get_global_id(1) - get_global_offset(1)) * get_global_size(0) +\n"
" (get_global_id(0) - get_global_offset(0));\n"
" lid = get_local_id(1) * get_local_size(0) + get_local_id(0);\n"
" } else {\n"
" gid = ((get_global_id(2) - get_global_offset(2)) * get_global_size(1) +\n"
" (get_global_id(1) - get_global_offset(1))) * get_global_size(0) +\n"
" (get_global_id(0) - get_global_offset(0));\n"
" lid = (get_local_id(2) * get_local_size(1) +\n"
" get_local_id(1)) * get_local_size(0) + get_local_id(0);\n"
" }\n"
" out[gid].x = gid == get_global_linear_id();\n"
" out[gid].y = lid == get_local_linear_id();\n"
"}\n"
};
// Number of randomized launches; the work dimension cycles 1, 2, 3 across them.
#define NUM_ITER 12
// Bound used when drawing the 1D global size; also sizes the host result
// array, which holds two cl_int values per work-item.
#define MAX_1D 4096
// Per-dimension bound used when drawing 2D global sizes (64*64 == MAX_1D).
#define MAX_2D 64
// Per-dimension bound used when drawing 3D global sizes (16*16*16 == MAX_1D).
#define MAX_3D 16
// Upper limit passed to random_in_range() for each global offset component.
#define MAX_OFFSET 100000
int
test_get_linear_ids(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper outbuf;
int error, iter, i, j, k;
size_t lws[3], gws[3], gwo[3];
cl_uint dims;
cl_int outmem[2*MAX_1D], *om;
// Create the kernel
error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, linear_ids_source, "test_linear_ids", "-cl-std=CL2.0");
if (error)
return error;
// Create the out buffer
outbuf = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(outmem), NULL, &error);
test_error(error, "failed to create result buffer\n");
// This will leak if there is an error, but this is what is done everywhere else
MTdata seed = init_genrand(gRandomSeed);
// Run some tests
for (iter=0; iter<NUM_ITER; ++iter) {
dims = iter % 3 + 1;
switch (dims) {
case 1:
gwo[0] = random_in_range(0, MAX_OFFSET, seed);
gws[0] = random_in_range(MAX_1D/8, MAX_1D/4, seed)*4;
error = get_max_common_work_group_size(context, kernel, gws[0], lws);
break;
case 2:
gwo[0] = random_in_range(0, MAX_OFFSET, seed);
gwo[1] = random_in_range(0, MAX_OFFSET, seed);
gws[0] = random_in_range(MAX_2D/8, MAX_2D/4, seed)*4;
gws[1] = random_in_range(MAX_2D/8, MAX_2D/4, seed)*4;
error = get_max_common_2D_work_group_size(context, kernel, gws, lws);
break;
case 3:
gwo[0] = random_in_range(0, MAX_OFFSET, seed);
gwo[1] = random_in_range(0, MAX_OFFSET, seed);
gwo[2] = random_in_range(0, MAX_OFFSET, seed);
gws[0] = random_in_range(MAX_3D/4, MAX_3D/2, seed)*2;
gws[1] = random_in_range(MAX_3D/4, MAX_3D/2, seed)*2;
gws[2] = random_in_range(MAX_3D/4, MAX_3D/2, seed)*2;
error = get_max_common_3D_work_group_size(context, kernel, gws, lws);
break;
}
test_error(error, "Failed to determine local work size\n");
switch (dims) {
case 1:
log_info(" testing offset=%u global=%u local=%u...\n", gwo[0], gws[0], lws[0]);
break;
case 2:
log_info(" testing offset=(%u,%u) global=(%u,%u) local=(%u,%u)...\n",
gwo[0], gwo[1], gws[0], gws[1], lws[0], lws[1]);
break;
case 3:
log_info(" testing offset=(%u,%u,%u) global=(%u,%u,%u) local=(%u,%u,%u)...\n",
gwo[0], gwo[1], gwo[2], gws[0], gws[1], gws[2], lws[0], lws[1], lws[2]);
break;
}
// Set up and run
memset(outmem, 0, sizeof(outmem));
error = clSetKernelArg(kernel, 0, sizeof(outbuf), (void *)&outbuf);
test_error(error, "clSetKernelArg failed\n");
error = clEnqueueWriteBuffer(queue, outbuf, CL_FALSE, 0, sizeof(outmem), (void *)outmem, 0, NULL, NULL);
test_error(error, "clEnqueueWriteBuffer failed\n");
error = clEnqueueNDRangeKernel(queue, kernel, dims, gwo, gws, lws, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed\n");
error = clEnqueueReadBuffer(queue, outbuf, CL_FALSE, 0, sizeof(outmem), (void *)outmem, 0, NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed\n");
error = clFinish(queue);
test_error(error, "clFinish failed\n");
// Check the return
switch (dims) {
case 1:
for (i=0, om=outmem; i<(int)gws[0]; ++i, om+=2) {
if (om[0] != 1) {
log_error("get_global_linear_id() failed at %d\n", i);
return -1;
}
if (om[1] != 1) {
log_error("get_local_linear_id() failed at (%d, %d)\n", i % (int)lws[0], i / (int)lws[0]);
return -1;
}
}
break;
case 2:
for (j=0, om=outmem; j<gws[1]; ++j) {
for (i=0; i<gws[0]; ++i, om+=2) {
if (om[0] != 1) {
log_error("get_global_linear_id() failed at (%d,%d)\n", i, j);
return -1;
}
if (om[1] != 1) {
log_error("get_local_linear_id() failed at (%d, %d), (%d, %d)\n",
i % (int)lws[0], j % (int)lws[1],
i / (int)lws[0], j / (int)lws[1]);
return -1;
}
}
}
break;
case 3:
for (k=0, om=outmem; k<gws[2]; ++k) {
for (j=0; j<gws[1]; ++j) {
for (i=0; i<gws[0]; ++i, om+=2) {
if (om[0] != 1) {
log_error("get_global_linear_id() failed at (%d,%d, %d)\n", i, j, k);
return -1;
}
if (om[1] != 1) {
log_error("get_local_linear_id() failed at (%d, %d), (%d, %d), (%d, %d)\n",
i % (int)lws[0], j % (int)lws[1], k % (int)lws[2],
i / (int)lws[0], j / (int)lws[1], k / (int)lws[2]);
return -1;
}
}
}
}
break;
}
}
free_mtdata(seed);
return 0;
}

Some files were not shown because too many files have changed in this diff Show More