Initial open source release of OpenCL 2.0 CTS.

This commit is contained in:
Kedar Patil
2017-05-16 18:50:35 +05:30
parent 6911ba5116
commit 3a440d17c8
883 changed files with 318212 additions and 0 deletions

View File

@@ -0,0 +1,24 @@
# Build configuration for the OpenCL 2.0 SVM (Shared Virtual Memory)
# conformance-test module. Test sources plus the shared harness sources
# are compiled into a single test executable via CMakeCommon.txt.
set(MODULE_NAME SVM)

set(${MODULE_NAME}_SOURCES
    main.cpp
    test_allocate_shared_buffer.cpp
    test_byte_granularity.cpp
    test_cross_buffer_pointers.cpp
    test_enqueue_api.cpp
    test_fine_grain_memory_consistency.cpp
    test_fine_grain_sync_buffers.cpp
    test_pointer_passing.cpp
    test_set_kernel_exec_info_svm_ptrs.cpp
    test_shared_address_space_coarse_grain.cpp
    test_shared_address_space_fine_grain.cpp
    test_shared_address_space_fine_grain_buffers.cpp
    test_shared_sub_buffers.cpp
    # Shared harness code, built into every test module.
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/msvc9.c
)

include(../CMakeCommon.txt)

View File

@@ -0,0 +1,53 @@
# Standalone Makefile (Apple toolchain) for the OpenCL 2.0 SVM tests.
# Define BUILD_WITH_ATF to link against the ATF test framework.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# NOTE(review): this list previously began with main.c; the module's entry
# point is main.cpp (matching the CMake source list) -- confirm against the
# checked-in file names.
SRCS = main.cpp \
	test_allocate_shared_buffer.cpp \
	test_byte_granularity.cpp \
	test_cross_buffer_pointers.cpp \
	test_enqueue_api.cpp \
	test_fine_grain_memory_consistency.cpp \
	test_fine_grain_sync_buffers.cpp \
	test_pointer_passing.cpp \
	test_set_kernel_exec_info_svm_ptrs.cpp \
	test_shared_address_space_coarse_grain.cpp \
	test_shared_address_space_fine_grain_buffers.cpp \
	test_shared_address_space_fine_grain.cpp \
	test_shared_sub_buffers.cpp \
	../../test_common/harness/errorHelpers.c \
	../../test_common/harness/threadTesting.c \
	../../test_common/harness/testHarness.c \
	../../test_common/harness/kernelHelpers.c \
	../../test_common/harness/typeWrappers.cpp \
	../../test_common/harness/mt19937.c

# The source list above must NOT end with a continuation backslash: the
# original trailing "\" merged the DEFINES line into SRCS, so the macro
# below was never actually defined.
DEFINES = DONT_TEST_GARBAGE_POINTERS

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
HEADERS =
TARGET = test_SVM
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Map both .c and .cpp sources onto their object files.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}
TARGETOBJECT =

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,100 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef __COMMON_H__
#define __COMMON_H__

// Shared declarations for the OpenCL 2.0 SVM (Shared Virtual Memory)
// conformance tests: host-side atomic wrappers, linked-list helpers,
// common CL-object setup, and the per-test entry points.

#include "../../test_common/harness/compat.h"
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/typeWrappers.h"

#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
#include <windows.h>
#endif

// Host-side mirror of the OpenCL C memory-order enumerants, so the host
// atomic wrappers share signatures with their device-side counterparts.
typedef enum {
    memory_order_relaxed,
    memory_order_acquire,
    memory_order_release,
    memory_order_acq_rel,
    memory_order_seq_cst
} cl_memory_order;

// Host-side SVM atomics (implemented in main.cpp). The memory-order argument
// is part of the signature for parity with device code.
cl_int AtomicLoadExplicit(volatile cl_int * pValue, cl_memory_order order);
cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand, cl_memory_order o);

// Host-side strong compare-exchange with C11-style semantics: returns true on
// success; on failure stores the observed value into *expected and returns
// false.
// NOTE(review): both branches reinterpret *a as a LONG/intptr_t-sized object.
// For a 32-bit T on an LP64 platform the GCC path operates on 8 bytes --
// confirm this is only instantiated with pointer-sized types.
template <typename T>
bool AtomicCompareExchangeStrongExplicit(volatile T *a, T *expected, T desired,
                                         cl_memory_order order_success,
                                         cl_memory_order order_failure)
{
    T tmp;
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
    tmp = (T)InterlockedCompareExchange((volatile LONG *)a, (LONG)desired, *(LONG *)expected);
#elif defined(__GNUC__)
    tmp = (T)__sync_val_compare_and_swap((volatile intptr_t*)a, (intptr_t)(*expected), (intptr_t)desired);
#else
    log_info("Host function not implemented: atomic_compare_exchange\n");
    tmp = 0;
#endif
    if(tmp == *expected)
        return true;
    *expected = tmp;
    return false;
}

// this checks for a NULL ptr and/or an error code
// NOTE(review): the macro tests the caller's local variable literally named
// `error`, not the error_code argument -- callers must name their status
// variable `error` for this to work as intended.
#define test_error2(error_code, ptr, msg) { if(error != 0) { test_error(error_code, msg); } else { if(NULL == ptr) {print_null_error(msg); return -1;} } }
#define print_null_error(msg) log_error("ERROR: %s! (NULL pointer detected %s:%d)\n", msg, __FILE__, __LINE__ );

// max possible number of queues needed, 1 for each device in platform.
#define MAXQ 32

// Linked-list node shared between host code and the OpenCL C kernels.
// The layout must match the struct declared inside the kernel sources.
typedef struct Node{
    cl_int global_id;          // id of the work-item (or host) that owns this list
    cl_int position_in_list;   // index of this node within its list
    struct Node* pNext;        // next node; may live in a different SVM buffer
} Node;

// Host- and device-side helpers for building/verifying the linked lists.
extern void create_linked_lists(Node* pNodes, size_t num_lists, int list_length);
extern cl_int verify_linked_lists(Node* pNodes, size_t num_lists, int list_length);
extern cl_int create_linked_lists_on_device(int qi, cl_command_queue q, cl_mem allocator, cl_kernel k, size_t numLists );
extern cl_int verify_linked_lists_on_device(int qi, cl_command_queue q, cl_mem num_correct, cl_kernel k, cl_int ListLength, size_t numLists );
extern cl_int create_linked_lists_on_device_no_map(int qi, cl_command_queue q, size_t *pAllocator, cl_kernel k, size_t numLists );
extern cl_int verify_linked_lists_on_device_no_map(int qi, cl_command_queue q, cl_int *pNum_correct, cl_kernel k, cl_int ListLength, size_t numLists );

// Per-test entry points, dispatched from basefn_list in main.cpp.
extern int test_byte_granularity(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_fine_grain_memory_consistency(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_fine_grain_sync_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_pointer_passing(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_allocate_shared_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shared_sub_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

// Builds the context/queues/program used by the tests (see main.cpp for the
// return-code convention: 0 = ok, 1 = unsupported-but-pass, <0 = fail).
extern cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps);

extern const char *linked_list_create_and_verify_kernels[];

#endif    // #ifndef __COMMON_H__

View File

@@ -0,0 +1,338 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <vector>
#include <sstream>
#include "../../test_common/harness/testHarness.h"
#include "common.h"
// SVM Atomic wrappers.
// Platforms that support SVM atomics (atomics that work across the host and devices) need to implement these host side functions correctly.
// Platforms that do not support SVM atomics can simpy implement these functions as empty stubs since the functions will not be called.
// For now only Windows x86 is implemented, add support for other platforms as needed.
// Host-side atomic load of a 32-bit value.
// The memory-order argument exists for signature parity with the device-side
// atomics and is not consulted: both implemented paths are strong enough for
// the orders used by these tests.
cl_int AtomicLoadExplicit(volatile cl_int * pValue, cl_memory_order order)
{
#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
    return *pValue; // provided the value is aligned x86 doesn't need anything more than this for seq_cst.
#elif defined(__GNUC__)
    // Non-x86 GCC: add-and-fetch of 0 performs a full-barrier atomic read.
    return __sync_add_and_fetch(pValue, 0);
#else
    log_error("ERROR: AtomicLoadExplicit function not implemented\n");
    return -1;
#endif
}
// all the x86 atomics are seq_cst, so don't need to do anything with the memory order parameter.
// all the x86 atomics are seq_cst, so don't need to do anything with the memory order parameter.
// Host-side atomic fetch-and-add on a 32-bit value; returns the value the
// object held before the addition.
cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand, cl_memory_order o)
{
#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
    return InterlockedExchangeAdd( (volatile LONG*) object, operand);
#elif defined(__GNUC__)
    return __sync_fetch_and_add(object, operand);
#else
    log_error("ERROR: AtomicFetchAddExplicit function not implemented\n");
    return -1;
#endif
}
// Host-side atomic exchange on a 32-bit value; returns the previous value.
// NOTE(review): __sync_lock_test_and_set is documented by GCC as an acquire
// barrier only, not a full seq_cst exchange -- confirm that is sufficient for
// the callers' ordering requirements.
cl_int AtomicExchangeExplicit(volatile cl_int *object, cl_int desired, cl_memory_order mo)
{
#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
    return InterlockedExchange( (volatile LONG*) object, desired);
#elif defined(__GNUC__)
    return __sync_lock_test_and_set(object, desired);
#else
    log_error("ERROR: AtomicExchangeExplicit function not implemented\n");
    return -1;
#endif
}
// OpenCL C sources for the basic linked-list tests: create_linked_lists builds
// one list per work-item out of a shared node pool (allocating new nodes via
// an atomic bump index), and verify_linked_lists walks each list counting
// correctly-linked nodes into num_correct.
const char *linked_list_create_and_verify_kernels[] = {
    "typedef struct Node {\n"
    " int global_id;\n"
    " int position_in_list;\n"
    " __global struct Node* pNext;\n"
    "} Node;\n"
    "\n"
    // The allocation_index parameter must be initialized on the host to N work-items
    // The first N nodes in pNodes will be the heads of the lists.
    "__kernel void create_linked_lists(__global Node* pNodes, volatile __attribute__((nosvm)) __global int* allocation_index, int list_length)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " __global Node *pNode = &pNodes[i];\n"
    "\n"
    " pNode->global_id = i;\n"
    " pNode->position_in_list = 0;\n"
    "\n"
    " __global Node *pNew;\n"
    " for(int j=1; j < list_length; j++)\n"
    " {\n"
    " pNew = &pNodes[ atomic_inc(allocation_index) ];// allocate a new node\n"
    " pNew->global_id = i;\n"
    " pNew->position_in_list = j;\n"
    " pNode->pNext = pNew; // link new node onto end of list\n"
    " pNode = pNew; // move to end of list\n"
    " }\n"
    "}\n"
    // Walks each list and bumps num_correct once per node found in its
    // expected place; the host checks the total afterwards.
    "__kernel void verify_linked_lists(__global Node* pNodes, volatile __global uint* num_correct, int list_length)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " __global Node *pNode = &pNodes[i];\n"
    "\n"
    " for(int j=0; j < list_length; j++)\n"
    " {\n"
    " if( pNode->global_id == i && pNode->position_in_list == j)\n"
    " {\n"
    " atomic_inc(num_correct);\n"
    " } \n"
    " else {\n"
    " break;\n"
    " }\n"
    " pNode = pNode->pNext;\n"
    " }\n"
    "}\n"
};
// The first N nodes in pNodes will be the heads of the lists.
// Host-side construction of num_lists linked lists inside the pNodes pool.
// The first num_lists entries are the list heads; every additional node is
// taken from the remainder of the pool in simple bump-allocation order.
void create_linked_lists(Node* pNodes, size_t num_lists, int list_length)
{
    size_t next_free = num_lists; // heads of lists are in first num_lists nodes.
    for(cl_uint list = 0; list < num_lists; list++)
    {
        Node *tail = &pNodes[list];
        tail->global_id = list;
        tail->position_in_list = 0;

        for(int pos = 1; pos < list_length; pos++)
        {
            Node *fresh = &pNodes[ next_free++ ]; // allocate a new node
            fresh->global_id = list;
            fresh->position_in_list = pos;
            tail->pNext = fresh; // link new node onto end of list
            tail = fresh;        // move to end of list
        }
    }
}
// Host-side verification of the linked lists built in pNodes: every node must
// carry its list's id and its position within the list. Returns CL_SUCCESS
// when all list_length * num_lists nodes check out, -1 otherwise.
cl_int verify_linked_lists(Node* pNodes, size_t num_lists, int list_length)
{
    log_info(" and verifying on host ");

    int matched = 0;
    for(cl_uint list = 0; list < num_lists; list++)
    {
        Node *cursor = &pNodes[list];
        int pos = 0;
        // Walk the list while each node is in its expected place; stop at the
        // first mismatch (the remainder of that list cannot be trusted).
        while(pos < list_length
              && cursor->global_id == list
              && cursor->position_in_list == pos)
        {
            matched++;
            cursor = cursor->pNext;
            pos++;
        }
    }

    cl_int status = CL_SUCCESS;
    if(matched != list_length * (cl_uint)num_lists)
    {
        status = -1;
        log_info("Failed\n");
    }
    else
        log_info("Passed\n");
    return status;
}
// Note that we don't use the context provided by the test harness since it doesn't support multiple devices,
// so we create are own context here that has all devices, we use the same platform that the harness used.
// Note that we don't use the context provided by the test harness since it doesn't support multiple devices,
// so we create our own context here that has all devices, we use the same platform that the harness used.
// Returns 0 on success, 1 when no device supports required_svm_caps (the test
// should not execute but counts as passing), and a negative value on failure.
cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps)
{
    cl_int error;
    cl_platform_id platform_id;

    // find out what platform the harness is using.
    error = clGetDeviceInfo(device_from_harness, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform_id, NULL);
    test_error(error,"clGetDeviceInfo failed");

    error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 0, NULL, num_devices );
    test_error(error, "clGetDeviceIDs failed");

    std::vector<cl_device_id> devicesTmp(*num_devices), devices, capable_devices;
    error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, *num_devices, &devicesTmp[0], NULL );
    test_error(error, "clGetDeviceIDs failed");

    // Keep the harness's device first so queues[0] always targets it.
    devices.push_back(device_from_harness);
    for (size_t i = 0; i < devicesTmp.size(); ++i)
    {
        if (device_from_harness != devicesTmp[i])
            devices.push_back(devicesTmp[i]);
    }

    // Select only the devices that support the SVM level needed for the test.
    // Note that if requested SVM capabilities are not supported by any device then the test still passes (even though it does not execute).
    cl_device_svm_capabilities caps;
    cl_uint num_capable_devices = 0;
    for(cl_uint i = 0; i < *num_devices; i++)
    {
        size_t ret_len = 0;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, 0, 0, &ret_len);
        if (error != CL_SUCCESS)
        {
            log_error("clGetDeviceInfo failed %s\n", IGetErrorString(error));
            return -1;
        }
        std::vector<char> oclVersion(ret_len + 1);
        error = clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(char) * oclVersion.size(), &oclVersion[0], 0);
        if (error != CL_SUCCESS)
        {
            log_error("clGetDeviceInfo failed %s\n", IGetErrorString(error));
            return -1;
        }

        // CL_DEVICE_VERSION is "OpenCL <major>.<minor> ..."; parse the number
        // starting at offset 7, right after the mandated "OpenCL " prefix.
        std::string versionStr(&oclVersion[7]);
        std::stringstream stream;
        stream << versionStr;
        double version = 0.0;
        stream >> version;

        // Skip pre-2.0 devices (no SVM), except the harness's own device
        // which is always queried for its capabilities.
        if(device_from_harness != devices[i] && version < 2.0)
        {
            continue;
        }

        error = clGetDeviceInfo(devices[i], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
        // Error message previously referred to CL_DEVICE_MEM_SHARING, which
        // is not what is queried here.
        test_error(error,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");

        if(caps & (~(CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | CL_DEVICE_SVM_ATOMICS)))
        {
            log_error("clGetDeviceInfo returned an invalid cl_device_svm_capabilities value");
            return -1;
        }
        if((caps & required_svm_caps) == required_svm_caps)
        {
            capable_devices.push_back(devices[i]);
            ++num_capable_devices;
        }
    }

    devices = capable_devices; // the only devices we care about from here on are the ones capable of supporting the requested SVM level.
    *num_devices = num_capable_devices;
    if(num_capable_devices == 0)
    {
        log_info("Requested SVM level not supported by any device on this platform, test not executed.\n");
        return 1; // 1 indicates do not execute, but counts as passing.
    }

    cl_context_properties context_properties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, 0 };
    *context = clCreateContext(context_properties, *num_devices, &devices[0], NULL, NULL, &error);
    test_error(error, "Unable to create context" );

    // One in-order queue per capable device.
    for(cl_uint i = 0; i < *num_devices; i++)
    {
        queues[i] = clCreateCommandQueueWithProperties(*context, devices[i], 0, &error);
        test_error(error, "clCreateCommandQueue failed");
    }

    if(ppCodeString)
    {
        *program = clCreateProgramWithSource(*context, 1, ppCodeString , NULL, &error);
        test_error( error, "clCreateProgramWithSource failed" );

        error = clBuildProgram(*program,0,NULL,"-cl-std=CL2.0",NULL,NULL);
        if (error != CL_SUCCESS)
        {
            print_error(error, "clBuildProgram failed");

            // Fetch and print the build log. Previously the log was copied
            // via sprintf into a fixed 15000-byte stack buffer, which could
            // overflow for long logs; log the heap buffer directly instead.
            size_t buildLogSize = 0;
            cl_int logError = clGetProgramBuildInfo(*program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize);
            if (logError != CL_SUCCESS)
            {
                print_error(logError, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
                return -1;
            }
            char *buildLog = (char*)malloc(buildLogSize);
            if (buildLog)
            {
                memset(buildLog, 0, buildLogSize);
                logError = clGetProgramBuildInfo(*program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL);
                if (logError == CL_SUCCESS)
                    log_info("%s", buildLog);
                else
                    print_error(logError, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
                free(buildLog);
            }
            return -1; // build failure always fails the setup
        }
    }
    return 0;
}
// Table of test entry points dispatched by the harness. Order must match
// basefn_names below; the ct_assert after the tables enforces equal lengths.
basefn basefn_list[] = {
    test_byte_granularity,
    test_set_kernel_exec_info_svm_ptrs,
    test_fine_grain_memory_consistency,
    test_fine_grain_sync_buffers,
    test_shared_address_space_fine_grain,
    test_shared_sub_buffers,
    test_shared_address_space_fine_grain_buffers,
    test_allocate_shared_buffer,
    test_shared_address_space_coarse_grain_old_api,
    test_shared_address_space_coarse_grain_new_api,
    test_cross_buffer_pointers_coarse_grain,
    test_svm_pointer_passing,
    test_enqueue_api,
};

// Command-line names for the tests above (index-aligned with basefn_list).
const char *basefn_names[] = {
    "svm_byte_granularity",
    "svm_set_kernel_exec_info_svm_ptrs",
    "svm_fine_grain_memory_consistency",
    "svm_fine_grain_sync_buffers",
    "svm_shared_address_space_fine_grain",
    "svm_shared_sub_buffers",
    "svm_shared_address_space_fine_grain_buffers",
    "svm_allocate_shared_buffer",
    "svm_shared_address_space_coarse_grain_old_api",
    "svm_shared_address_space_coarse_grain_new_api",
    "svm_cross_buffer_pointers_coarse_grain",
    "svm_pointer_passing",
    "svm_enqueue_api",
};

// Compile-time check that the two tables stay the same length.
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));

int num_fns = sizeof(basefn_names) / sizeof(char *);

// Harness entry point: hands the dispatch tables to the shared test runner.
int main(int argc, const char *argv[])
{
    return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, true, 0 );
}

View File

@@ -0,0 +1,107 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// clSVMAlloc flag combinations exercised by test_allocate_shared_buffer.
// flag_set_names below must stay index-aligned with this table.
const cl_mem_flags flag_set[] = {
    CL_MEM_READ_WRITE,
    CL_MEM_WRITE_ONLY,
    CL_MEM_READ_ONLY,
    CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
    CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
    CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
    CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
    CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
    CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
    0 // 0 defaults to CL_MEM_READ_WRITE
};

// Human-readable names for the combinations above, used in the test log.
const char* flag_set_names[] = {
    "CL_MEM_READ_WRITE",
    "CL_MEM_WRITE_ONLY",
    "CL_MEM_READ_ONLY",
    "CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER",
    "CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER",
    "CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER",
    "CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS",
    "CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS",
    "CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS",
    "0"
};
// Tests clSVMAlloc with every flag combination in flag_set, and checks that
// wrapping the SVM allocation in a CL_MEM_USE_HOST_PTR buffer and mapping it
// returns the same pointer clSVMAlloc handed out.
int test_allocate_shared_buffer(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    clContextWrapper    context = NULL;
    clProgramWrapper    program = NULL;
    cl_uint     num_devices = 0;
    cl_int      err = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];

    cl_device_svm_capabilities caps;
    err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
    test_error(err,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");

    err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
    // create_cl_objects returns 1 when no device supports the requested SVM
    // level; that counts as a pass. (The original `if(err) return -1;` turned
    // "unsupported" into a failure, inconsistent with the other SVM tests.)
    if(err == 1) return 0;
    if(err < 0) return -1;

    size_t size = 1024;

    // iteration over flag combos
    int num_flags = sizeof(flag_set)/sizeof(cl_mem_flags);
    for(int i = 0; i < num_flags; i++)
    {
        // Skip combinations the device does not support.
        if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0 && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0)
            || ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0 && (caps & CL_DEVICE_SVM_ATOMICS) == 0))
        {
            log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]);
            continue;
        }
        log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]);

        cl_char *pBufData1 = (cl_char*) clSVMAlloc(context, flag_set[i], size, 0);
        if(pBufData1 == NULL)
        {
            log_error("SVMalloc returned NULL");
            return -1;
        }
        {
            clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size, pBufData1, &err);
            test_error(err,"clCreateBuffer failed");

            cl_char *pBufData2 = NULL;
            // Start with both access bits set; the host-access restrictions
            // below then *clear* the disallowed bit. The original initializer
            // was CL_MAP_READ | CL_MAP_READ, so the XORs toggled the
            // disallowed bits ON instead of off.
            cl_uint flags = CL_MAP_READ | CL_MAP_WRITE;
            if(flag_set[i] & CL_MEM_HOST_READ_ONLY)  flags ^= CL_MAP_WRITE;
            if(flag_set[i] & CL_MEM_HOST_WRITE_ONLY) flags ^= CL_MAP_READ;

            if(!(flag_set[i] & CL_MEM_HOST_NO_ACCESS))
            {
                pBufData2 = (cl_char*) clEnqueueMapBuffer(queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL,NULL, &err);
                test_error(err, "clEnqueueMapBuffer failed");
                if(pBufData2 != pBufData1 || NULL == pBufData1)
                {
                    log_error("SVM pointer returned by clEnqueueMapBuffer doesn't match pointer returned by clSVMalloc");
                    return -1;
                }
            }
        } // buf released here, before the underlying SVM memory is freed
        clSVMFree(context, pBufData1);
    }
    return 0;
}

View File

@@ -0,0 +1,148 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// OpenCL C sources for the byte-granularity test: each device writes its id
// into the bytes it owns, then every device re-reads the whole buffer to
// confirm it can see the single-byte updates made by the other devices and
// the host.
const char *byte_manipulation_kernels[] = {
    // Each device will write it's id into the bytes that it "owns", ownership is based on round robin (global_id % num_id)
    // num_id is equal to number of SVM devices in the system plus one (for the host code).
    // id is the index (id) of the device that this kernel is executing on.
    // For example, if there are 2 SVM devices and the host; the buffer should look like this after each device and the host write their id's:
    // 0, 1, 2, 0, 1, 2, 0, 1, 2...
    "__kernel void write_owned_locations(__global char* a, uint num_id, uint id)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " int owner = i % num_id;\n"
    " if(id == owner) \n"
    " a[i] = id;\n" // modify location if it belongs to this device, write id
    "}\n"

    // Verify that a device can see the byte sized updates from the other devices, sum up the device id's and see if they match expected value.
    // Note: this must be called with a reduced NDRange so that neighbor acesses don't go past end of buffer.
    // For example if there are two SVM devices and the host (3 total devices) the buffer should look like this:
    // 0,1,2,0,1,2...
    // and the expected sum at each point is 0+1+2 = 3.
    "__kernel void sum_neighbor_locations(__global char* a, uint num_devices, volatile __global uint* error_count)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " uint expected_sum = (num_devices * (num_devices - 1))/2;\n"
    " uint sum = 0;\n"
    " for(uint j=0; j<num_devices; j++) {\n"
    " sum += a[i + j];\n" // add my neighbors to the right
    " }\n"
    " if(sum != expected_sum)\n"
    " atomic_inc(error_count);\n"
    "}\n"
};
// Fine-grain SVM test: every SVM-capable device plus the host concurrently
// write their own id into the bytes they "own" (round-robin by index), then
// every device and the host verify they can see all the other writers'
// single-byte updates.
int test_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{
    clContextWrapper    context;
    clProgramWrapper    program;
    clKernelWrapper     k1, k2;
    clCommandQueueWrapper queues[MAXQ];
    cl_uint     num_devices = 0;
    cl_int      err = CL_SUCCESS;

    err = create_cl_objects(deviceID, &byte_manipulation_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
    if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
    if(err < 0) return -1; // fail test.

    cl_uint num_devices_plus_host = num_devices + 1;

    k1 = clCreateKernel(program, "write_owned_locations", &err);
    test_error(err, "clCreateKernel failed");
    k2 = clCreateKernel(program, "sum_neighbor_locations", &err);
    test_error(err, "clCreateKernel failed");

    // One shared byte array all devices and the host write into.
    cl_char *pA = (cl_char*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_char) * num_elements, 0);
    if(NULL == pA)
    {
        log_error("clSVMAlloc failed\n");
        return -1;
    }

    // One error counter per device, each in its own fine-grain SVM allocation.
    cl_uint **error_counts = (cl_uint**) malloc(sizeof(void*) * num_devices);
    if(NULL == error_counts)
    {
        clSVMFree(context, pA);
        log_error("malloc failed\n");
        return -1;
    }
    for(cl_uint i=0; i < num_devices; i++) {
        error_counts[i] = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint), 0);
        if(NULL == error_counts[i])
        {
            for(cl_uint j=0; j < i; j++) clSVMFree(context, error_counts[j]);
            free(error_counts);
            clSVMFree(context, pA);
            log_error("clSVMAlloc failed\n");
            return -1;
        }
        *error_counts[i] = 0;
    }
    for(int i=0; i < num_elements; i++) pA[i] = -1;

    err |= clSetKernelArgSVMPointer(k1, 0, pA);
    err |= clSetKernelArg(k1, 1, sizeof(cl_uint), &num_devices_plus_host);
    test_error(err, "clSetKernelArg failed");

    // get all the devices going simultaneously
    size_t element_num = num_elements;
    for(cl_uint d=0; d < num_devices; d++) // device ids starting at 1.
    {
        err = clSetKernelArg(k1, 2, sizeof(cl_uint), &d);
        test_error(err, "clSetKernelArg failed");
        err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL, 0, NULL, NULL);
        test_error(err,"clEnqueueNDRangeKernel failed");
    }
    for(cl_uint d=0; d < num_devices; d++) clFlush(queues[d]);

    // The host writes its own id into the locations it owns while the device
    // kernels are (potentially) still running.
    cl_uint host_id = num_devices; // host code will take the id above the devices.
    for(int i = (int)num_devices; i < num_elements; i+= num_devices_plus_host) pA[i] = host_id;

    for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);

    // now check that each device can see the byte writes made by the other devices.
    err |= clSetKernelArgSVMPointer(k2, 0, pA);
    err |= clSetKernelArg(k2, 1, sizeof(cl_uint), &num_devices_plus_host);
    test_error(err, "clSetKernelArg failed");

    // adjusted so k2 doesn't read past end of buffer
    size_t adjusted_num_elements = num_elements - num_devices;
    for(cl_uint id = 0; id < num_devices; id++)
    {
        err = clSetKernelArgSVMPointer(k2, 2, error_counts[id]);
        test_error(err, "clSetKernelArg failed");
        err = clEnqueueNDRangeKernel(queues[id], k2, 1, NULL, &adjusted_num_elements, NULL, 0, NULL, NULL);
        test_error(err,"clEnqueueNDRangeKernel failed");
    }
    for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);

    bool failed = false;
    // see if any of the devices found errors
    for(cl_uint i=0; i < num_devices; i++) {
        if(*error_counts[i] > 0)
            failed = true;
    }
    cl_uint expected = (num_devices_plus_host * (num_devices_plus_host - 1))/2;
    // check that host can see the byte writes made by the devices.
    for(cl_uint i = 0; i < num_elements - num_devices_plus_host; i++)
    {
        int sum = 0;
        for(cl_uint j=0; j < num_devices_plus_host; j++) sum += pA[i+j];
        if(sum != expected)
            failed = true;
    }

    clSVMFree(context, pA);
    for(cl_uint i=0; i < num_devices; i++) clSVMFree(context, error_counts[i]);
    free(error_counts); // the pointer array itself was leaked in the original code

    if(failed)
        return -1;
    return 0;
}

View File

@@ -0,0 +1,219 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// create linked lists that use nodes from two different buffers.
// OpenCL C sources for the cross-buffer-pointer test: linked lists are built
// from nodes drawn out of TWO different SVM buffers, so a node's pNext may
// point into the other buffer. Verifying the lists therefore exercises
// pointers that cross SVM buffer boundaries.
const char *SVMCrossBufferPointers_test_kernel[] = {
    "\n"
    "typedef struct Node {\n"
    " int global_id;\n"
    " int position_in_list;\n"
    " __global struct Node* pNext;\n"
    "} Node;\n"
    "\n"
    // Bump-allocates a node from one of the two pools, alternating by
    // work-item parity so adjacent work items draw from different buffers.
    "__global Node* allocate_node(__global Node* pNodes1, __global Node* pNodes2, volatile __global int* allocation_index, size_t i)\n"
    "{\n"
    // mix things up, adjacent work items will allocate from different buffers
    " if(i & 0x1)\n"
    " return &pNodes1[atomic_inc(allocation_index)];\n"
    " else\n"
    " return &pNodes2[atomic_inc(allocation_index)];\n"
    "}\n"
    "\n"
    // The allocation_index parameter must be initialized on the host to N work-items
    // The first N nodes in pNodes will be the heads of the lists.
    "__kernel void create_linked_lists(__global Node* pNodes, __global Node* pNodes2, volatile __global int* allocation_index, int list_length)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " __global Node *pNode = &pNodes[i];\n"
    "\n"
    " pNode->global_id = i;\n"
    " pNode->position_in_list = 0;\n"
    "\n"
    " __global Node *pNew;\n"
    " for(int j=1; j < list_length; j++)\n"
    " {\n"
    " pNew = allocate_node(pNodes, pNodes2, allocation_index, i);\n"
    " pNew->global_id = i;\n"
    " pNew->position_in_list = j;\n"
    " pNode->pNext = pNew; // link new node onto end of list\n"
    " pNode = pNew; // move to end of list\n"
    " }\n"
    "}\n"
    "\n"
    // Walks each list (following pointers that may cross between the two
    // buffers) and counts correctly-placed nodes into num_correct.
    "__kernel void verify_linked_lists(__global Node* pNodes, __global Node* pNodes2, volatile __global uint* num_correct, int list_length)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " __global Node *pNode = &pNodes[i];\n"
    "\n"
    " for(int j=0; j < list_length; j++)\n"
    " {\n"
    " if( pNode->global_id == i && pNode->position_in_list == j)\n"
    " {\n"
    " atomic_inc(num_correct);\n"
    " }\n"
    " else {\n"
    " break;\n"
    " }\n"
    " pNode = pNode->pNext;\n"
    " }\n"
    "}\n"
};
// Creates linked list using host code.
// Builds the linked lists on the host: maps both node buffers for coarse-grain
// access, runs the shared host-side list builder over the first buffer, then
// unmaps and waits for the queue to drain.
// (The local MUST be named `error`: the test_error2 macro reads it by name.)
cl_int create_linked_lists_on_host(cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
{
    cl_int error = CL_SUCCESS;
    log_info("SVM: creating linked list on host ");

    const size_t poolBytes = sizeof(Node)*ListLength*numLists;

    Node *pFirstPool = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, poolBytes, 0, NULL,NULL, &error);
    test_error2(error, pFirstPool, "clEnqueueMapBuffer failed");
    Node *pSecondPool = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, poolBytes, 0, NULL,NULL, &error);
    test_error2(error, pSecondPool, "clEnqueueMapBuffer failed");

    // The host builder only allocates out of the first pool; the second is
    // mapped so the whole region is coherent before device kernels run.
    create_linked_lists(pFirstPool, numLists, ListLength);

    error = clEnqueueUnmapMemObject(cmdq, nodes, pFirstPool, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");
    error = clEnqueueUnmapMemObject(cmdq, nodes2, pSecondPool, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");

    error = clFinish(cmdq);
    test_error(error, "clFinish failed");
    return error;
}
// Verify correctness of the linked list using host code.
// Verify correctness of the linked list using host code.
// Both buffers must be mapped because a node's pNext may point into either
// one. Returns CL_SUCCESS when every list checks out, -1 otherwise.
cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
{
    cl_int error = CL_SUCCESS;
    //log_info(" and verifying on host ");
    Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
    test_error2(error, pNodes, "clEnqueueMapBuffer failed");
    Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
    // Fixed: the original rechecked pNodes here, so a NULL map of the second
    // buffer went undetected.
    test_error2(error, pNodes2, "clEnqueueMapBuffer failed");

    error = verify_linked_lists(pNodes, numLists, ListLength);
    if(error) return -1;

    error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");
    error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");

    error = clFinish(cmdq);
    test_error(error, "clFinish failed");
    return error;
}
// This tests that shared buffers are able to contain pointers that point to other shared buffers.
// This tests that all devices and the host share a common address space; using only the coarse-grain features.
// This is done by creating a linked list on a device and then verifying the correctness of the list
// on another device or the host.
// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
// This basic test is performed for all combinations of devices and the host.
int test_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
clContextWrapper context = NULL;
clProgramWrapper program = NULL;
cl_uint num_devices = 0;
cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ];
// Build the program and one queue per device in the platform; requires only
// coarse-grain buffer SVM.
error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
if(error) return -1;
size_t numLists = num_elements;
cl_int ListLength = 32;
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
test_error(error, "clCreateKernel failed");
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
test_error(error, "clCreateKernel failed");
// NOTE(review): pNodes and pNodes2 are leaked on every early "return -1"
// below (including the returns hidden inside the test_error macros) —
// confirm whether cleanup is wanted on failure paths. The allocations are
// also not NULL-checked before being wrapped in buffers.
// this buffer holds some of the linked list nodes.
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);
// this buffer holds some of the linked list nodes.
Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);
// Inner scope so the clMemWrapper objects release their cl_mems before the
// SVM pointers backing them are freed below.
{
clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error);
test_error(error, "clCreateBuffer failed.");
clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error);
test_error(error, "clCreateBuffer failed.");
// this buffer holds the index into the nodes buffer that is used for node allocation
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
test_error(error, "clCreateBuffer failed.");
// this buffer holds the count of correct nodes which is computed by the verify kernel.
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
test_error(error, "clCreateBuffer failed.");
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes);
//error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *) pNodes);
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2);
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &allocator);
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int), (void *) &ListLength);
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes);
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2);
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &num_correct);
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int), (void *) &ListLength);
test_error(error, "clSetKernelArg failed");
// Create linked list on one device and verify on another device (or the host).
// Do this for all possible combinations of devices and host within the platform.
// NOTE(review): ci/vi are int while num_devices is cl_uint, so the
// comparisons below mix signedness — benign for realistic device counts
// but worth a cast.
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
{
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
{
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
{
// Host creation path still needs a queue for map/unmap; queue 0 is used.
error = create_linked_lists_on_host(queues[0], nodes, nodes2, ListLength, numLists);
if(error) return -1;
}
else
{
error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
if(error) return -1;
}
if(vi == num_devices)
{
error = verify_linked_lists_on_host(vi, queues[0], nodes, nodes2, ListLength, numLists);
if(error) return -1;
}
else
{
error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
if(error) return -1;
}
} // inner loop, vi
} // outer loop, ci
}
clSVMFree(context, pNodes2);
clSVMFree(context, pNodes);
return 0;
}

View File

@@ -0,0 +1,254 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
#include "../../test_common/harness/mt19937.h"
#include <vector>
// Data handed to the clEnqueueSVMFree callback so it can report back to the
// host thread what the runtime passed it.
typedef struct
{
    // Set to 1 by the callback when it is done; the host busy-waits on this
    // flag from another thread, so it is volatile to force re-reads.
    // NOTE(review): volatile is not a full synchronization primitive — a
    // C++11 atomic would be stronger — but the only communication here is a
    // single 0 -> 1 transition written once by the callback.
    volatile cl_uint status;
    cl_uint num_svm_pointers;          // count reported by the runtime
    std::vector<void *> svm_pointers;  // the pointers handed to the callback
} CallbackData;
// Fill 'data' with 'size' pseudo-random bytes drawn from the mt19937 stream.
// Each 32-bit draw supplies four bytes before a fresh draw is taken.
void generate_data(std::vector<cl_uchar> &data, size_t size, MTdata seed)
{
    cl_uint randomData = genrand_int32(seed);
    cl_uint bitsLeft = 32;

    for( size_t i = 0; i < size; i++ )
    {
        if( 0 == bitsLeft)
        {
            randomData = genrand_int32(seed);
            bitsLeft = 32;
        }
        data[i] = (cl_uchar)( randomData & 255 );
        randomData >>= 8;
        // BUGFIX: consume 8 bits from the bookkeeping counter. The original
        // decremented randomData instead of bitsLeft, so bitsLeft never
        // reached 0 and every byte after the first four of a draw was 0.
        bitsLeft -= 8;
    }
}
// Callback registered with clEnqueueSVMFree: records the pointers the
// runtime hands over, frees them, and signals the waiting host thread.
void CL_CALLBACK callback_svm_free(cl_command_queue queue, cl_uint num_svm_pointers, void * svm_pointers[], void * user_data)
{
    CallbackData *result = (CallbackData *)user_data;

    // Remember how many pointers we were given and make room for them.
    result->num_svm_pointers = num_svm_pointers;
    result->svm_pointers.resize(num_svm_pointers, 0);

    // The callback only receives the queue; look up its context so the SVM
    // allocations can be released.
    cl_context context;
    cl_int err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, 0);
    if (err != CL_SUCCESS)
    {
        log_error("clGetCommandQueueInfo failed in the callback\n");
        return;
    }

    // Record each pointer for later verification, then free it.
    for (cl_uint idx = 0; idx < num_svm_pointers; ++idx)
    {
        result->svm_pointers[idx] = svm_pointers[idx];
        clSVMFree(context, svm_pointers[idx]);
    }

    // Signal the host thread that the callback has completed.
    result->status = 1;
}
// Exercises the OpenCL 2.0 SVM enqueue APIs (clEnqueueSVMMemFill / SVMMemcpy /
// SVMMap / SVMUnmap / SVMFree) for every scalar and vector element size,
// checks the transferred data, verifies the command type reported on each
// event, and finally checks that the clEnqueueSVMFree callback receives the
// expected pointers.
int test_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{
clContextWrapper context = NULL;
clCommandQueueWrapper queues[MAXQ];
cl_uint num_devices = 0;
const size_t elementNum = 1024;
const size_t numSVMBuffers = 32;
cl_int error = CL_SUCCESS;
RandomSeed seed(0);
error = create_cl_objects(deviceID, NULL, &context, NULL, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
if(error) return -1;
queue = queues[0];
//all possible sizes of vectors and scalars
size_t typeSizes[] = {
sizeof(cl_uchar),
sizeof(cl_uchar2),
sizeof(cl_uchar3),
sizeof(cl_uchar4),
sizeof(cl_uchar8),
sizeof(cl_uchar16),
sizeof(cl_ushort),
sizeof(cl_ushort2),
sizeof(cl_ushort3),
sizeof(cl_ushort4),
sizeof(cl_ushort8),
sizeof(cl_ushort16),
sizeof(cl_uint),
sizeof(cl_uint2),
sizeof(cl_uint3),
sizeof(cl_uint4),
sizeof(cl_uint8),
sizeof(cl_uint16),
sizeof(cl_ulong),
sizeof(cl_ulong2),
sizeof(cl_ulong3),
sizeof(cl_ulong4),
sizeof(cl_ulong8),
sizeof(cl_ulong16),
};
for (size_t i = 0; i < ( sizeof(typeSizes) / sizeof(typeSizes[0]) ); ++i)
{
//generate initial data
// NOTE(review): fillData1 appears unused below — confirm it can be removed.
std::vector<cl_uchar> fillData0(typeSizes[i]), fillData1(typeSizes[i], 0), fillData2(typeSizes[i]);
generate_data(fillData0, typeSizes[i], seed);
generate_data(fillData2, typeSizes[i], seed);
// NOTE(review): these allocations are not NULL-checked and are leaked on
// every early "return -1" below; on the success path clEnqueueSVMFree
// releases them.
cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum * typeSizes[i], 0);
cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum * typeSizes[i], 0);
// A user event gates the fill so the fill -> copy -> map dependency chain
// can be observed to execute in order once the event is completed.
clEventWrapper userEvent = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
clEventWrapper eventMemFill;
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0], typeSizes[i], elementNum * typeSizes[i], 1, &userEvent, &eventMemFill);
test_error(error, "clEnqueueSVMMemFill failed");
clEventWrapper eventMemcpy;
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, elementNum * typeSizes[i], 1, &eventMemFill, &eventMemcpy);
test_error(error, "clEnqueueSVMMemcpy failed");
// Release the gate; the chain can now run.
error = clSetUserEventStatus(userEvent, CL_COMPLETE);
test_error(error, "clSetUserEventStatus failed");
clEventWrapper eventMap;
error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, dstBuffer, elementNum * typeSizes[i], 1, &eventMemcpy, &eventMap);
test_error(error, "clEnqueueSVMMap failed");
error = clWaitForEvents(1, &eventMap);
test_error(error, "clWaitForEvents failed");
//data verification
// NOTE(review): %ld is used for size_t — %zu would be the portable choice.
for (size_t j = 0; j < elementNum * typeSizes[i]; ++j)
{
if (dstBuffer[j] != fillData0[j % typeSizes[i]])
{
log_error("Invalid data at index %ld, expected %d, got %d\n", j, fillData0[j % typeSizes[i]], dstBuffer[j]);
return -1;
}
}
clEventWrapper eventUnmap;
error = clEnqueueSVMUnmap(queue, dstBuffer, 0, 0, &eventUnmap);
test_error(error, "clEnqueueSVMUnmap failed");
// Exercise the fill/copy paths again on buffer halves, without events.
error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData2[0], typeSizes[i], elementNum * typeSizes[i] / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemFill failed");
error = clEnqueueSVMMemFill(queue, dstBuffer + elementNum * typeSizes[i] / 2, &fillData2[0], typeSizes[i], elementNum * typeSizes[i] / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemFill failed");
error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, elementNum * typeSizes[i] / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemcpy failed");
error = clEnqueueSVMMemcpy(queue, CL_TRUE, dstBuffer + elementNum * typeSizes[i] / 2, srcBuffer + elementNum * typeSizes[i] / 2, elementNum * typeSizes[i] / 2, 0, 0, 0);
test_error(error, "clEnqueueSVMMemcpy failed");
// Free both SVM buffers via the enqueue API (no callback this time).
void *ptrs[] = {(void *)srcBuffer, (void *)dstBuffer};
clEventWrapper eventFree;
error = clEnqueueSVMFree(queue, 2, ptrs, 0, 0, 0, 0, &eventFree);
test_error(error, "clEnqueueSVMFree failed");
error = clWaitForEvents(1, &eventFree);
test_error(error, "clWaitForEvents failed");
//event info verification for new SVM commands
cl_command_type commandType;
error = clGetEventInfo(eventMemFill, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_MEMFILL)
{
log_error("Invalid command type returned for clEnqueueSVMMemFill\n");
return -1;
}
error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_MEMCPY)
{
log_error("Invalid command type returned for clEnqueueSVMMemcpy\n");
return -1;
}
error = clGetEventInfo(eventMap, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_MAP)
{
log_error("Invalid command type returned for clEnqueueSVMMap\n");
return -1;
}
error = clGetEventInfo(eventUnmap, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_UNMAP)
{
log_error("Invalid command type returned for clEnqueueSVMUnmap\n");
return -1;
}
error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
test_error(error, "clGetEventInfo failed");
if (commandType != CL_COMMAND_SVM_FREE)
{
log_error("Invalid command type returned for clEnqueueSVMFree\n");
return -1;
}
}
// Allocate a batch of SVM buffers to be released through the callback path.
std::vector<void *> buffers(numSVMBuffers, 0);
for(size_t i = 0; i < numSVMBuffers; ++i) buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0);
//verify if callback is triggered correctly
CallbackData data;
data.status = 0;
error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0], callback_svm_free, &data, 0, 0, 0);
test_error(error, "clEnqueueSVMFree failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
//wait for the callback
// NOTE(review): busy-wait on a plain struct field written from the
// callback thread — relies on status being volatile/atomic for the
// compiler not to hoist the load; confirm CallbackData::status qualifies.
while(data.status == 0) { }
//check if number of SVM pointers returned in the callback matches with expected
if (data.num_svm_pointers != buffers.size())
{
log_error("Invalid number of SVM pointers returned in the callback, expected: %ld, got: %d\n", buffers.size(), data.num_svm_pointers);
return -1;
}
//check if pointers returned in callback are correct
for (size_t i = 0; i < buffers.size(); ++i)
{
if (data.svm_pointers[i] != buffers[i])
{
log_error("Invalid SVM pointer returned in the callback, idx: %ld\n", i);
return -1;
}
}
return 0;
}

View File

@@ -0,0 +1,176 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// OpenCL C source for the device-side hash-table builder: each work item
// claims a node from the shared pool and pushes it onto the head of its
// bin's list with an all-svm-devices-scope compare-exchange.
// NOTE: test_fine_grain_memory_consistency patches hash_table_kernel[4]
// (the '0' of the leading "#if 0") to '1' on 64-bit builds to enable the
// int64 atomic pragmas — the first string fragment must stay exactly
// "#if 0\n", which is why this array is mutable (char[], not const).
static char hash_table_kernel[] =
"#if 0\n"
"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
"#endif\n"
"typedef struct BinNode {\n"
" int value;\n"
" atomic_uintptr_t pNext;\n"
"} BinNode;\n"
"__kernel void build_hash_table(__global uint* input, __global BinNode* pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n"
"{\n"
" __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, 1, memory_order_relaxed, memory_scope_all_svm_devices) ];\n"
" uint i = get_global_id(0);\n"
" uint b = input[i] % numBins;\n"
" pNew->value = input[i];\n"
" uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), memory_order_seq_cst, memory_scope_all_svm_devices);\n"
" do\n"
" {\n"
" atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, memory_scope_all_svm_devices);\n" // always inserting at head of list
" } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), &next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, memory_scope_all_svm_devices));\n"
"}\n";
// Host-side mirror of the device BinNode. The device declares pNext as
// atomic_uintptr_t; here it is a plain pointer, and concurrent updates go
// through the harness atomic helpers (AtomicCompareExchangeStrongExplicit).
typedef struct BinNode{
cl_uint value;
struct BinNode* pNext;
} BinNode;
// Host-side equivalent of the build_hash_table kernel: insert every element
// of 'input' at the head of its hash bin's lock-free singly linked list.
// Runs concurrently with the device kernels, so the head update uses an
// atomic compare-exchange.
void build_hash_table_on_host(cl_context c, cl_uint* input, size_t inputSize, BinNode* pNodes, cl_int volatile *pNumNodes, cl_uint numBins)
{
    for(cl_uint i = 0; i < inputSize; i++)
    {
        // Claim a fresh node from the pool shared with the devices.
        BinNode *pNew = &pNodes[ AtomicFetchAddExplicit(pNumNodes, 1, memory_order_relaxed) ];
        cl_uint b = input[i] % numBins;
        pNew->value = input[i];
        BinNode *next = pNodes[b].pNext;
        do {
            pNew->pNext = next; // always inserting at head of list
            // BUGFIX: the success/failure memory orders were swapped
            // (relaxed on success, seq_cst on failure), which both
            // mismatches the device kernel's seq_cst publish and violates
            // the C11 rule that the failure order must not be stronger
            // than the success order.
        } while(!AtomicCompareExchangeStrongExplicit(&(pNodes[b].pNext), &next, pNew, memory_order_seq_cst, memory_order_relaxed));
    }
}
// Launch the hash-table kernel on every device while the host runs the
// equivalent insertion code, then verify that every value landed in the
// correct bin and that no insertions were lost.
// Returns 0 on success, -1 on failure.
int launch_kernels_and_verify(clContextWrapper &context, clCommandQueueWrapper* queues, clKernelWrapper &kernel, cl_uint num_devices, cl_uint numBins, size_t num_pixels)
{
    int err = CL_SUCCESS;
    // Fine-grain SVM with atomics: visible to the host and all devices while
    // the kernels are still running.
    cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0);
    BinNode *pNodes = (BinNode*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(BinNode) * (num_pixels * (num_devices + 1) + numBins), 0);
    cl_int *pNumNodes = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int), 0);
    // BUGFIX: the allocations were previously used unchecked; fail cleanly
    // instead of dereferencing NULL.
    if (pInputImage == NULL || pNodes == NULL || pNumNodes == NULL)
    {
        log_error("clSVMAlloc failed\n");
        clSVMFree(context, pInputImage);
        clSVMFree(context, pNodes);
        clSVMFree(context, pNumNodes);
        return -1;
    }
    *pNumNodes = numBins; // using the first numBins nodes to hold the list heads.
    for(cl_uint i=0;i<numBins;i++) {
        pNodes[i].pNext = NULL;
    }
    for(cl_uint i=0; i < num_pixels; i++) pInputImage[i] = i;
    err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
    err |= clSetKernelArgSVMPointer(kernel, 1, pNodes);
    err |= clSetKernelArgSVMPointer(kernel, 2, pNumNodes);
    err |= clSetKernelArg(kernel, 3, sizeof(cl_uint), (void*) &numBins);
    // NOTE(review): the test_error early returns below still leak the SVM
    // allocations; acceptable for a failing test run, but worth confirming.
    test_error(err, "clSetKernelArg failed");
    // get all the devices going simultaneously, each device (and the host) will insert all the pixels.
    for(cl_uint d=0; d<num_devices; d++)
    {
        // BUGFIX: the original requested a completion event here and never
        // released (or used) it, leaking one event per enqueue; completion
        // is tracked via clFinish below, so no event is needed.
        err = clEnqueueNDRangeKernel(queues[d], kernel, 1, NULL, &num_pixels, 0, 0, NULL, NULL);
        test_error(err,"clEnqueueNDRangeKernel failed");
    }
    for(cl_uint d=0; d<num_devices; d++) clFlush(queues[d]);
    // wait until we see some activity from a device (try to run host side simultaneously).
    while(numBins == AtomicLoadExplicit(pNumNodes, memory_order_relaxed));
    build_hash_table_on_host(context, pInputImage, num_pixels, pNodes, pNumNodes, numBins);
    for(cl_uint d=0; d<num_devices; d++) clFinish(queues[d]);
    cl_uint num_items = 0;
    // check correctness of each bin in the hash table.
    for(cl_uint i = 0; i < numBins; i++)
    {
        BinNode *pNode = pNodes[i].pNext;
        while(pNode)
        {
            if((pNode->value % numBins) != i)
            {
                log_error("Something went wrong, item is in wrong hash bucket\n");
                break;
            }
            num_items++;
            pNode = pNode->pNext;
        }
    }
    clSVMFree(context, pInputImage);
    clSVMFree(context, pNodes);
    clSVMFree(context, pNumNodes);
    // each device and the host inserted all of the pixels, check that none are missing.
    if(num_items != num_pixels * (num_devices + 1) )
    {
        // BUGFIX: cast the operands so the format specifiers match
        // (num_pixels is size_t, which %d does not cover).
        log_error("The hash table is not correct, num items %u, expected num items: %u\n", (unsigned)num_items, (unsigned)(num_pixels * (num_devices + 1)));
        return -1; // test did not pass
    }
    return 0;
}
// This tests for memory consistency across devices and the host.
// Each device and the host simultaneously insert values into a single hash table.
// Each bin in the hash table is a linked list. Each bin is protected against simultaneous
// update using a lock free technique. The correctness of the list is verfied on the host.
// This test requires the new OpenCL 2.0 atomic operations that implement the new seq_cst memory ordering.
int test_fine_grain_memory_consistency(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{
    clContextWrapper context;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clCommandQueueWrapper queues[MAXQ];
    cl_uint num_devices = 0;
    cl_int err = CL_SUCCESS;

    // On 64-bit builds the kernel's uintptr atomics need the int64 atomic
    // extensions; skip (as a pass) if they are unavailable.
    if (sizeof(void *) == 8 && (!is_extension_available(deviceID, "cl_khr_int64_base_atomics") || !is_extension_available(deviceID, "cl_khr_int64_extended_atomics")))
    {
        log_info("WARNING: test skipped. 'cl_khr_int64_base_atomics' and 'cl_khr_int64_extended_atomics' extensions are not supported\n");
        return 0;
    }

    // Make pragmas visible for 64-bit addresses
    hash_table_kernel[4] = sizeof(void *) == 8 ? '1' : '0';

    char *source[] = { hash_table_kernel };
    err = create_cl_objects(deviceID, (const char**)source, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS);
    if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
    if(err < 0) return -1; // fail test.

    kernel = clCreateKernel(program, "build_hash_table", &err);
    test_error(err, "clCreateKernel failed");

    size_t num_pixels = num_elements;

    // Bin counts chosen to stress different contention patterns:
    //  1  - every work group on every device plus the host hammers one lock,
    //  2  - two locks in the same cache line are hit from different devices,
    //  29 - the locks span a few cache lines.
    const cl_uint bin_counts[] = { 1, 2, 29 };
    for (size_t bi = 0; bi < sizeof(bin_counts) / sizeof(bin_counts[0]); ++bi)
    {
        int result = launch_kernels_and_verify(context, queues, kernel, num_devices, bin_counts[bi], num_pixels);
        if (result == -1) return result;
    }
    return 0;
}

View File

@@ -0,0 +1,105 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// Device kernel: scan the image and, for every pixel matching 'target',
// atomically claim the next slot in targetLocations (device scope) and
// publish the pixel index with all-svm-devices scope so the host can
// observe new hits while the kernel is still running.
const char *find_targets_kernel[] = {
"__kernel void find_targets(__global uint* image, uint target, volatile __global atomic_uint *numTargetsFound, volatile __global atomic_uint *targetLocations)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" uint index;\n"
" if(image[i] == target) {\n"
" index = atomic_fetch_add_explicit(numTargetsFound, 1, memory_order_relaxed, memory_scope_device); \n"
" atomic_exchange_explicit(&targetLocations[index], i, memory_order_relaxed, memory_scope_all_svm_devices); \n"
" }\n"
"}\n"
};
// Stub for host-side work that would analyze a found target. Kept empty so
// the test measures only the host/device synchronization; the cast silences
// the unused-parameter warning.
void spawnAnalysisTask(int location)
{
    (void)location;
    // printf("found target at location %d\n", location);
}
#define MAX_TARGETS 1024
// Goals: demonstrate use of SVM's atomics to do fine grain synchronization between the device and host.
// Concept: a device kernel is used to search an input image for regions that match a target pattern.
// The device immediately notifies the host when it finds a target (via an atomic operation that works across host and devices).
// The host is then able to spawn a task that further analyzes the target while the device continues searching for more targets.
int test_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{
clContextWrapper context = NULL;
clProgramWrapper program = NULL;
cl_uint num_devices = 0;
cl_int err = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ];
err = create_cl_objects(deviceID, &find_targets_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS);
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
if(err < 0) return -1; // fail test.
clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err);
test_error(err, "clCreateKernel failed");
size_t num_pixels = num_elements;
//cl_uint num_pixels = 1024*1024*32;
// Fine-grain SVM so the host can watch pTargetLocations change while the
// kernel is still executing. NOTE(review): the allocations are not
// NULL-checked and are leaked on the test_error early returns.
cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0);
cl_uint *pNumTargetsFound = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_uint), 0);
cl_int *pTargetLocations = (cl_int* ) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int) * MAX_TARGETS, 0);
cl_uint targetDescriptor = 777;
*pNumTargetsFound = 0;
cl_uint i;
// -1 marks an unused slot in the result list; the polling loop below relies on it.
for(i=0; i < MAX_TARGETS; i++) pTargetLocations[i] = -1;
// Plant exactly three targets: first, fourth, and last pixel.
for(i=0; i < num_pixels; i++) pInputImage[i] = 0;
pInputImage[0] = targetDescriptor;
pInputImage[3] = targetDescriptor;
pInputImage[num_pixels - 1] = targetDescriptor;
err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
err |= clSetKernelArg(kernel, 1, sizeof(cl_uint), (void*) &targetDescriptor);
err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound);
err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations);
test_error(err, "clSetKernelArg failed");
// NOTE(review): 'done' is never released — one event leaks per run.
cl_event done;
err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL, 0, NULL, &done);
test_error(err,"clEnqueueNDRangeKernel failed");
clFlush(queues[0]);
i=0;
cl_int status;
// check for new targets, if found spawn a task to analyze target.
// Poll the kernel's completion status while simultaneously consuming
// published target locations; keep draining after completion until an
// unused (-1) slot is reached. NOTE(review): assumes fewer than
// MAX_TARGETS hits, otherwise pTargetLocations[i] reads out of bounds.
do {
err = clGetEventInfo(done,CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, NULL);
test_error(err,"clGetEventInfo failed");
if( AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1) // -1 indicates slot not used yet.
{
spawnAnalysisTask(pTargetLocations[i]);
i++;
}
} while (status != CL_COMPLETE || AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1);
clSVMFree(context, pInputImage);
clSVMFree(context, pNumTargetsFound);
clSVMFree(context, pTargetLocations);
// Exactly the three planted targets must have been observed.
if(i != 3) return -1;
return 0;
}

View File

@@ -0,0 +1,115 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// Device kernel: work item 0 compares the byte at the given SVM pointer
// with 'expected' and writes 1 to num_correct on a match, 0 otherwise.
const char *SVMPointerPassing_test_kernel[] = {
"__kernel void verify_char(__global uchar* pChar, volatile __global uint* num_correct, uchar expected)\n"
"{\n"
" if(0 == get_global_id(0))\n"
" {\n"
" *num_correct = 0;\n"
" if(*pChar == expected)\n"
" {\n"
" *num_correct=1;\n"
" }\n"
" }\n"
"}\n"
};
// Test that arbitrarily aligned char pointers into shared buffers can be passed directly to a kernel.
// This iterates through a buffer passing a pointer to each location to the kernel.
// The buffer is initialized to known values at each location.
// The kernel checks that it finds the expected value at each location.
// TODO: possibly make this work across all base types (including typeN?), also check ptr arithmetic ++,--.
int test_svm_pointer_passing(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    clContextWrapper context = NULL;
    clProgramWrapper program = NULL;
    cl_uint num_devices = 0;
    cl_int error = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];
    error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
    if(error) return -1;
    clKernelWrapper kernel_verify_char = clCreateKernel(program, "verify_char", &error);
    test_error(error,"clCreateKernel failed");
    size_t bufSize = 256;
    // Coarse-grain SVM allocations backing the buffers created below.
    char *pbuf = (char*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_uchar)*bufSize, 0);
    cl_int *pNumCorrect = NULL;
    pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0);
    // Inner scope so the clMemWrappers release their cl_mems before the SVM
    // pointers backing them are freed.
    {
        clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar)*bufSize, pbuf, &error);
        test_error(error, "clCreateBuffer failed.");
        clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_int), pNumCorrect, &error);
        test_error(error, "clCreateBuffer failed.");
        error = clSetKernelArg(kernel_verify_char, 1, sizeof(void*), (void *) &num_correct);
        test_error(error, "clSetKernelArg failed");
        // put values into buf so that we can expect to see these values in the kernel when we pass a pointer to them.
        cl_command_queue cmdq = queues[0];
        cl_uchar* pBuf = (cl_uchar*) clEnqueueMapBuffer(cmdq, buf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uchar)*bufSize, 0, NULL,NULL, &error);
        test_error2(error, pBuf, "clEnqueueMapBuffer failed");
        for(int i = 0; i<(int)bufSize; i++)
        {
            pBuf[i]= (cl_uchar)i;
        }
        error = clEnqueueUnmapMemObject(cmdq, buf, pBuf, 0,NULL,NULL);
        test_error(error, "clEnqueueUnmapMemObject failed.");
        for (cl_uint ii = 0; ii<num_devices; ++ii) // iterate over all devices in the platform.
        {
            cmdq = queues[ii];
            for(int i = 0; i<(int)bufSize; i++)
            {
                cl_uchar* pChar = &pBuf[i];
                error = clSetKernelArgSVMPointer(kernel_verify_char, 0, pChar); // pass a pointer to a location within the buffer
                test_error(error, "clSetKernelArg failed");
                // BUGFIX: the expected value must be passed as a cl_uchar.
                // The original passed &i (an int) with sizeof(cl_uchar),
                // which reads the wrong byte on big-endian hosts.
                cl_uchar expected = (cl_uchar)i;
                error = clSetKernelArg(kernel_verify_char, 2, sizeof(cl_uchar), (void *) &expected ); // pass the expected value at the above location.
                test_error(error, "clSetKernelArg failed");
                error = clEnqueueNDRangeKernel(cmdq, kernel_verify_char, 1, NULL, &bufSize, NULL, 0, NULL, NULL);
                test_error(error,"clEnqueueNDRangeKernel failed");
                pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
                test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");
                cl_int correct_count = *pNumCorrect;
                error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL);
                test_error(error, "clEnqueueUnmapMemObject failed.");
                if(correct_count != 1)
                {
                    log_error("Passing pointer directly to kernel for byte #%d failed on device %d\n", i, ii);
                    // BUGFIX: release the SVM allocations before bailing
                    // out; the original leaked them on this failure path.
                    clSVMFree(context, pbuf);
                    clSVMFree(context, pNumCorrect);
                    return -1;
                }
            }
        }
        error = clFinish(cmdq);
        test_error(error, "clFinish failed");
    }
    clSVMFree(context, pbuf);
    clSVMFree(context, pNumCorrect);
    return 0;
}

View File

@@ -0,0 +1,153 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// Host-side mirror of the kernel's BufPtrs struct: three SVM pointers stored
// inside another SVM allocation, used to exercise
// CL_KERNEL_EXEC_INFO_SVM_PTRS for indirectly referenced buffers.
typedef struct {
cl_int *pA;
cl_int *pB;
cl_int *pC;
} BufPtrs;
// Device kernel: dereferences the three buffers reachable only through the
// pBufs struct (not passed as direct kernel arguments), so the runtime must
// learn about them via clSetKernelExecInfo(CL_KERNEL_EXEC_INFO_SVM_PTRS).
// Each work item increments its element in all three buffers.
const char *set_kernel_exec_info_svm_ptrs_kernel[] = {
"struct BufPtrs;\n"
"\n"
"typedef struct {\n"
" __global int *pA;\n"
" __global int *pB;\n"
" __global int *pC;\n"
"} BufPtrs;\n"
"\n"
"__kernel void set_kernel_exec_info_test(__global BufPtrs* pBufs)\n"
"{\n"
" size_t i;\n"
" i = get_global_id(0);\n"
" pBufs->pA[i]++;\n"
" pBufs->pB[i]++;\n"
" pBufs->pC[i]++;\n"
"}\n"
};
// Test that clSetKernelExecInfo works correctly with CL_KERNEL_EXEC_INFO_SVM_PTRS flag.
//
// Three SVM arrays (pA, pB, pC) are reachable by the kernel only through
// pointers stored in an SVM-resident BufPtrs struct, which is the sole kernel
// argument. clSetKernelExecInfo(CL_KERNEL_EXEC_INFO_SVM_PTRS) is the only way
// the runtime learns about pA/pB/pC, so the test passes only if that mechanism
// works: after the kernel runs, every element of each array must be 1.
// Returns 0 on pass (or when no device supports coarse-grain buffer SVM),
// -1 on failure.
int test_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
clContextWrapper c = NULL;
clProgramWrapper program = NULL;
cl_uint num_devices = 0;
cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ];
//error = create_cl_objects(deviceID, &set_kernel_exec_info_svm_ptrs_kernel[0], &context, &program, &q, &num_devices, CL_DEVICE_SVM_FINE_GRAIN);
error = create_cl_objects(deviceID, &set_kernel_exec_info_svm_ptrs_kernel[0], &c, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
if(error < 0) return -1; // fail test.
clKernelWrapper k = clCreateKernel(program, "set_kernel_exec_info_test", &error);
test_error(error, "clCreateKernel failed");
size_t size = num_elements*sizeof(int);
// NOTE(review): the clSVMAlloc results below are not checked for NULL before
// use, and they leak if any of the test_error macros returns early — confirm
// whether the harness tolerates this for a failing run.
//int* pA = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0);
//int* pB = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0);
//int* pC = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0);
int* pA = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
int* pB = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
int* pC = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
BufPtrs* pBuf = (BufPtrs*) clSVMAlloc(c, CL_MEM_READ_WRITE, sizeof(BufPtrs), 0);
bool failed = false;
// Inner scope so the clMemWrapper buffers are released before clSVMFree below.
{
clMemWrapper ba,bb,bc,bBuf;
// Wrap each SVM allocation in a buffer via CL_MEM_USE_HOST_PTR so the
// coarse-grain map/unmap protocol can be used for host access.
ba = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pA, &error);
test_error(error, "clCreateBuffer failed");
bb = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pB, &error);
test_error(error, "clCreateBuffer failed");
bc = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pC, &error);
test_error(error, "clCreateBuffer failed");
bBuf = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, sizeof(BufPtrs), pBuf, &error);
test_error(error, "clCreateBuffer failed");
// The pointers returned by clEnqueueMapBuffer are deliberately discarded and
// the original SVM pointers used instead — presumably relying on the
// CL_MEM_USE_HOST_PTR guarantee that the map returns the host pointer;
// NOTE(review): verify against the spec.
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bBuf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(BufPtrs), 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
// Zero the data arrays and stash the three SVM pointers inside the struct.
for(int i = 0; i < num_elements; i++) pA[i] = pB[i] = pC[i] = 0;
pBuf->pA = pA;
pBuf->pB = pB;
pBuf->pC = pC;
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bBuf, pBuf, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
// pBuf is the ONLY kernel argument; pA/pB/pC are made known to the runtime
// solely through the clSetKernelExecInfo call below.
error = clSetKernelArgSVMPointer(k, 0, pBuf);
test_error(error, "clSetKernelArg failed");
error = clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS, sizeof(BufPtrs), pBuf);
test_error(error, "clSetKernelExecInfo failed");
size_t range = num_elements;
error = clEnqueueNDRangeKernel(queues[0], k, 1, NULL, &range, NULL, 0, NULL, NULL);
test_error(error,"clEnqueueNDRangeKernel failed");
error = clFinish(queues[0]);
test_error(error, "clFinish failed.");
// Map the three arrays again to check the kernel's increments on the host
// (the struct buffer is not remapped because it is only read by the device).
clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
test_error(error, "clEnqueueMapBuffer failed");
// Each array element started at 0 and is incremented once per work-item,
// so the per-index sum across the three arrays must be exactly 3.
for(int i = 0; i < num_elements; i++)
{
if(pA[i] + pB[i] + pC[i] != 3)
failed = true;
}
error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
test_error(error, " clEnqueueUnmapMemObject failed.");
}
error = clFinish(queues[0]);
test_error(error, " clFinish failed.");
clSVMFree(c, pA);
clSVMFree(c, pB);
clSVMFree(c, pC);
clSVMFree(c, pBuf);
if(failed) return -1;
return 0;
}

View File

@@ -0,0 +1,282 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// Builds the linked lists on the host CPU. The node storage is made
// host-accessible either by mapping the cl_mem buffer (legacy path,
// useNewAPI == CL_FALSE) or by mapping the SVM allocation directly with the
// OpenCL 2.0 SVM map API. Returns CL_SUCCESS or an OpenCL error code.
cl_int create_linked_lists_on_host(cl_command_queue cmdq, cl_mem nodes, Node *pNodes2, cl_int ListLength, size_t numLists, cl_bool useNewAPI )
{
    cl_int error = CL_SUCCESS;
    log_info("SVM: creating linked list on host ");
    const size_t regionBytes = sizeof(Node)*ListLength*numLists;
    Node *pNodes;
    if (useNewAPI != CL_FALSE)
    {
        // SVM path: make the allocation coherent for host access.
        pNodes = pNodes2;
        error = clEnqueueSVMMap(cmdq, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, pNodes2, regionBytes, 0, NULL,NULL);
        test_error2(error, pNodes, "clEnqueueSVMMap failed");
    }
    else
    {
        // Legacy path: map the buffer object wrapping the same memory.
        pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, regionBytes, 0, NULL,NULL, &error);
        test_error2(error, pNodes, "clEnqMapBuffer failed");
    }
    create_linked_lists(pNodes, numLists, ListLength);
    // Release host access through the same API family used for mapping.
    if (useNewAPI != CL_FALSE)
    {
        error = clEnqueueSVMUnmap(cmdq, pNodes2, 0, NULL, NULL);
        test_error(error, "clEnqueueSVMUnmap failed.");
    }
    else
    {
        error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
        test_error(error, "clEnqueueUnmapMemObject failed.");
    }
    error = clFinish(cmdq);
    test_error(error, "clFinish failed.");
    return error;
}
// Purpose: uses host code to verify correctness of the linked list.
// ci is the creation index, kept for signature symmetry with the device
// verifiers (unused here).
// Fixes vs. original: the dead local 'correct_count' is removed, and the
// mapped region is now unmapped even when verification fails, so a content
// mismatch no longer also leaks a mapping.
cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes, Node *pNodes2, cl_int ListLength, size_t numLists, cl_bool useNewAPI )
{
    cl_int error = CL_SUCCESS;
    Node *pNodes;
    if (useNewAPI == CL_FALSE)
    {
        pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
        test_error2(error, pNodes, "clEnqueueMapBuffer failed");
    }
    else
    {
        pNodes = pNodes2;
        error = clEnqueueSVMMap(cmdq, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, pNodes2, sizeof(Node)*ListLength * numLists, 0, NULL,NULL);
        test_error2(error, pNodes, "clEnqueueSVMMap failed");
    }
    // Remember the verification outcome; unmap BEFORE acting on it so the
    // mapping is released on both the pass and the fail paths.
    cl_int verify_result = verify_linked_lists(pNodes, numLists, ListLength);
    if (useNewAPI == CL_FALSE)
    {
        error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
        test_error(error, "clEnqueueUnmapMemObject failed.");
    }
    else
    {
        error = clEnqueueSVMUnmap(cmdq, pNodes2, 0,NULL,NULL);
        test_error(error, "clEnqueueSVMUnmap failed.");
    }
    error = clFinish(cmdq);
    test_error(error, "clFinish failed.");
    if (verify_result) return -1;
    return error;
}
// Resets the allocation index, then launches the list-creation kernel on
// device ci.
// Fix: the allocator buffer is sizeof(cl_int) bytes and the kernel treats it
// as an int, but the original code mapped sizeof(cl_int) bytes and then wrote
// through a size_t* — storing sizeof(size_t) bytes, i.e. past the end of the
// mapped region on 64-bit hosts. The index is now accessed as cl_int.
cl_int create_linked_lists_on_device(int ci, cl_command_queue cmdq, cl_mem allocator, cl_kernel kernel_create_lists, size_t numLists )
{
    cl_int error = CL_SUCCESS;
    log_info("SVM: creating linked list on device: %d ", ci);
    cl_int *pAllocator = (cl_int*) clEnqueueMapBuffer(cmdq, allocator, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
    test_error2(error, pAllocator, "clEnqueueMapBuffer failed");
    // reset allocator index: the first numLists elements of the nodes array
    // are already allocated (they hold the head of each list).
    *pAllocator = (cl_int) numLists;
    error = clEnqueueUnmapMemObject(cmdq, allocator, pAllocator, 0,NULL,NULL);
    test_error(error, " clEnqueueUnmapMemObject failed.");
    error = clEnqueueNDRangeKernel(cmdq, kernel_create_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL);
    test_error(error, "clEnqueueNDRange failed.");
    error = clFinish(cmdq);
    test_error(error, "clFinish failed.");
    return error;
}
// Runs the verification kernel on device vi and checks on the host that
// every node of every list was counted as correct.
// Fix: the result of clFinish was previously discarded ('clFinish(cmdq);'),
// so the following test_error examined a stale 'error' value; the return
// code is now captured.
cl_int verify_linked_lists_on_device(int vi, cl_command_queue cmdq,cl_mem num_correct, cl_kernel kernel_verify_lists, cl_int ListLength, size_t numLists )
{
    cl_int error = CL_SUCCESS;
    log_info(" and verifying on device: %d ", vi);
    // Reset the device-side counter before launching the kernel.
    cl_int *pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
    test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");
    *pNumCorrect = 0; // reset numCorrect to zero
    error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed.");
    error = clEnqueueNDRangeKernel(cmdq, kernel_verify_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL);
    test_error(error,"clEnqueueNDRangeKernel failed");
    // Read back the count of correctly linked nodes.
    pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
    test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");
    cl_int correct_count = *pNumCorrect;
    error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");
    error = clFinish(cmdq); // was: clFinish(cmdq); with the result discarded
    test_error(error,"clFinish failed");
    // Each of numLists work-items should have counted ListLength correct nodes.
    if(correct_count != ListLength * (cl_uint)numLists)
    {
        error = -1;
        log_info("Failed\n");
    }
    else
        log_info("Passed\n");
    return error;
}
// This tests that all devices and the host share a common address space; using only the coarse-grain features.
// This is done by creating a linked list on a device and then verifying the correctness of the list
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
// the platform. The test passes only if every combination passes.
// Returns 0 on pass, -1 on any failure.
int shared_address_space_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements, cl_bool useNewAPI)
{
clContextWrapper context = NULL;
clProgramWrapper program = NULL;
cl_uint num_devices = 0;
cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ];
// NOTE(review): unlike the fine-grain tests, ANY nonzero result here —
// including error==1 ("requested SVM level unsupported") — fails the test,
// presumably because coarse-grain buffer SVM is mandatory for OpenCL 2.0
// devices; confirm against the spec.
error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
if(error) return -1;
size_t numLists = num_elements;
cl_int ListLength = 32;
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
test_error(error, "clCreateKernel failed");
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
test_error(error, "clCreateKernel failed");
// this buffer holds the linked list nodes.
// NOTE(review): the clSVMAlloc result is not NULL-checked, and pNodes leaks
// on every early return below — confirm whether the harness tolerates this.
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);
// Inner scope so the clMemWrapper objects are released before clSVMFree.
{
cl_bool usesSVMpointer = CL_FALSE;
clMemWrapper nodes;
if (useNewAPI == CL_FALSE)
{
// Legacy path: wrap the SVM allocation in a buffer object.
nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error);
test_error(error, "clCreateBuffer failed.");
// verify if buffer uses SVM pointer
size_t paramSize = 0;
error = clGetMemObjectInfo(nodes, CL_MEM_USES_SVM_POINTER, 0, 0, &paramSize);
test_error(error, "clGetMemObjectInfo failed.");
if (paramSize != sizeof(cl_bool))
{
log_error("clGetMemObjectInfo(CL_MEM_USES_SVM_POINTER) returned wrong size.");
return -1;
}
error = clGetMemObjectInfo(nodes, CL_MEM_USES_SVM_POINTER, sizeof(cl_bool), &usesSVMpointer, 0);
test_error(error, "clGetMemObjectInfo failed.");
if (usesSVMpointer != CL_TRUE)
{
log_error("clGetMemObjectInfo(CL_MEM_USES_SVM_POINTER) returned CL_FALSE for buffer created from SVM pointer.");
return -1;
}
}
// this buffer holds an index into the nodes buffer, it is used for node allocation
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
test_error(error, "clCreateBuffer failed.");
// Counter-check: a plain (non-SVM-backed) buffer must report CL_FALSE.
error = clGetMemObjectInfo(allocator, CL_MEM_USES_SVM_POINTER, sizeof(cl_bool), &usesSVMpointer, 0);
test_error(error, "clGetMemObjectInfo failed.");
if (usesSVMpointer != CL_FALSE)
{
log_error("clGetMemObjectInfo(CL_MEM_USES_SVM_POINTER) returned CL_TRUE for non-SVM buffer.");
return -1;
}
// this buffer holds the count of correct nodes, which is computed by the verify kernel.
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
test_error(error, "clCreateBuffer failed.");
// The nodes argument is passed either as a raw SVM pointer (new API) or as
// the wrapping buffer object (old API); the verify kernel always gets the
// SVM pointer.
if (useNewAPI == CL_TRUE)
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
else
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes);
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &allocator);
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int), (void *) &ListLength);
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &num_correct);
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
test_error(error, "clSetKernelArg failed");
// Create linked list on one device and verify on another device (or the host).
// Do this for all possible combinations of devices and host within the platform.
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
{
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
{
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
{
error = create_linked_lists_on_host(queues[0], nodes, pNodes, ListLength, numLists, useNewAPI);
if(error) return -1;
}
else
{
error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
if(error) return -1;
}
if(vi == num_devices)
{
error = verify_linked_lists_on_host(vi, queues[0], nodes, pNodes, ListLength, numLists, useNewAPI);
if(error) return -1;
}
else
{
error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
if(error) return -1;
}
}
}
}
clSVMFree(context, pNodes);
return 0;
}
// Entry point: runs the coarse-grain shared-address-space test using the
// pre-2.0 buffer map/unmap API path.
int test_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    const cl_bool useNewAPI = CL_FALSE;
    return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, useNewAPI);
}
// Entry point: runs the coarse-grain shared-address-space test using the
// OpenCL 2.0 SVM map/unmap API path.
int test_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    const cl_bool useNewAPI = CL_TRUE;
    return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, useNewAPI);
}

View File

@@ -0,0 +1,101 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// This tests that all devices and the host share a common address space using fine-grain mode with no buffers.
// This is done by creating a linked list on a device and then verifying the correctness of the list
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
// the platform. The test passes only if every combination passes.
// Returns 0 on pass (or when no device supports fine-grain-system SVM), -1 on failure.
int test_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    clContextWrapper context = NULL;
    clProgramWrapper program = NULL;
    cl_uint num_devices = 0;
    cl_int error = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];
    error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM);
    if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
    if(error < 0) return -1; // fail test.
    size_t numLists = num_elements;
    cl_int ListLength = 32;
    clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
    test_error(error, "clCreateKernel failed");
    clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
    test_error(error, "clCreateKernel failed");
    // this allocation holds the linked list nodes.
    // FIXME: remove the alignment once prototype can handle it
    Node* pNodes = (Node*) align_malloc(numLists*ListLength*sizeof(Node),128);
    test_error2(error, pNodes, "malloc failed");
    // This allocation holds the index used for node allocation. It is written
    // through a size_t* in create_linked_lists_on_device_no_map, so it must be
    // at least sizeof(size_t) bytes; the original allocated only sizeof(cl_int),
    // causing an out-of-bounds store on 64-bit hosts.
    size_t* pAllocator = (size_t*) align_malloc(sizeof(size_t), 128);
    test_error2(error, pAllocator, "malloc failed");
    // this allocation holds the count of correct nodes, which is computed by the verify kernel.
    cl_int* pNum_correct = (cl_int*) align_malloc(sizeof(cl_int), 128);
    test_error2(error, pNum_correct, "malloc failed");
    error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
    error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
    error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),(void *) &ListLength);
    error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
    error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNum_correct);
    error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
    test_error(error, "clSetKernelArg failed");
    // Create linked list on one device and verify on another device (or the host).
    // Do this for all possible combinations of devices and host within the platform.
    // NOTE(review): the early returns in these loops leak the three host
    // allocations; acceptable for a failing run, but worth confirming.
    for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
    {
        for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
        {
            if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
            {
                log_info("creating linked list on host ");
                create_linked_lists(pNodes, numLists, ListLength);
            }
            else
            {
                error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists);
                if(error) return -1;
            }
            if(vi == num_devices)
            {
                error = verify_linked_lists(pNodes, numLists, ListLength);
                if(error) return -1;
            }
            else
            {
                error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNum_correct, kernel_verify_lists, ListLength, numLists);
                if(error) return -1;
            }
        }
    }
    align_free(pNodes);
    align_free(pAllocator);
    align_free(pNum_correct);
    return 0;
}

View File

@@ -0,0 +1,138 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// Launches the list-creation kernel on device ci after resetting the shared
// allocation index. No map/unmap calls are needed: pAllocator lives in
// fine-grain SVM and is directly host-accessible.
cl_int create_linked_lists_on_device_no_map(int ci, cl_command_queue cmdq, size_t* pAllocator, cl_kernel kernel_create_lists, size_t numLists )
{
    cl_int status = CL_SUCCESS;
    log_info("SVM: creating linked list on device: %d ", ci);
    // The first numLists nodes are pre-allocated as list heads, so the
    // allocator starts handing out indices from numLists onward.
    *pAllocator = numLists;
    status = clEnqueueNDRangeKernel(cmdq, kernel_create_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL);
    test_error(status, "clEnqueueNDRange failed.");
    status = clFinish(cmdq);
    test_error(status, "clFinish failed.");
    return status;
}
// Runs the verification kernel on device vi and checks the fine-grain-SVM
// counter on the host (no map/unmap needed for fine-grain allocations).
// Fix: the result of clFinish was previously discarded ('clFinish(cmdq);'),
// so the following test_error checked a stale 'error' value; the return
// code is now captured.
cl_int verify_linked_lists_on_device_no_map(int vi, cl_command_queue cmdq,cl_int* pNumCorrect, cl_kernel kernel_verify_lists, cl_int ListLength, size_t numLists )
{
    cl_int error = CL_SUCCESS;
    log_info(" and verifying on device: %d ", vi);
    *pNumCorrect = 0; // reset numCorrect to zero
    error = clEnqueueNDRangeKernel(cmdq, kernel_verify_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL);
    test_error(error,"clEnqueueNDRangeKernel failed");
    error = clFinish(cmdq); // was: clFinish(cmdq); with the result discarded
    test_error(error,"clFinish failed");
    // Each of numLists work-items should have counted ListLength correct nodes.
    cl_int correct_count = *pNumCorrect;
    if(correct_count != ListLength * (cl_uint)numLists)
    {
        error = -1;
        log_info("Failed\n");
    }
    else
        log_info("Passed\n");
    return error;
}
// This tests that all devices and the host share a common address space; using only the fine-grain with buffers mode.
// This is done by creating a linked list on a device and then verifying the correctness of the list
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
// the platform. The test passes only if every combination passes.
// Returns 0 on pass (or when no device supports fine-grain-buffer SVM), -1 on failure.
int test_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    clContextWrapper context = NULL;
    clProgramWrapper program = NULL;
    cl_uint num_devices = 0;
    cl_int error = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];
    error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
    if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
    if(error < 0) return -1; // fail test.
    size_t numLists = num_elements;
    cl_int ListLength = 32;
    clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
    test_error(error, "clCreateKernel failed");
    clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
    test_error(error, "clCreateKernel failed");
    // this buffer holds the linked list nodes.
    Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(Node)*ListLength*numLists, 0);
    // this buffer holds an index into the nodes buffer, it is used for node allocation
    size_t *pAllocator = (size_t*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(size_t), 0);
    // this buffer holds the count of correct nodes, which is computed by the verify kernel.
    cl_int *pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_int), 0);
    // Fix: clSVMAlloc returns NULL on failure; the original used these
    // pointers unchecked, which would crash instead of reporting a failure.
    if(pNodes == NULL || pAllocator == NULL || pNumCorrect == NULL)
    {
        log_error("clSVMAlloc failed.\n");
        clSVMFree(context, pNodes);      // clSVMFree ignores NULL pointers
        clSVMFree(context, pAllocator);
        clSVMFree(context, pNumCorrect);
        return -1;
    }
    error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
    error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
    error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int), (void *) &ListLength);
    error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
    error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNumCorrect);
    error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
    test_error(error, "clSetKernelArg failed");
    // Create linked list on one device and verify on another device (or the host).
    // Do this for all possible combinations of devices and host within the platform.
    // NOTE(review): the early returns in these loops leak the SVM allocations;
    // acceptable for a failing run, but worth confirming.
    for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
    {
        for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
        {
            if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
            {
                log_info("SVM: creating linked list on host ");
                create_linked_lists(pNodes, numLists, ListLength);
            }
            else
            {
                error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists);
                if(error) return -1;
            }
            if(vi == num_devices)
            {
                error = verify_linked_lists(pNodes, numLists, ListLength);
                if(error) return -1;
            }
            else
            {
                error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNumCorrect, kernel_verify_lists, ListLength, numLists);
                if(error) return -1;
            }
        }
    }
    clSVMFree(context, pNodes);
    clSVMFree(context, pAllocator);
    clSVMFree(context, pNumCorrect);
    return 0;
}

View File

@@ -0,0 +1,241 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// OpenCL C source for the shared sub-buffers test: linked lists whose nodes
// are allocated alternately from two different buffers, passed to the kernels
// as four sub-buffers (two per parent buffer). Cross-(sub-)buffer pointers
// must remain valid because all allocations share the SVM address space.
const char *shared_sub_buffers_test_kernel[] = {
"typedef struct Node {\n"
" int global_id;\n"
" int position_in_list;\n"
" __global struct Node* pNext;\n"
"} Node;\n"
// create linked lists that use nodes from 2 different buffers
"__global Node* allocate_node(__global Node* pNodes1, __global Node* pNodes2, volatile __global int* allocation_index, size_t i)\n"
"{\n"
// mix things up, adjacent work items will allocate from different buffers
" if(i & 0x1)\n"
" return &pNodes1[atomic_inc(allocation_index)];\n"
" else\n"
" return &pNodes2[atomic_inc(allocation_index)];\n"
"}\n"
// The allocation_index parameter must be initialized on the host to N work-items
// The first N nodes in pNodes will be the heads of the lists.
// This tests passing 4 different sub-buffers that come from two parent buffers.
// Note that we have arguments that appear to be unused, but they are required so that system knows to get all the sub-buffers on to the device
"__kernel void create_linked_lists(__global Node* pNodes_sub1, __global Node* pNodes2_sub1, __global Node* pNodes_sub2, __global Node* pNodes2_sub2, volatile __global int* allocation_index, int list_length) \n"
"{\n"
" size_t i = get_global_id(0);\n"
// each work-item owns list head i, then appends list_length-1 nodes drawn
// alternately from the two buffers via allocate_node.
" __global Node *pNode = &pNodes_sub1[i];\n"
" pNode->global_id = i;\n"
" pNode->position_in_list = 0;\n"
" __global Node *pNew;\n"
" for(int j=1; j < list_length; j++) {\n"
" pNew = allocate_node(pNodes_sub1, pNodes2_sub1, allocation_index, i);\n"
" pNew->global_id = i;\n"
" pNew->position_in_list = j;\n"
" pNode->pNext = pNew; // link new node onto end of list\n"
" pNode = pNew; // move to end of list\n"
" }\n"
"}\n"
// Walks list i and atomically counts nodes whose (global_id, position) match;
// the host compares the total against list_length * num_lists.
// Note that we have arguments that appear to be unused, but they are required so that system knows to get all the sub-buffers on to the device
"__kernel void verify_linked_lists(__global Node* pNodes_sub1, __global Node* pNodes2_sub1, __global Node* pNodes_sub2, __global Node* pNodes2_sub2, volatile __global uint* num_correct, int list_length)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" __global Node *pNode = &pNodes_sub1[i];\n"
" for(int j=0; j < list_length; j++) {\n"
" if( pNode->global_id == i && pNode->position_in_list == j)\n"
" atomic_inc(num_correct);\n"
" else \n"
" break;\n"
" pNode = pNode->pNext;\n"
" }\n"
"}\n"
};
// Creates the linked lists with host code. Both node buffers are mapped even
// though create_linked_lists only writes through the first — the second is
// presumably mapped to keep its contents host-coherent before the kernels
// run; TODO confirm intent.
cl_int create_linked_lists_on_host_sb(cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
{
    cl_int error = CL_SUCCESS;
    log_info("SVM: creating linked list on host ");
    const size_t regionBytes = sizeof(Node)*ListLength*numLists;
    Node *pFirst = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, regionBytes, 0, NULL,NULL, &error);
    test_error2(error, pFirst, "clEnqueueMapBuffer failed");
    Node *pSecond = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, regionBytes, 0, NULL,NULL, &error);
    test_error2(error, pSecond, "clEnqueueMapBuffer failed");
    create_linked_lists(pFirst, numLists, ListLength);
    error = clEnqueueUnmapMemObject(cmdq, nodes, pFirst, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");
    error = clEnqueueUnmapMemObject(cmdq, nodes2, pSecond, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");
    error = clFinish(cmdq);
    test_error(error, "clFinish failed");
    return error;
}
// Verify correctness of the linked list using host code.
// ci is the creation index, kept for signature symmetry (unused).
// Fixes vs. original: the second test_error2 incorrectly re-checked pNodes
// instead of pNodes2, so a failed map of nodes2 went undetected; and the
// mappings are now released even when verification fails instead of leaking
// via the early return.
cl_int verify_linked_lists_on_host_sb(int ci, cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
{
    cl_int error = CL_SUCCESS;
    Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
    test_error2(error, pNodes, "clEnqueueMapBuffer failed");
    Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
    test_error2(error, pNodes2, "clEnqueueMapBuffer failed"); // was checking pNodes
    // Remember the verification outcome; unmap BEFORE acting on it so the
    // mappings are released on both the pass and the fail paths.
    cl_int verify_result = verify_linked_lists(pNodes, numLists, ListLength);
    error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");
    error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");
    error = clFinish(cmdq);
    test_error(error, "clFinish failed");
    if(verify_result) return -1;
    return error;
}
// This tests that shared sub-buffers can be created and that they inherit the flags from the parent buffer when no flags are specified.
// This tests that passing only the sub-buffers to a kernel works.
// The test is derived from the cross-buffer pointers test which
// tests that shared buffers are able to contain pointers that point to other shared buffers.
// This tests that all devices and the host share a common address space; using only the coarse-grain features.
// This is done by creating a linked list on a device and then verifying the correctness of the list
// on another device or the host.
// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
// This basic test is performed for all combinations of devices and the host.
int test_shared_sub_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
clContextWrapper context = NULL;
clProgramWrapper program = NULL;
cl_uint num_devices = 0;
cl_int error = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ];
error = create_cl_objects(deviceID, &shared_sub_buffers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
if(error) return -1;
size_t numLists = num_elements;
if(numLists & 0x1) numLists++; // force even size, so we can easily create two sub-buffers of same size.
    // Tail of the shared-sub-buffers SVM test: builds linked lists that span
    // two SVM allocations (each wrapped in a cl_mem and split into two
    // sub-buffers), then creates/verifies the lists on every combination of
    // device and host within the platform.
    cl_int ListLength = 32;
    clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
    test_error(error, "clCreateKernel failed");
    clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
    test_error(error, "clCreateKernel failed");
    size_t nodes_bufsize = sizeof(Node)*ListLength*numLists;
    // Coarse-grained SVM backing store for the node pools.
    // NOTE(review): clSVMAlloc can return NULL on failure; pNodes/pNodes2 are
    // used below without a NULL check.
    Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
    Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
    // Inner scope so the clMemWrapper destructors release all cl_mem objects
    // before the SVM pointers are freed below.
    // NOTE(review): the early "return -1" paths inside this scope skip the
    // clSVMFree calls at the end of the function and leak both allocations.
    {
        // this buffer holds some of the linked list nodes.
        // CL_MEM_USE_HOST_PTR wraps the SVM allocation so it can also be
        // accessed as a regular buffer / sub-buffers.
        clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, nodes_bufsize, pNodes, &error);
        test_error(error, "clCreateBuffer failed.");
        // Split each node buffer into two equal halves via sub-buffers.
        cl_buffer_region r;
        r.origin = 0;
        r.size = nodes_bufsize / 2;
        // this should inherit the flag settings from nodes buffer
        // (flags argument is 0, so read/write and host-ptr usage come from the
        // parent buffer).
        clMemWrapper nodes_sb1 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
        test_error(error, "clCreateSubBuffer");
        r.origin = nodes_bufsize / 2;
        clMemWrapper nodes_sb2 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
        test_error(error, "clCreateSubBuffer");
        // this buffer holds some of the linked list nodes.
        // NOTE(review): size expression is equivalent to nodes_bufsize; written
        // out long-hand here, inconsistently with the first buffer.
        clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error);
        test_error(error, "clCreateBuffer failed.");
        r.origin = 0;
        r.size = nodes_bufsize / 2;
        // this should inherit the flag settings from nodes buffer
        clMemWrapper nodes2_sb1 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
        test_error(error, "clCreateSubBuffer");
        r.origin = nodes_bufsize / 2;
        clMemWrapper nodes2_sb2 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION,(void*)&r, &error);
        test_error(error, "clCreateSubBuffer");
        // this buffer holds the index into the nodes buffer that is used for node allocation
        clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
        test_error(error, "clCreateBuffer failed.");
        // this buffer holds the count of correct nodes which is computed by the verify kernel.
        clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
        test_error(error, "clCreateBuffer failed.");
        // Both kernels take the four sub-buffer halves in the same order;
        // errors are OR-accumulated and checked once after all args are set.
        // (error is CL_SUCCESS here, guaranteed by the preceding test_error.)
        error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes_sb1);
        error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
        error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &nodes_sb2);
        error |= clSetKernelArg(kernel_create_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
        error |= clSetKernelArg(kernel_create_lists, 4, sizeof(void*), (void *) &allocator);
        error |= clSetKernelArg(kernel_create_lists, 5, sizeof(cl_int),(void *) &ListLength);
        error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes_sb1);
        error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
        error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &nodes_sb2);
        error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
        error |= clSetKernelArg(kernel_verify_lists, 4, sizeof(void*), (void *) &num_correct);
        error |= clSetKernelArg(kernel_verify_lists, 5, sizeof(cl_int),(void *) &ListLength);
        test_error(error, "clSetKernelArg failed");
        // Create linked list on one device and verify on another device (or the host).
        // Do this for all possible combinations of devices and host within the platform.
        for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
        {
            for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
            {
                if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
                {
                    // Host-side creation still needs a queue (queues[0]) to
                    // map/unmap the coarse-grained buffers.
                    error = create_linked_lists_on_host_sb(queues[0], nodes, nodes2, ListLength, numLists);
                    if(error) return -1;
                }
                else
                {
                    error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
                    if(error) return -1;
                }
                if(vi == num_devices)
                {
                    // NOTE(review): vi is passed as the first argument here;
                    // presumably an id used only for logging — confirm against
                    // verify_linked_lists_on_host_sb's signature.
                    error = verify_linked_lists_on_host_sb(vi, queues[0], nodes, nodes2, ListLength, numLists);
                    if(error) return -1;
                }
                else
                {
                    error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
                    if(error) return -1;
                }
            } // inner loop, vi
        } // outer loop, ci
    }
    // All cl_mem wrappers are destroyed by now; safe to release the SVM memory.
    clSVMFree(context, pNodes2);
    clSVMFree(context, pNodes);
    return 0;
}