mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-23 07:39:01 +00:00
Initial open source release of OpenCL 2.0 CTS.
This commit is contained in:
24
test_conformance/SVM/CMakeLists.txt
Normal file
24
test_conformance/SVM/CMakeLists.txt
Normal file
@@ -0,0 +1,24 @@
|
||||
# Build definition for the OpenCL 2.0 SVM (Shared Virtual Memory) conformance tests.
set(MODULE_NAME SVM)

# Test sources plus the shared harness files compiled directly into this target.
set(${MODULE_NAME}_SOURCES
    main.cpp
    test_allocate_shared_buffer.cpp
    test_byte_granularity.cpp
    test_cross_buffer_pointers.cpp
    test_enqueue_api.cpp
    test_fine_grain_memory_consistency.cpp
    test_fine_grain_sync_buffers.cpp
    test_pointer_passing.cpp
    test_set_kernel_exec_info_svm_ptrs.cpp
    test_shared_address_space_coarse_grain.cpp
    test_shared_address_space_fine_grain.cpp
    test_shared_address_space_fine_grain_buffers.cpp
    test_shared_sub_buffers.cpp
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/msvc9.c
)

# Shared CTS rules: target creation, compiler flags, OpenCL linkage.
include(../CMakeCommon.txt)
|
||||
53
test_conformance/SVM/Makefile
Normal file
53
test_conformance/SVM/Makefile
Normal file
@@ -0,0 +1,53 @@
|
||||
# Standalone (Apple-style) makefile for the SVM conformance test.
# CMakeLists.txt is the primary build description; keep the source lists in sync.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Fix: this previously listed main.c, but the file added alongside this
# Makefile (and listed in CMakeLists.txt) is main.cpp -- main.c does not exist.
SRCS = main.cpp \
	test_allocate_shared_buffer.cpp \
	test_byte_granularity.cpp \
	test_cross_buffer_pointers.cpp \
	test_enqueue_api.cpp \
	test_fine_grain_memory_consistency.cpp \
	test_fine_grain_sync_buffers.cpp \
	test_pointer_passing.cpp \
	test_set_kernel_exec_info_svm_ptrs.cpp \
	test_shared_address_space_coarse_grain.cpp \
	test_shared_address_space_fine_grain_buffers.cpp \
	test_shared_address_space_fine_grain.cpp \
	test_shared_sub_buffers.cpp \
	../../test_common/harness/errorHelpers.c \
	../../test_common/harness/threadTesting.c \
	../../test_common/harness/testHarness.c \
	../../test_common/harness/kernelHelpers.c \
	../../test_common/harness/typeWrappers.cpp \
	../../test_common/harness/mt19937.c

DEFINES = DONT_TEST_GARBAGE_POINTERS

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
HEADERS =
TARGET = test_SVM
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =
all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
100
test_conformance/SVM/common.h
Normal file
100
test_conformance/SVM/common.h
Normal file
@@ -0,0 +1,100 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef __COMMON_H__
|
||||
#define __COMMON_H__
|
||||
|
||||
#include "../../test_common/harness/compat.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
// Host-side counterpart of the OpenCL memory_order enumeration, consumed by
// the host atomic wrapper functions declared below (AtomicLoadExplicit etc.).
typedef enum {
    memory_order_relaxed,
    memory_order_acquire,
    memory_order_release,
    memory_order_acq_rel,
    memory_order_seq_cst
} cl_memory_order;
|
||||
|
||||
cl_int AtomicLoadExplicit(volatile cl_int * pValue, cl_memory_order order);
|
||||
cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand, cl_memory_order o);
|
||||
|
||||
template <typename T>
|
||||
bool AtomicCompareExchangeStrongExplicit(volatile T *a, T *expected, T desired,
|
||||
cl_memory_order order_success,
|
||||
cl_memory_order order_failure)
|
||||
{
|
||||
T tmp;
|
||||
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
|
||||
tmp = (T)InterlockedCompareExchange((volatile LONG *)a, (LONG)desired, *(LONG *)expected);
|
||||
#elif defined(__GNUC__)
|
||||
tmp = (T)__sync_val_compare_and_swap((volatile intptr_t*)a, (intptr_t)(*expected), (intptr_t)desired);
|
||||
#else
|
||||
log_info("Host function not implemented: atomic_compare_exchange\n");
|
||||
tmp = 0;
|
||||
#endif
|
||||
if(tmp == *expected)
|
||||
return true;
|
||||
*expected = tmp;
|
||||
return false;
|
||||
}
|
||||
|
||||
// this checks for a NULL ptr and/or an error code:
// fails if error_code is non-zero, otherwise fails if ptr is NULL.
// Fix: the condition previously read a variable literally named `error`
// instead of the error_code parameter, so the macro only compiled (and only
// tested the right value) when the caller's variable happened to be named
// `error`.
#define test_error2(error_code, ptr, msg) { if((error_code) != 0) { test_error(error_code, msg); } else { if(NULL == ptr) {print_null_error(msg); return -1;} } }
#define print_null_error(msg) log_error("ERROR: %s! (NULL pointer detected %s:%d)\n", msg, __FILE__, __LINE__ );

// max possible number of queues needed, 1 for each device in platform.
#define MAXQ 32
|
||||
|
||||
// One element of the singly linked lists that several SVM tests build inside
// a single shared allocation and then verify from host and devices.
typedef struct Node{
    cl_int global_id;        // id of the work-item / list this node belongs to
    cl_int position_in_list; // 0-based index of this node within its list
    struct Node* pNext;      // next node in the list (allocated from the same pool)
} Node;

// Host-side construction/verification of the linked lists.
extern void create_linked_lists(Node* pNodes, size_t num_lists, int list_length);
extern cl_int verify_linked_lists(Node* pNodes, size_t num_lists, int list_length);

// Device-side construction/verification, with and without explicit map/unmap
// of the SVM allocation (qi selects which queue/device to run on).
extern cl_int create_linked_lists_on_device(int qi, cl_command_queue q, cl_mem allocator, cl_kernel k, size_t numLists );
extern cl_int verify_linked_lists_on_device(int qi, cl_command_queue q, cl_mem num_correct, cl_kernel k, cl_int ListLength, size_t numLists );
extern cl_int create_linked_lists_on_device_no_map(int qi, cl_command_queue q, size_t *pAllocator, cl_kernel k, size_t numLists );
extern cl_int verify_linked_lists_on_device_no_map(int qi, cl_command_queue q, cl_int *pNum_correct, cl_kernel k, cl_int ListLength, size_t numLists );

// Individual test entry points (registered with the harness in main.cpp).
extern int test_byte_granularity(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_fine_grain_memory_consistency(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_fine_grain_sync_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_svm_pointer_passing(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_allocate_shared_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_shared_sub_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

// Builds the multi-device context/program/queues the tests share; defined in
// main.cpp.  Returns 0 on success, 1 when no device supports
// required_svm_caps (test counts as passing), negative on failure.
extern cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps);

// OpenCL C source for the linked-list create/verify kernels (main.cpp).
extern const char *linked_list_create_and_verify_kernels[];
|
||||
|
||||
338
test_conformance/SVM/main.cpp
Normal file
338
test_conformance/SVM/main.cpp
Normal file
@@ -0,0 +1,338 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
#include "common.h"
|
||||
|
||||
// SVM Atomic wrappers.
|
||||
// Platforms that support SVM atomics (atomics that work across the host and devices) need to implement these host side functions correctly.
|
||||
// Platforms that do not support SVM atomics can simpy implement these functions as empty stubs since the functions will not be called.
|
||||
// For now only Windows x86 is implemented, add support for other platforms as needed.
|
||||
// Host-side analogue of atomic_load_explicit, used when host and devices
// share SVM atomics.  The order parameter is accepted for interface parity;
// every path below performs (at least) a sequentially consistent load.
cl_int AtomicLoadExplicit(volatile cl_int * pValue, cl_memory_order order)
{
#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
    return *pValue; // provided the value is aligned x86 doesn't need anything more than this for seq_cst.
#elif defined(__GNUC__)
    // Non-x86 GCC: atomically add 0 to get a fully fenced load.
    return __sync_add_and_fetch(pValue, 0);
#else
    log_error("ERROR: AtomicLoadExplicit function not implemented\n");
    return -1;
#endif
}
|
||||
// all the x86 atomics are seq_cst, so don't need to do anything with the memory order parameter.
// Host-side analogue of atomic_fetch_add_explicit: atomically adds operand to
// *object and returns the value *object held before the addition.
cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand, cl_memory_order o)
{
#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
    return InterlockedExchangeAdd( (volatile LONG*) object, operand);
#elif defined(__GNUC__)
    return __sync_fetch_and_add(object, operand);
#else
    log_error("ERROR: AtomicFetchAddExplicit function not implemented\n");
    return -1;
#endif
}
|
||||
|
||||
// Host-side analogue of atomic_exchange_explicit: atomically stores desired
// into *object and returns the previous value.
// NOTE(review): per the GCC documentation __sync_lock_test_and_set is only an
// acquire barrier, not a full seq_cst exchange -- confirm callers do not rely
// on full fencing on non-x86 targets.
cl_int AtomicExchangeExplicit(volatile cl_int *object, cl_int desired, cl_memory_order mo)
{
#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
    return InterlockedExchange( (volatile LONG*) object, desired);
#elif defined(__GNUC__)
    return __sync_lock_test_and_set(object, desired);
#else
    log_error("ERROR: AtomicExchangeExplicit function not implemented\n");
    return -1;
#endif
}
|
||||
|
||||
|
||||
// OpenCL C source for the linked-list tests: one kernel builds per-work-item
// linked lists out of a shared pool of Node elements, the other walks them
// and counts correctly linked nodes into num_correct.
const char *linked_list_create_and_verify_kernels[] = {
"typedef struct Node {\n"
" int global_id;\n"
" int position_in_list;\n"
" __global struct Node* pNext;\n"
"} Node;\n"
"\n"
// The allocation_index parameter must be initialized on the host to N work-items
// The first N nodes in pNodes will be the heads of the lists.
"__kernel void create_linked_lists(__global Node* pNodes, volatile __attribute__((nosvm)) __global int* allocation_index, int list_length)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" __global Node *pNode = &pNodes[i];\n"
"\n"
" pNode->global_id = i;\n"
" pNode->position_in_list = 0;\n"
"\n"
" __global Node *pNew;\n"
" for(int j=1; j < list_length; j++)\n"
" {\n"
" pNew = &pNodes[ atomic_inc(allocation_index) ];// allocate a new node\n"
" pNew->global_id = i;\n"
" pNew->position_in_list = j;\n"
" pNode->pNext = pNew; // link new node onto end of list\n"
" pNode = pNew; // move to end of list\n"
" }\n"
"}\n"

// Walks the list headed at pNodes[gid]; each node whose (global_id,
// position_in_list) matches expectations bumps *num_correct.
"__kernel void verify_linked_lists(__global Node* pNodes, volatile __global uint* num_correct, int list_length)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" __global Node *pNode = &pNodes[i];\n"
"\n"
" for(int j=0; j < list_length; j++)\n"
" {\n"
" if( pNode->global_id == i && pNode->position_in_list == j)\n"
" {\n"
" atomic_inc(num_correct);\n"
" } \n"
" else {\n"
" break;\n"
" }\n"
" pNode = pNode->pNext;\n"
" }\n"
"}\n"
};
|
||||
|
||||
|
||||
// The first N nodes in pNodes will be the heads of the lists.
|
||||
// Host-side construction of num_lists singly linked lists of list_length
// nodes inside the pNodes pool.  Nodes [0, num_lists) are the list heads;
// the remaining pool entries are handed out sequentially as tails.
void create_linked_lists(Node* pNodes, size_t num_lists, int list_length)
{
    size_t next_free = num_lists; // heads of lists are in first num_lists nodes.

    for (cl_uint list = 0; list < num_lists; list++)
    {
        Node *tail = &pNodes[list];
        tail->global_id = list;
        tail->position_in_list = 0;

        for (int pos = 1; pos < list_length; pos++)
        {
            Node *fresh = &pNodes[next_free++]; // allocate a new node
            fresh->global_id = list;
            fresh->position_in_list = pos;
            tail->pNext = fresh; // link new node onto end of list
            tail = fresh;        // move to end of list
        }
    }
}
|
||||
|
||||
// Walks every host-visible list and counts nodes whose (global_id,
// position_in_list) match their expected values; passes only when every
// node of every list is correct.
cl_int verify_linked_lists(Node* pNodes, size_t num_lists, int list_length)
{
    int matched = 0;

    log_info(" and verifying on host ");
    for (cl_uint list = 0; list < num_lists; list++)
    {
        Node *cursor = &pNodes[list];
        for (int pos = 0; pos < list_length; pos++)
        {
            // Stop scanning this list at the first broken node.
            if (cursor->global_id != list || cursor->position_in_list != pos)
                break;
            matched++;
            cursor = cursor->pNext;
        }
    }

    if (matched == list_length * (cl_uint)num_lists)
    {
        log_info("Passed\n");
        return CL_SUCCESS;
    }

    log_info("Failed\n");
    return -1;
}
|
||||
|
||||
// Note that we don't use the context provided by the test harness since it doesn't support multiple devices,
|
||||
// so we create are own context here that has all devices, we use the same platform that the harness used.
|
||||
cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps)
|
||||
{
|
||||
cl_int error;
|
||||
|
||||
cl_platform_id platform_id;
|
||||
// find out what platform the harness is using.
|
||||
error = clGetDeviceInfo(device_from_harness, CL_DEVICE_PLATFORM,sizeof(cl_platform_id),&platform_id,NULL);
|
||||
test_error(error,"clGetDeviceInfo failed");
|
||||
|
||||
error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 0, NULL, num_devices );
|
||||
test_error(error, "clGetDeviceIDs failed");
|
||||
|
||||
std::vector<cl_device_id> devicesTmp(*num_devices), devices, capable_devices;
|
||||
|
||||
error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, *num_devices, &devicesTmp[0], NULL );
|
||||
test_error(error, "clGetDeviceIDs failed");
|
||||
|
||||
devices.push_back(device_from_harness);
|
||||
for (size_t i = 0; i < devicesTmp.size(); ++i)
|
||||
{
|
||||
if (device_from_harness != devicesTmp[i])
|
||||
devices.push_back(devicesTmp[i]);
|
||||
}
|
||||
|
||||
// Select only the devices that support the SVM level needed for the test.
|
||||
// Note that if requested SVM capabilities are not supported by any device then the test still passes (even though it does not execute).
|
||||
cl_device_svm_capabilities caps;
|
||||
cl_uint num_capable_devices = 0;
|
||||
for(cl_uint i = 0; i < *num_devices; i++)
|
||||
{
|
||||
size_t ret_len = 0;
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, 0, 0, &ret_len);
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
log_error("clGetDeviceInfo failed %s\n", IGetErrorString(error));
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<char> oclVersion(ret_len + 1);
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(char) * oclVersion.size(), &oclVersion[0], 0);
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
log_error("clGetDeviceInfo failed %s\n", IGetErrorString(error));
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::string versionStr(&oclVersion[7]);
|
||||
std::stringstream stream;
|
||||
stream << versionStr;
|
||||
|
||||
double version = 0.0;
|
||||
stream >> version;
|
||||
|
||||
if(device_from_harness != devices[i] && version < 2.0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
|
||||
test_error(error,"clGetDeviceInfo failed for CL_DEVICE_MEM_SHARING");
|
||||
if(caps & (~(CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | CL_DEVICE_SVM_ATOMICS)))
|
||||
{
|
||||
log_error("clGetDeviceInfo returned an invalid cl_device_svm_capabilities value");
|
||||
return -1;
|
||||
}
|
||||
if((caps & required_svm_caps) == required_svm_caps)
|
||||
{
|
||||
capable_devices.push_back(devices[i]);
|
||||
++num_capable_devices;
|
||||
}
|
||||
}
|
||||
devices = capable_devices; // the only devices we care about from here on are the ones capable of supporting the requested SVM level.
|
||||
*num_devices = num_capable_devices;
|
||||
if(num_capable_devices == 0)
|
||||
// if(svm_level > CL_DEVICE_COARSE_SVM && 0 == num_capable_devices)
|
||||
{
|
||||
log_info("Requested SVM level not supported by any device on this platform, test not executed.\n");
|
||||
return 1; // 1 indicates do not execute, but counts as passing.
|
||||
}
|
||||
|
||||
cl_context_properties context_properties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, NULL };
|
||||
*context = clCreateContext(context_properties, *num_devices, &devices[0], NULL, NULL, &error);
|
||||
test_error(error, "Unable to create context" );
|
||||
|
||||
// *queues = (cl_command_queue *) malloc( *num_devices * sizeof( cl_command_queue ) );
|
||||
|
||||
for(cl_uint i = 0; i < *num_devices; i++)
|
||||
{
|
||||
queues[i] = clCreateCommandQueueWithProperties(*context, devices[i], 0, &error);
|
||||
test_error(error, "clCreateCommandQueue failed");
|
||||
}
|
||||
|
||||
if(ppCodeString)
|
||||
{
|
||||
*program = clCreateProgramWithSource(*context, 1, ppCodeString , NULL, &error);
|
||||
test_error( error, "clCreateProgramWithSource failed" );
|
||||
|
||||
error = clBuildProgram(*program,0,NULL,"-cl-std=CL2.0",NULL,NULL);
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
print_error(error, "clBuildProgram failed");
|
||||
|
||||
char *buildLog = NULL;
|
||||
size_t buildLogSize = 0;
|
||||
error = clGetProgramBuildInfo (*program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &buildLogSize);
|
||||
buildLog = (char*)malloc(buildLogSize);
|
||||
memset(buildLog, 0, buildLogSize);
|
||||
error = clGetProgramBuildInfo (*program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL);
|
||||
char string[15000];
|
||||
sprintf(string,"%s", buildLog);
|
||||
//MessageBox(NULL,(LPCWSTR)string,(LPCWSTR)"OpenCL Error",MB_OK);
|
||||
//MessageBox(NULL,string,"OpenCL Error",MB_OK);
|
||||
free(buildLog);
|
||||
log_info("%s",string);
|
||||
if (error) {
|
||||
print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Table of test entry points handed to the CTS harness.
// Order must stay in lock-step with basefn_names below; ct_assert only
// checks that the two tables have equal length, not matching order.
basefn basefn_list[] = {
    test_byte_granularity,
    test_set_kernel_exec_info_svm_ptrs,
    test_fine_grain_memory_consistency,
    test_fine_grain_sync_buffers,
    test_shared_address_space_fine_grain,
    test_shared_sub_buffers,
    test_shared_address_space_fine_grain_buffers,
    test_allocate_shared_buffer,
    test_shared_address_space_coarse_grain_old_api,
    test_shared_address_space_coarse_grain_new_api,
    test_cross_buffer_pointers_coarse_grain,
    test_svm_pointer_passing,
    test_enqueue_api,
};

// Command-line names for the tests above, parallel to basefn_list.
const char *basefn_names[] = {
    "svm_byte_granularity",
    "svm_set_kernel_exec_info_svm_ptrs",
    "svm_fine_grain_memory_consistency",
    "svm_fine_grain_sync_buffers",
    "svm_shared_address_space_fine_grain",
    "svm_shared_sub_buffers",
    "svm_shared_address_space_fine_grain_buffers",
    "svm_allocate_shared_buffer",
    "svm_shared_address_space_coarse_grain_old_api",
    "svm_shared_address_space_coarse_grain_new_api",
    "svm_cross_buffer_pointers_coarse_grain",
    "svm_pointer_passing",
    "svm_enqueue_api",
};

// Compile-time check that the two parallel tables have the same length.
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));

// Number of tests registered with the harness.
int num_fns = sizeof(basefn_names) / sizeof(char *);
|
||||
|
||||
|
||||
// Entry point: hand the test tables to the shared CTS harness, which parses
// argv, selects tests, and dispatches each basefn with a device/context/queue.
int main(int argc, const char *argv[])
{
    return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, true, 0 );
}
|
||||
|
||||
|
||||
|
||||
107
test_conformance/SVM/test_allocate_shared_buffer.cpp
Normal file
107
test_conformance/SVM/test_allocate_shared_buffer.cpp
Normal file
@@ -0,0 +1,107 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// cl_mem_flags combinations exercised by test_allocate_shared_buffer.
// Must stay in lock-step with flag_set_names below (same length, same order).
const cl_mem_flags flag_set[] = {
  CL_MEM_READ_WRITE,
  CL_MEM_WRITE_ONLY,
  CL_MEM_READ_ONLY,
  CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
  CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
  CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
  CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
  CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
  CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
  0 // 0 means the default flags (CL_MEM_READ_WRITE per the clSVMAlloc spec)
};
// Human-readable labels for the combinations above, used in log output.
const char* flag_set_names[] = {
  "CL_MEM_READ_WRITE",
  "CL_MEM_WRITE_ONLY",
  "CL_MEM_READ_ONLY",
  "CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER",
  "CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER",
  "CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER",
  "CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS",
  "CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS",
  "CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS",
  "0"
};
|
||||
|
||||
|
||||
// Exercises clSVMAlloc with each supported cl_mem_flags combination, wraps
// the SVM pointer in a CL_MEM_USE_HOST_PTR buffer, and checks that mapping
// the buffer returns the very same host address.
int test_allocate_shared_buffer(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    clContextWrapper context = NULL;
    clProgramWrapper program = NULL;
    cl_uint num_devices = 0;
    cl_int err = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];

    cl_device_svm_capabilities caps;
    err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
    test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");

    err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
    if(err) return -1;

    size_t size = 1024;

    // iteration over flag combos
    int num_flags = sizeof(flag_set)/sizeof(cl_mem_flags);
    for(int i = 0; i < num_flags; i++)
    {
        // Skip combinations the device cannot support.
        if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0 && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0)
            || ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0 && (caps & CL_DEVICE_SVM_ATOMICS) == 0))
        {
            log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]);
            continue;
        }

        log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]);
        cl_char *pBufData1 = (cl_char*) clSVMAlloc(context, flag_set[i], size, 0);
        if(pBufData1 == NULL)
        {
            log_error("SVMalloc returned NULL");
            return -1;
        }

        {
            clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size, pBufData1, &err);
            test_error(err, "clCreateBuffer failed");

            cl_char *pBufData2 = NULL;
            // Fix: start with BOTH access directions and strip whichever one
            // the host-access flags forbid.  This previously or'ed
            // CL_MAP_READ with itself, so the xor below toggled bits that
            // were never set and could request an unintended access mode.
            cl_uint flags = CL_MAP_READ | CL_MAP_WRITE;
            if(flag_set[i] & CL_MEM_HOST_READ_ONLY) flags ^= CL_MAP_WRITE;
            if(flag_set[i] & CL_MEM_HOST_WRITE_ONLY) flags ^= CL_MAP_READ;

            if(!(flag_set[i] & CL_MEM_HOST_NO_ACCESS))
            {
                pBufData2 = (cl_char*) clEnqueueMapBuffer(queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL, NULL, &err);
                test_error(err, "clEnqueueMapBuffer failed");

                // For a CL_MEM_USE_HOST_PTR buffer over an SVM allocation the
                // mapped pointer must be the SVM pointer itself.
                if(pBufData2 != pBufData1 || NULL == pBufData1)
                {
                    log_error("SVM pointer returned by clEnqueueMapBuffer doesn't match pointer returned by clSVMalloc");
                    return -1;
                }
            }
        } // buf is released here, before the SVM allocation is freed.

        clSVMFree(context, pBufData1);
    }

    return 0;
}
|
||||
148
test_conformance/SVM/test_byte_granularity.cpp
Normal file
148
test_conformance/SVM/test_byte_granularity.cpp
Normal file
@@ -0,0 +1,148 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// OpenCL C source for the byte-granularity test kernels.
const char *byte_manipulation_kernels[] = {
// Each device will write it's id into the bytes that it "owns", ownership is based on round robin (global_id % num_id)
// num_id is equal to number of SVM devices in the system plus one (for the host code).
// id is the index (id) of the device that this kernel is executing on.
// For example, if there are 2 SVM devices and the host; the buffer should look like this after each device and the host write their id's:
// 0, 1, 2, 0, 1, 2, 0, 1, 2...
"__kernel void write_owned_locations(__global char* a, uint num_id, uint id)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" int owner = i % num_id;\n"
" if(id == owner) \n"
" a[i] = id;\n" // modify location if it belongs to this device, write id
"}\n"

// Verify that a device can see the byte sized updates from the other devices, sum up the device id's and see if they match expected value.
// Note: this must be called with a reduced NDRange so that neighbor acesses don't go past end of buffer.
// For example if there are two SVM devices and the host (3 total devices) the buffer should look like this:
// 0,1,2,0,1,2...
// and the expected sum at each point is 0+1+2 = 3.
"__kernel void sum_neighbor_locations(__global char* a, uint num_devices, volatile __global uint* error_count)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" uint expected_sum = (num_devices * (num_devices - 1))/2;\n"
" uint sum = 0;\n"
" for(uint j=0; j<num_devices; j++) {\n"
" sum += a[i + j];\n" // add my neighbors to the right
" }\n"
" if(sum != expected_sum)\n"
" atomic_inc(error_count);\n"
"}\n"
};
|
||||
|
||||
|
||||
|
||||
// Checks that byte-granular writes to a fine-grain SVM buffer made by every
// device and by the host are visible to all other devices and to the host.
// Ownership of each byte is round-robin over (num_devices + 1) writers.
int test_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{
    clContextWrapper context;
    clProgramWrapper program;
    clKernelWrapper k1, k2;
    clCommandQueueWrapper queues[MAXQ];

    cl_uint num_devices = 0;
    cl_int err = CL_SUCCESS;

    err = create_cl_objects(deviceID, &byte_manipulation_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
    if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
    if(err < 0) return -1; // fail test.

    cl_uint num_devices_plus_host = num_devices + 1;

    k1 = clCreateKernel(program, "write_owned_locations", &err);
    test_error(err, "clCreateKernel failed");
    k2 = clCreateKernel(program, "sum_neighbor_locations", &err);
    test_error(err, "clCreateKernel failed");

    // Fix: both host allocations were previously dereferenced unchecked.
    cl_char *pA = (cl_char*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_char) * num_elements, 0);
    if(pA == NULL)
    {
        log_error("clSVMAlloc returned NULL");
        return -1;
    }

    cl_uint **error_counts = (cl_uint**) malloc(sizeof(void*) * num_devices);
    if(error_counts == NULL)
    {
        clSVMFree(context, pA);
        log_error("malloc failed");
        return -1;
    }

    // One per-device error counter, each in its own fine-grain SVM allocation.
    for(cl_uint i=0; i < num_devices; i++) {
        error_counts[i] = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint), 0);
        if(error_counts[i] == NULL)
        {
            for(cl_uint j = 0; j < i; j++) clSVMFree(context, error_counts[j]);
            free(error_counts);
            clSVMFree(context, pA);
            log_error("clSVMAlloc returned NULL");
            return -1;
        }
        *error_counts[i] = 0;
    }
    for(int i=0; i < num_elements; i++) pA[i] = -1; // poison so unwritten bytes are detected.

    err |= clSetKernelArgSVMPointer(k1, 0, pA);
    err |= clSetKernelArg(k1, 1, sizeof(cl_uint), &num_devices_plus_host);
    test_error(err, "clSetKernelArg failed");

    // get all the devices going simultaneously
    size_t element_num = num_elements;
    for(cl_uint d=0; d < num_devices; d++) // each device writes the bytes whose index % num_devices_plus_host == d.
    {
        err = clSetKernelArg(k1, 2, sizeof(cl_uint), &d);
        test_error(err, "clSetKernelArg failed");
        err = clEnqueueNDRangeKernel(queues[d], k1, 1, NULL, &element_num, NULL, 0, NULL, NULL);
        test_error(err, "clEnqueueNDRangeKernel failed");
    }

    for(cl_uint d=0; d < num_devices; d++) clFlush(queues[d]);

    cl_uint host_id = num_devices; // host code will take the id above the devices.
    for(int i = (int)num_devices; i < num_elements; i+= num_devices_plus_host) pA[i] = host_id;

    for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);

    // now check that each device can see the byte writes made by the other devices.
    err |= clSetKernelArgSVMPointer(k2, 0, pA);
    err |= clSetKernelArg(k2, 1, sizeof(cl_uint), &num_devices_plus_host);
    test_error(err, "clSetKernelArg failed");

    // adjusted so k2 doesn't read past end of buffer
    size_t adjusted_num_elements = num_elements - num_devices;
    for(cl_uint id = 0; id < num_devices; id++)
    {
        err = clSetKernelArgSVMPointer(k2, 2, error_counts[id]);
        test_error(err, "clSetKernelArg failed");

        err = clEnqueueNDRangeKernel(queues[id], k2, 1, NULL, &adjusted_num_elements, NULL, 0, NULL, NULL);
        test_error(err, "clEnqueueNDRangeKernel failed");
    }

    for(cl_uint id = 0; id < num_devices; id++) clFinish(queues[id]);

    bool failed = false;

    // see if any of the devices found errors
    for(cl_uint i=0; i < num_devices; i++) {
        if(*error_counts[i] > 0)
            failed = true;
    }
    cl_uint expected = (num_devices_plus_host * (num_devices_plus_host - 1))/2;
    // check that host can see the byte writes made by the devices.
    for(cl_uint i = 0; i < num_elements - num_devices_plus_host; i++)
    {
        cl_uint sum = 0; // unsigned, to compare cleanly against expected
        for(cl_uint j=0; j < num_devices_plus_host; j++) sum += pA[i+j];
        if(sum != expected)
            failed = true;
    }

    clSVMFree(context, pA);
    for(cl_uint i=0; i < num_devices; i++) clSVMFree(context, error_counts[i]);
    free(error_counts); // fix: the pointer array itself was previously leaked.

    if(failed)
        return -1;
    return 0;
}
|
||||
219
test_conformance/SVM/test_cross_buffer_pointers.cpp
Normal file
219
test_conformance/SVM/test_cross_buffer_pointers.cpp
Normal file
@@ -0,0 +1,219 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// create linked lists that use nodes from two different buffers.
|
||||
// OpenCL C program for the cross-buffer-pointers test.  Device kernels build
// singly linked lists whose nodes are allocated from TWO distinct SVM buffers
// (pNodes1/pNodes2), so list-node pointers cross buffer boundaries; a second
// kernel walks and verifies the lists.
const char *SVMCrossBufferPointers_test_kernel[] = {
    "\n"
    "typedef struct Node {\n"
    " int global_id;\n"
    " int position_in_list;\n"
    " __global struct Node* pNext;\n"
    "} Node;\n"
    "\n"
    // Allocates one node from the shared pool; allocation_index is a single
    // atomic counter shared across BOTH buffers.
    "__global Node* allocate_node(__global Node* pNodes1, __global Node* pNodes2, volatile __global int* allocation_index, size_t i)\n"
    "{\n"
    // mix things up, adjacent work items will allocate from different buffers
    " if(i & 0x1)\n"
    " return &pNodes1[atomic_inc(allocation_index)];\n"
    " else\n"
    " return &pNodes2[atomic_inc(allocation_index)];\n"
    "}\n"
    "\n"
    // The allocation_index parameter must be initialized on the host to N work-items
    // The first N nodes in pNodes will be the heads of the lists.
    "__kernel void create_linked_lists(__global Node* pNodes, __global Node* pNodes2, volatile __global int* allocation_index, int list_length)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " __global Node *pNode = &pNodes[i];\n"
    "\n"
    " pNode->global_id = i;\n"
    " pNode->position_in_list = 0;\n"
    "\n"
    " __global Node *pNew;\n"
    " for(int j=1; j < list_length; j++)\n"
    " {\n"
    " pNew = allocate_node(pNodes, pNodes2, allocation_index, i);\n"
    " pNew->global_id = i;\n"
    " pNew->position_in_list = j;\n"
    " pNode->pNext = pNew; // link new node onto end of list\n"
    " pNode = pNew; // move to end of list\n"
    " }\n"
    "}\n"
    "\n"
    // Walks list i and counts (via num_correct) how many nodes carry the
    // expected (global_id, position_in_list) pair; stops at first mismatch.
    "__kernel void verify_linked_lists(__global Node* pNodes, __global Node* pNodes2, volatile __global uint* num_correct, int list_length)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " __global Node *pNode = &pNodes[i];\n"
    "\n"
    " for(int j=0; j < list_length; j++)\n"
    " {\n"
    " if( pNode->global_id == i && pNode->position_in_list == j)\n"
    " {\n"
    " atomic_inc(num_correct);\n"
    " }\n"
    " else {\n"
    " break;\n"
    " }\n"
    " pNode = pNode->pNext;\n"
    " }\n"
    "}\n"
};
|
||||
|
||||
|
||||
// Creates linked list using host code.
|
||||
// Builds the linked lists on the host instead of a device: maps both node
// buffers for read/write, runs the shared host-side list builder over the
// first buffer's heads, then unmaps and drains the queue.
// Returns CL_SUCCESS on success; test_error/test_error2 return early on
// OpenCL failures.
cl_int create_linked_lists_on_host(cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
{
    cl_int error = CL_SUCCESS;

    log_info("SVM: creating linked list on host ");

    // Both buffers were allocated with the same geometry.
    const size_t regionBytes = sizeof(Node) * ListLength * numLists;

    Node *pMapped = (Node *)clEnqueueMapBuffer(
        cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, regionBytes, 0,
        NULL, NULL, &error);
    test_error2(error, pMapped, "clEnqueueMapBuffer failed");

    Node *pMapped2 = (Node *)clEnqueueMapBuffer(
        cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, regionBytes, 0,
        NULL, NULL, &error);
    test_error2(error, pMapped2, "clEnqueueMapBuffer failed");

    // Host-side equivalent of the create_linked_lists kernel.
    create_linked_lists(pMapped, numLists, ListLength);

    error = clEnqueueUnmapMemObject(cmdq, nodes, pMapped, 0, NULL, NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");

    error = clEnqueueUnmapMemObject(cmdq, nodes2, pMapped2, 0, NULL, NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");

    error = clFinish(cmdq);
    test_error(error, "clFinish failed");

    return error;
}
|
||||
|
||||
// Verify correctness of the linked list using host code.
|
||||
// Verifies the linked lists on the host: maps both node buffers, runs the
// shared host-side verifier, then unmaps and drains the queue.
// `ci` identifies the creating device/host (unused here; kept for signature
// symmetry with the device verification path).
// Returns CL_SUCCESS on success, -1 on verification failure, or an OpenCL
// error via the test_error macros.
cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
{
    cl_int error = CL_SUCCESS;

    //log_info(" and verifying on host ");

    Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
    test_error2(error, pNodes, "clEnqueueMapBuffer failed");
    Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
    // Bug fix: this previously re-checked pNodes (copy/paste), so a failed
    // second map went undetected.
    test_error2(error, pNodes2, "clEnqueueMapBuffer failed");

    cl_int verify_error = verify_linked_lists(pNodes, numLists, ListLength);

    // Bug fix: unmap before acting on the verification result; previously a
    // verification failure returned early and left both buffers mapped.
    error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");
    error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");
    error = clFinish(cmdq);
    test_error(error, "clFinish failed");

    if(verify_error) return -1;
    return error;
}
|
||||
|
||||
// This tests that shared buffers are able to contain pointers that point to other shared buffers.
|
||||
// This tests that all devices and the host share a common address space; using only the coarse-grain features.
|
||||
// This is done by creating a linked list on a device and then verifying the correctness of the list
|
||||
// on another device or the host.
|
||||
// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
|
||||
// This basic test is performed for all combinations of devices and the host.
|
||||
// This tests that shared buffers are able to contain pointers that point to other shared buffers.
// This tests that all devices and the host share a common address space; using only the coarse-grain features.
// This is done by creating a linked list on a device and then verifying the correctness of the list
// on another device or the host.
// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
// This basic test is performed for all combinations of devices and the host.
int test_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    clContextWrapper    context = NULL;
    clProgramWrapper    program = NULL;
    cl_uint     num_devices = 0;
    cl_int      error = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];

    error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
    if(error) return -1;

    size_t numLists = num_elements;   // one list per work-item
    cl_int ListLength = 32;           // nodes per list

    clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
    test_error(error, "clCreateKernel failed");

    clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
    test_error(error, "clCreateKernel failed");

    // this buffer holds some of the linked list nodes.
    Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);

    // this buffer holds some of the linked list nodes.
    Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);

    // Bug fix: clSVMAlloc returns NULL on failure and the results were
    // previously used unchecked (NULL would be handed to clCreateBuffer with
    // CL_MEM_USE_HOST_PTR).  clSVMFree ignores a NULL pointer, so both frees
    // are safe here.
    if(pNodes == NULL || pNodes2 == NULL)
    {
        log_error("clSVMAlloc failed\n");
        clSVMFree(context, pNodes2);
        clSVMFree(context, pNodes);
        return -1;
    }

    // Scope block so the clMemWrappers release before the SVM pointers are freed.
    {
        clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error);
        test_error(error, "clCreateBuffer failed.");

        clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error);
        test_error(error, "clCreateBuffer failed.");

        // this buffer holds the index into the nodes buffer that is used for node allocation
        clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
        test_error(error, "clCreateBuffer failed.");

        // this buffer holds the count of correct nodes which is computed by the verify kernel.
        clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
        test_error(error, "clCreateBuffer failed.");

        error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes);
        //error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *) pNodes);
        error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2);
        error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &allocator);
        error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int), (void *) &ListLength);

        error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes);
        error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2);
        error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &num_correct);
        error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int), (void *) &ListLength);
        test_error(error, "clSetKernelArg failed");

        // Create linked list on one device and verify on another device (or the host).
        // Do this for all possible combinations of devices and host within the platform.
        for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
        {
            for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
            {
                if(ci == (int)num_devices) // last device index represents the host, note the num_device+1 above.
                {
                    error = create_linked_lists_on_host(queues[0], nodes, nodes2, ListLength, numLists);
                    if(error) return -1;
                }
                else
                {
                    error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
                    if(error) return -1;
                }

                if(vi == (int)num_devices)
                {
                    error = verify_linked_lists_on_host(vi, queues[0], nodes, nodes2, ListLength, numLists);
                    if(error) return -1;
                }
                else
                {
                    error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
                    if(error) return -1;
                }
            } // inner loop, vi
        } // outer loop, ci
    }

    clSVMFree(context, pNodes2);
    clSVMFree(context, pNodes);

    return 0;
}
|
||||
254
test_conformance/SVM/test_enqueue_api.cpp
Normal file
254
test_conformance/SVM/test_enqueue_api.cpp
Normal file
@@ -0,0 +1,254 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
#include "../../test_common/harness/mt19937.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
// Book-keeping shared between test_enqueue_api and the clEnqueueSVMFree
// callback (callback_svm_free).
typedef struct
{
    cl_uint status;                    // 0 until the callback has run; set to 1 last by the callback
    cl_uint num_svm_pointers;          // pointer count reported to the callback
    std::vector<void *> svm_pointers;  // pointers reported to the callback, in order
} CallbackData;
|
||||
|
||||
void generate_data(std::vector<cl_uchar> &data, size_t size, MTdata seed)
|
||||
{
|
||||
cl_uint randomData = genrand_int32(seed);
|
||||
cl_uint bitsLeft = 32;
|
||||
|
||||
for( size_t i = 0; i < size; i++ )
|
||||
{
|
||||
if( 0 == bitsLeft)
|
||||
{
|
||||
randomData = genrand_int32(seed);
|
||||
bitsLeft = 32;
|
||||
}
|
||||
data[i] = (cl_uchar)( randomData & 255 );
|
||||
randomData >>= 8; randomData -= 8;
|
||||
}
|
||||
}
|
||||
|
||||
//callback which will be passed to clEnqueueSVMFree command
|
||||
//callback which will be passed to clEnqueueSVMFree command
// Records the reported pointer set into the CallbackData passed as user_data
// and frees each SVM pointer itself (the test enqueued the free with a
// callback instead of letting the runtime free them).
void CL_CALLBACK callback_svm_free(cl_command_queue queue, cl_uint num_svm_pointers, void * svm_pointers[], void * user_data)
{
    CallbackData *data = (CallbackData *)user_data;
    data->num_svm_pointers = num_svm_pointers;
    data->svm_pointers.resize(num_svm_pointers, 0);

    // The callback only receives the queue; recover its context so the
    // pointers can be released with clSVMFree.
    cl_context context;
    if(clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, 0) != CL_SUCCESS)
    {
        log_error("clGetCommandQueueInfo failed in the callback\n");
        return;
    }

    for (size_t i = 0; i < num_svm_pointers; ++i)
    {
        data->svm_pointers[i] = svm_pointers[i];  // record before freeing
        clSVMFree(context, svm_pointers[i]);
    }

    // Written last: the host busy-waits on status in test_enqueue_api.
    // NOTE(review): status is a plain cl_uint (neither atomic nor volatile),
    // so the cross-thread handshake relies on unsynchronized access — confirm
    // this is acceptable for the platforms the test targets.
    data->status = 1;
}
|
||||
|
||||
// Exercises the OpenCL 2.0 SVM enqueue API: clEnqueueSVMMemFill / SVMMemcpy /
// SVMMap / SVMUnmap / SVMFree, including event dependencies, blocking and
// non-blocking variants, event command-type queries, and the SVMFree callback.
// Returns 0 on pass, -1 on failure.
int test_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{
    clContextWrapper    context = NULL;
    clCommandQueueWrapper queues[MAXQ];
    cl_uint     num_devices = 0;
    const size_t elementNum = 1024;     // elements per SVM buffer
    const size_t numSVMBuffers = 32;    // buffers used for the SVMFree-callback phase
    cl_int      error = CL_SUCCESS;
    RandomSeed seed(0);

    // Test requires coarse-grain buffer SVM (mandatory in OpenCL 2.0).
    error = create_cl_objects(deviceID, NULL, &context, NULL, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
    if(error) return -1;

    // The incoming queue belongs to the caller's context; use ours instead.
    queue = queues[0];

    //all possible sizes of vectors and scalars
    size_t typeSizes[] = {
        sizeof(cl_uchar),
        sizeof(cl_uchar2),
        sizeof(cl_uchar3),
        sizeof(cl_uchar4),
        sizeof(cl_uchar8),
        sizeof(cl_uchar16),
        sizeof(cl_ushort),
        sizeof(cl_ushort2),
        sizeof(cl_ushort3),
        sizeof(cl_ushort4),
        sizeof(cl_ushort8),
        sizeof(cl_ushort16),
        sizeof(cl_uint),
        sizeof(cl_uint2),
        sizeof(cl_uint3),
        sizeof(cl_uint4),
        sizeof(cl_uint8),
        sizeof(cl_uint16),
        sizeof(cl_ulong),
        sizeof(cl_ulong2),
        sizeof(cl_ulong3),
        sizeof(cl_ulong4),
        sizeof(cl_ulong8),
        sizeof(cl_ulong16),
    };

    for (size_t i = 0; i < ( sizeof(typeSizes) / sizeof(typeSizes[0]) ); ++i)
    {
        //generate initial data
        // NOTE(review): fillData1 is never used below — looks vestigial.
        std::vector<cl_uchar> fillData0(typeSizes[i]), fillData1(typeSizes[i], 0), fillData2(typeSizes[i]);
        generate_data(fillData0, typeSizes[i], seed);
        generate_data(fillData2, typeSizes[i], seed);

        // NOTE(review): clSVMAlloc results are not checked for NULL here.
        cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum * typeSizes[i], 0);
        cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum * typeSizes[i], 0);

        // Gate the chain on a user event so the fill cannot start until we
        // release it; verifies the wait-list plumbing of the SVM commands.
        clEventWrapper userEvent = clCreateUserEvent(context, &error);
        test_error(error, "clCreateUserEvent failed");

        // Fill src with the pattern, then copy src -> dst, chained by events.
        clEventWrapper eventMemFill;
        error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0], typeSizes[i], elementNum * typeSizes[i], 1, &userEvent, &eventMemFill);
        test_error(error, "clEnqueueSVMMemFill failed");

        clEventWrapper eventMemcpy;
        error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, elementNum * typeSizes[i], 1, &eventMemFill, &eventMemcpy);
        test_error(error, "clEnqueueSVMMemcpy failed");

        // Release the gate; the fill/copy chain may now execute.
        error = clSetUserEventStatus(userEvent, CL_COMPLETE);
        test_error(error, "clSetUserEventStatus failed");

        // Map dst for host access, dependent on the copy completing.
        clEventWrapper eventMap;
        error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, dstBuffer, elementNum * typeSizes[i], 1, &eventMemcpy, &eventMap);
        test_error(error, "clEnqueueSVMMap failed");

        error = clWaitForEvents(1, &eventMap);
        test_error(error, "clWaitForEvents failed");

        //data verification
        // dst should now hold elementNum repetitions of the fill pattern.
        for (size_t j = 0; j < elementNum * typeSizes[i]; ++j)
        {
            if (dstBuffer[j] != fillData0[j % typeSizes[i]])
            {
                log_error("Invalid data at index %ld, expected %d, got %d\n", j, fillData0[j % typeSizes[i]], dstBuffer[j]);
                return -1;
            }
        }

        clEventWrapper eventUnmap;
        error = clEnqueueSVMUnmap(queue, dstBuffer, 0, 0, &eventUnmap);
        test_error(error, "clEnqueueSVMUnmap failed");

        // Second round: fill/copy each half separately with no wait lists;
        // the final blocking memcpy orders everything before the free below.
        error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData2[0], typeSizes[i], elementNum * typeSizes[i] / 2, 0, 0, 0);
        test_error(error, "clEnqueueSVMMemFill failed");

        error = clEnqueueSVMMemFill(queue, dstBuffer + elementNum * typeSizes[i] / 2, &fillData2[0], typeSizes[i], elementNum * typeSizes[i] / 2, 0, 0, 0);
        test_error(error, "clEnqueueSVMMemFill failed");

        error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, elementNum * typeSizes[i] / 2, 0, 0, 0);
        test_error(error, "clEnqueueSVMMemcpy failed");

        error = clEnqueueSVMMemcpy(queue, CL_TRUE, dstBuffer + elementNum * typeSizes[i] / 2, srcBuffer + elementNum * typeSizes[i] / 2, elementNum * typeSizes[i] / 2, 0, 0, 0);
        test_error(error, "clEnqueueSVMMemcpy failed");

        // Free both buffers via the enqueue API (no callback this time).
        void *ptrs[] = {(void *)srcBuffer, (void *)dstBuffer};

        clEventWrapper eventFree;
        error = clEnqueueSVMFree(queue, 2, ptrs, 0, 0, 0, 0, &eventFree);
        test_error(error, "clEnqueueSVMFree failed");

        error = clWaitForEvents(1, &eventFree);
        test_error(error, "clWaitForEvents failed");

        //event info verification for new SVM commands
        cl_command_type commandType;
        error = clGetEventInfo(eventMemFill, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
        test_error(error, "clGetEventInfo failed");
        if (commandType != CL_COMMAND_SVM_MEMFILL)
        {
            log_error("Invalid command type returned for clEnqueueSVMMemFill\n");
            return -1;
        }

        error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
        test_error(error, "clGetEventInfo failed");
        if (commandType != CL_COMMAND_SVM_MEMCPY)
        {
            log_error("Invalid command type returned for clEnqueueSVMMemcpy\n");
            return -1;
        }

        error = clGetEventInfo(eventMap, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
        test_error(error, "clGetEventInfo failed");
        if (commandType != CL_COMMAND_SVM_MAP)
        {
            log_error("Invalid command type returned for clEnqueueSVMMap\n");
            return -1;
        }

        error = clGetEventInfo(eventUnmap, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
        test_error(error, "clGetEventInfo failed");
        if (commandType != CL_COMMAND_SVM_UNMAP)
        {
            log_error("Invalid command type returned for clEnqueueSVMUnmap\n");
            return -1;
        }

        error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL);
        test_error(error, "clGetEventInfo failed");
        if (commandType != CL_COMMAND_SVM_FREE)
        {
            log_error("Invalid command type returned for clEnqueueSVMFree\n");
            return -1;
        }
    }

    // Phase 2: free a batch of buffers through the callback variant.
    std::vector<void *> buffers(numSVMBuffers, 0);
    for(size_t i = 0; i < numSVMBuffers; ++i) buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0);

    //verify if callback is triggered correctly
    CallbackData data;
    data.status = 0;

    error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0], callback_svm_free, &data, 0, 0, 0);
    test_error(error, "clEnqueueSVMFree failed");

    error = clFinish(queue);
    test_error(error, "clFinish failed");

    //wait for the callback
    // NOTE(review): busy-waits on a plain (non-volatile, non-atomic) cl_uint
    // set from the callback thread — confirm this cannot be optimized into an
    // infinite loop on the target toolchains.
    while(data.status == 0) { }

    //check if number of SVM pointers returned in the callback matches with expected
    if (data.num_svm_pointers != buffers.size())
    {
        log_error("Invalid number of SVM pointers returned in the callback, expected: %ld, got: %d\n", buffers.size(), data.num_svm_pointers);
        return -1;
    }

    //check if pointers returned in callback are correct
    for (size_t i = 0; i < buffers.size(); ++i)
    {
        if (data.svm_pointers[i] != buffers[i])
        {
            log_error("Invalid SVM pointer returned in the callback, idx: %ld\n", i);
            return -1;
        }
    }

    return 0;
}
|
||||
176
test_conformance/SVM/test_fine_grain_memory_consistency.cpp
Normal file
176
test_conformance/SVM/test_fine_grain_memory_consistency.cpp
Normal file
@@ -0,0 +1,176 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// OpenCL C program for the fine-grain memory-consistency test: every device
// (and the host) inserts values into one shared lock-free hash table whose
// bins are linked lists headed in pNodes[0..numBins-1].
// NOTE: must remain a MUTABLE (non-const) char array — the test patches
// hash_table_kernel[4] (the '0' in "#if 0") to '1' at runtime to enable the
// 64-bit atomics pragmas on 64-bit platforms.
static char hash_table_kernel[] =
    "#if 0\n"
    "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
    "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
    "#endif\n"
    "typedef struct BinNode {\n"
    " int value;\n"
    " atomic_uintptr_t pNext;\n"
    "} BinNode;\n"

    "__kernel void build_hash_table(__global uint* input, __global BinNode* pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n"
    "{\n"
    " __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, 1, memory_order_relaxed, memory_scope_all_svm_devices) ];\n"
    " uint i = get_global_id(0);\n"
    " uint b = input[i] % numBins;\n"
    " pNew->value = input[i];\n"
    " uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), memory_order_seq_cst, memory_scope_all_svm_devices);\n"
    " do\n"
    " {\n"
    " atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, memory_scope_all_svm_devices);\n" // always inserting at head of list
    " } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), &next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, memory_scope_all_svm_devices));\n"
    "}\n";
|
||||
|
||||
// Host-side mirror of the device-side BinNode (hash-bin list node).
// NOTE(review): the device struct declares pNext as atomic_uintptr_t; this
// definition assumes a plain pointer has the same size/layout — confirm for
// each target platform.
typedef struct BinNode{
    cl_uint value;           // inserted value; value % numBins selects the bin
    struct BinNode* pNext;   // next node in the bin's list (head-insert order)
} BinNode;
|
||||
|
||||
// Host-side counterpart of the build_hash_table kernel: inserts every element
// of `input` at the head of its bin's lock-free list, concurrently with the
// devices running the same algorithm on the same SVM memory.
// `c` is unused; kept for signature symmetry with the device launch path.
void build_hash_table_on_host(cl_context c, cl_uint* input, size_t inputSize, BinNode* pNodes, cl_int volatile *pNumNodes, cl_uint numBins)
{
    for(cl_uint i = 0; i < inputSize; i++)
    {
        // Claim a fresh node from the shared pool; the counter is atomic
        // across the host and all devices.
        BinNode *pNew = &pNodes[ AtomicFetchAddExplicit(pNumNodes, 1, memory_order_relaxed) ];
        cl_uint b = input[i] % numBins;
        pNew->value = input[i];

        // Lock-free head insert: retry the CAS until our node is installed
        // as the new head of bin b.
        BinNode *next = pNodes[b].pNext;
        do {
            pNew->pNext = next; // always inserting at head of list
        } while(!AtomicCompareExchangeStrongExplicit(&(pNodes[b].pNext), &next, pNew, memory_order_relaxed, memory_order_seq_cst));
    }
}
|
||||
|
||||
|
||||
// Runs one round of the shared hash-table test: allocates the SVM input,
// node pool, and counter; launches the build kernel on every device while the
// host inserts concurrently; then walks every bin and checks both placement
// (value % numBins == bin) and the total item count.
// Returns 0 on pass, -1 on failure.
int launch_kernels_and_verify(clContextWrapper &context, clCommandQueueWrapper* queues, clKernelWrapper &kernel, cl_uint num_devices, cl_uint numBins, size_t num_pixels)
{
    int err = CL_SUCCESS;
    cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0);
    // Pool large enough for every device plus the host to insert all pixels,
    // plus the numBins list-head nodes.
    BinNode *pNodes = (BinNode*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(BinNode) * (num_pixels * (num_devices + 1) + numBins), 0);
    cl_int *pNumNodes = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int), 0);

    // Bug fix: clSVMAlloc returns NULL on failure; these pointers were
    // previously dereferenced unchecked.  clSVMFree ignores NULL.
    if (pInputImage == NULL || pNodes == NULL || pNumNodes == NULL)
    {
        log_error("clSVMAlloc failed\n");
        clSVMFree(context, pNumNodes);
        clSVMFree(context, pNodes);
        clSVMFree(context, pInputImage);
        return -1;
    }

    *pNumNodes = numBins; // using the first numBins nodes to hold the list heads.
    for(cl_uint i=0;i<numBins;i++) {
        pNodes[i].pNext = NULL;
    };

    for(cl_uint i=0; i < num_pixels; i++) pInputImage[i] = i;

    err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
    err |= clSetKernelArgSVMPointer(kernel, 1, pNodes);
    err |= clSetKernelArgSVMPointer(kernel, 2, pNumNodes);
    err |= clSetKernelArg(kernel, 3, sizeof(cl_uint), (void*) &numBins);

    test_error(err, "clSetKernelArg failed");

    // get all the devices going simultaneously, each device (and the host) will insert all the pixels.
    // Bug fix: previously each launch wrote its event into a single cl_event
    // that was overwritten and never released or waited on (leaking one event
    // per device); completion is observed via clFinish below, so no events
    // are requested.
    for(cl_uint d=0; d<num_devices; d++)
    {
        err = clEnqueueNDRangeKernel(queues[d], kernel, 1, NULL, &num_pixels, 0, 0, NULL, NULL);
        test_error(err,"clEnqueueNDRangeKernel failed");
    }
    for(cl_uint d=0; d<num_devices; d++) clFlush(queues[d]);

    // wait until we see some activity from a device (try to run host side simultaneously).
    while(numBins == AtomicLoadExplicit(pNumNodes, memory_order_relaxed));

    build_hash_table_on_host(context, pInputImage, num_pixels, pNodes, pNumNodes, numBins);

    for(cl_uint d=0; d<num_devices; d++) clFinish(queues[d]);

    cl_uint num_items = 0;
    // check correctness of each bin in the hash table.
    for(cl_uint i = 0; i < numBins; i++)
    {
        BinNode *pNode = pNodes[i].pNext;
        while(pNode)
        {
            if((pNode->value % numBins) != i)
            {
                log_error("Something went wrong, item is in wrong hash bucket\n");
                break;
            }
            num_items++;
            pNode = pNode->pNext;
        }
    }

    clSVMFree(context, pInputImage);
    clSVMFree(context, pNodes);
    clSVMFree(context, pNumNodes);
    // each device and the host inserted all of the pixels, check that none are missing.
    if(num_items != num_pixels * (num_devices + 1) )
    {
        // Bug fix: %d was used for a size_t-typed expected count; use %u/%zu
        // to match the argument types.
        log_error("The hash table is not correct, num items %u, expected num items: %zu\n", num_items, num_pixels * (num_devices + 1));
        return -1; // test did not pass
    }
    return 0;
}
|
||||
|
||||
// This tests for memory consistency across devices and the host.
|
||||
// Each device and the host simultaneously insert values into a single hash table.
|
||||
// Each bin in the hash table is a linked list. Each bin is protected against simultaneous
|
||||
// update using a lock free technique. The correctness of the list is verfied on the host.
|
||||
// This test requires the new OpenCL 2.0 atomic operations that implement the new seq_cst memory ordering.
|
||||
// This tests for memory consistency across devices and the host.
// Each device and the host simultaneously insert values into a single hash table.
// Each bin in the hash table is a linked list. Each bin is protected against simultaneous
// update using a lock free technique. The correctness of the list is verfied on the host.
// This test requires the new OpenCL 2.0 atomic operations that implement the new seq_cst memory ordering.
int test_fine_grain_memory_consistency(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
{
    clContextWrapper    context;
    clProgramWrapper    program;
    clKernelWrapper     kernel;
    clCommandQueueWrapper queues[MAXQ];

    cl_uint     num_devices = 0;
    cl_int      err = CL_SUCCESS;

    // On 64-bit builds pNext is an atomic_uintptr_t (64-bit), so the 64-bit
    // atomics extensions are mandatory; skip (pass) when unavailable.
    if (sizeof(void *) == 8 && (!is_extension_available(deviceID, "cl_khr_int64_base_atomics") || !is_extension_available(deviceID, "cl_khr_int64_extended_atomics")))
    {
        log_info("WARNING: test skipped. 'cl_khr_int64_base_atomics' and 'cl_khr_int64_extended_atomics' extensions are not supported\n");
        return 0;
    }

    // Make pragmas visible for 64-bit addresses
    // (patches the '0' in the leading "#if 0" of hash_table_kernel).
    hash_table_kernel[4] = sizeof(void *) == 8 ? '1' : '0';

    char *source[] = { hash_table_kernel };

    err = create_cl_objects(deviceID, (const char**)source, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS);
    if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
    if(err < 0) return -1; // fail test.

    kernel = clCreateKernel(program, "build_hash_table", &err);
    test_error(err, "clCreateKernel failed");
    size_t num_pixels = num_elements;

    // Three rounds with different bin counts to vary contention patterns.
    int result;
    cl_uint numBins = 1; // all work groups in all devices and the host code will hammer on this one lock.
    result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels);
    if(result == -1) return result;

    numBins = 2; // 2 locks within in same cache line will get hit from different devices and host.
    result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels);
    if(result == -1) return result;

    numBins = 29; // locks span a few cache lines.
    result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels);
    if(result == -1) return result;

    return result;
}
|
||||
105
test_conformance/SVM/test_fine_grain_sync_buffers.cpp
Normal file
105
test_conformance/SVM/test_fine_grain_sync_buffers.cpp
Normal file
@@ -0,0 +1,105 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// OpenCL C program for the fine-grain sync test: scans `image` for `target`;
// on a hit it atomically claims a slot via numTargetsFound (device scope) and
// publishes the hit location to the host with an all-svm-devices-scope
// atomic exchange, so the host can react while the kernel is still running.
const char *find_targets_kernel[] = {

    "__kernel void find_targets(__global uint* image, uint target, volatile __global atomic_uint *numTargetsFound, volatile __global atomic_uint *targetLocations)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " uint index;\n"
    " if(image[i] == target) {\n"
    " index = atomic_fetch_add_explicit(numTargetsFound, 1, memory_order_relaxed, memory_scope_device); \n"
    " atomic_exchange_explicit(&targetLocations[index], i, memory_order_relaxed, memory_scope_all_svm_devices); \n"
    " }\n"
    "}\n"
};
|
||||
|
||||
|
||||
// Host-side hook invoked for each target the device publishes.
// A real application would launch further analysis here; in this test it is a
// stub so the polling loop in test_fine_grain_sync_buffers has a per-target
// callback. The cast silences unused-parameter warnings from -Wunused.
void spawnAnalysisTask(int location)
{
    (void)location;
    // printf("found target at location %d\n", location);
}
|
||||
|
||||
#define MAX_TARGETS 1024
|
||||
|
||||
// Goals: demonstrate use of SVM's atomics to do fine grain synchronization between the device and host.
|
||||
// Concept: a device kernel is used to search an input image for regions that match a target pattern.
|
||||
// The device immediately notifies the host when it finds a target (via an atomic operation that works across host and devices).
|
||||
// The host is then able to spawn a task that further analyzes the target while the device continues searching for more targets.
|
||||
int test_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int err = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
err = create_cl_objects(deviceID, &find_targets_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS);
|
||||
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
if(err < 0) return -1; // fail test.
|
||||
|
||||
clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err);
|
||||
test_error(err, "clCreateKernel failed");
|
||||
|
||||
size_t num_pixels = num_elements;
|
||||
//cl_uint num_pixels = 1024*1024*32;
|
||||
|
||||
cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0);
|
||||
cl_uint *pNumTargetsFound = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_uint), 0);
|
||||
cl_int *pTargetLocations = (cl_int* ) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int) * MAX_TARGETS, 0);
|
||||
|
||||
cl_uint targetDescriptor = 777;
|
||||
*pNumTargetsFound = 0;
|
||||
cl_uint i;
|
||||
for(i=0; i < MAX_TARGETS; i++) pTargetLocations[i] = -1;
|
||||
for(i=0; i < num_pixels; i++) pInputImage[i] = 0;
|
||||
pInputImage[0] = targetDescriptor;
|
||||
pInputImage[3] = targetDescriptor;
|
||||
pInputImage[num_pixels - 1] = targetDescriptor;
|
||||
|
||||
err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(cl_uint), (void*) &targetDescriptor);
|
||||
err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound);
|
||||
err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
cl_event done;
|
||||
err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL, 0, NULL, &done);
|
||||
test_error(err,"clEnqueueNDRangeKernel failed");
|
||||
clFlush(queues[0]);
|
||||
|
||||
|
||||
i=0;
|
||||
cl_int status;
|
||||
// check for new targets, if found spawn a task to analyze target.
|
||||
do {
|
||||
err = clGetEventInfo(done,CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, NULL);
|
||||
test_error(err,"clGetEventInfo failed");
|
||||
if( AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1) // -1 indicates slot not used yet.
|
||||
{
|
||||
spawnAnalysisTask(pTargetLocations[i]);
|
||||
i++;
|
||||
}
|
||||
} while (status != CL_COMPLETE || AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1);
|
||||
|
||||
clSVMFree(context, pInputImage);
|
||||
clSVMFree(context, pNumTargetsFound);
|
||||
clSVMFree(context, pTargetLocations);
|
||||
|
||||
if(i != 3) return -1;
|
||||
return 0;
|
||||
}
|
||||
115
test_conformance/SVM/test_pointer_passing.cpp
Normal file
115
test_conformance/SVM/test_pointer_passing.cpp
Normal file
@@ -0,0 +1,115 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// OpenCL C source: work-item 0 checks that the single byte addressed by the
// SVM pointer |pChar| holds |expected|, writing 1 to *num_correct on a match
// and 0 otherwise. Used to prove that arbitrarily aligned pointers into an
// SVM buffer can be passed directly as kernel arguments.
const char *SVMPointerPassing_test_kernel[] = {
"__kernel void verify_char(__global uchar* pChar, volatile __global uint* num_correct, uchar expected)\n"
"{\n"
"    if(0 == get_global_id(0))\n"
"    {\n"
"        *num_correct = 0;\n"
"        if(*pChar == expected)\n"
"        {\n"
"            *num_correct=1;\n"
"        }\n"
"    }\n"
"}\n"
};
|
||||
|
||||
|
||||
// Test that arbitrarily aligned char pointers into shared buffers can be passed directly to a kernel.
|
||||
// This iterates through a buffer passing a pointer to each location to the kernel.
|
||||
// The buffer is initialized to known values at each location.
|
||||
// The kernel checks that it finds the expected value at each location.
|
||||
// TODO: possibly make this work across all base types (including typeN?), also check ptr arithmetic ++,--.
|
||||
int test_svm_pointer_passing(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if(error) return -1;
|
||||
|
||||
clKernelWrapper kernel_verify_char = clCreateKernel(program, "verify_char", &error);
|
||||
test_error(error,"clCreateKernel failed");
|
||||
|
||||
size_t bufSize = 256;
|
||||
char *pbuf = (char*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_uchar)*bufSize, 0);
|
||||
|
||||
cl_int *pNumCorrect = NULL;
|
||||
pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0);
|
||||
|
||||
{
|
||||
clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar)*bufSize, pbuf, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_int), pNumCorrect, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
error = clSetKernelArg(kernel_verify_char, 1, sizeof(void*), (void *) &num_correct);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// put values into buf so that we can expect to see these values in the kernel when we pass a pointer to them.
|
||||
cl_command_queue cmdq = queues[0];
|
||||
cl_uchar* pBuf = (cl_uchar*) clEnqueueMapBuffer(cmdq, buf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uchar)*bufSize, 0, NULL,NULL, &error);
|
||||
test_error2(error, pBuf, "clEnqueueMapBuffer failed");
|
||||
for(int i = 0; i<(int)bufSize; i++)
|
||||
{
|
||||
pBuf[i]= (cl_uchar)i;
|
||||
}
|
||||
error = clEnqueueUnmapMemObject(cmdq, buf, pBuf, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed.");
|
||||
|
||||
for (cl_uint ii = 0; ii<num_devices; ++ii) // iterate over all devices in the platform.
|
||||
{
|
||||
cmdq = queues[ii];
|
||||
for(int i = 0; i<(int)bufSize; i++)
|
||||
{
|
||||
cl_uchar* pChar = &pBuf[i];
|
||||
error = clSetKernelArgSVMPointer(kernel_verify_char, 0, pChar); // pass a pointer to a location within the buffer
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel_verify_char, 2, sizeof(cl_uchar), (void *) &i ); // pass the expected value at the above location.
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clEnqueueNDRangeKernel(cmdq, kernel_verify_char, 1, NULL, &bufSize, NULL, 0, NULL, NULL);
|
||||
test_error(error,"clEnqueueNDRangeKernel failed");
|
||||
|
||||
pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
|
||||
test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");
|
||||
cl_int correct_count = *pNumCorrect;
|
||||
error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed.");
|
||||
|
||||
if(correct_count != 1)
|
||||
{
|
||||
log_error("Passing pointer directly to kernel for byte #%d failed on device %d\n", i, ii);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
error = clFinish(cmdq);
|
||||
test_error(error, "clFinish failed");
|
||||
}
|
||||
|
||||
|
||||
clSVMFree(context, pbuf);
|
||||
clSVMFree(context, pNumCorrect);
|
||||
|
||||
return 0;
|
||||
}
|
||||
153
test_conformance/SVM/test_set_kernel_exec_info_svm_ptrs.cpp
Normal file
153
test_conformance/SVM/test_set_kernel_exec_info_svm_ptrs.cpp
Normal file
@@ -0,0 +1,153 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// Host-side mirror of the BufPtrs struct declared in the kernel source below:
// three SVM pointers bundled into one SVM allocation so the kernel reaches
// the data buffers only indirectly (the clSetKernelExecInfo scenario).
typedef struct {
    cl_int *pA;
    cl_int *pB;
    cl_int *pC;
} BufPtrs;
|
||||
|
||||
// OpenCL C source: the kernel's only argument is a pointer to a BufPtrs
// struct; the buffers it increments (pA/pB/pC) are reached indirectly through
// that struct. Such indirectly accessed SVM allocations must be declared to
// the runtime via clSetKernelExecInfo(CL_KERNEL_EXEC_INFO_SVM_PTRS), which is
// what the host test below exercises.
const char *set_kernel_exec_info_svm_ptrs_kernel[] = {
"struct BufPtrs;\n"
"\n"
"typedef struct {\n"
"    __global int *pA;\n"
"    __global int *pB;\n"
"    __global int *pC;\n"
"} BufPtrs;\n"
"\n"
"__kernel void set_kernel_exec_info_test(__global BufPtrs* pBufs)\n"
"{\n"
"    size_t i;\n"
"    i = get_global_id(0);\n"
"    pBufs->pA[i]++;\n"
"    pBufs->pB[i]++;\n"
"    pBufs->pC[i]++;\n"
"}\n"
};
|
||||
|
||||
// Test that clSetKernelExecInfo works correctly with CL_KERNEL_EXEC_INFO_SVM_PTRS flag.
//
// The kernel receives a single SVM pointer (a BufPtrs struct); the three
// arrays it increments are reached only through pointers stored in that
// struct, so they must be reported to the runtime with clSetKernelExecInfo.
// Pass criterion: after one kernel run every element of pA/pB/pC is 1.
int test_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    clContextWrapper    c = NULL;
    clProgramWrapper    program = NULL;
    cl_uint     num_devices = 0;
    cl_int      error = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];

    //error = create_cl_objects(deviceID, &set_kernel_exec_info_svm_ptrs_kernel[0], &context, &program, &q, &num_devices, CL_DEVICE_SVM_FINE_GRAIN);
    error = create_cl_objects(deviceID, &set_kernel_exec_info_svm_ptrs_kernel[0], &c, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
    if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
    if(error < 0) return -1; // fail test.

    clKernelWrapper k = clCreateKernel(program, "set_kernel_exec_info_test", &error);
    test_error(error, "clCreateKernel failed");

    size_t size = num_elements*sizeof(int);
    //int* pA = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0);
    //int* pB = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0);
    //int* pC = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0);
    // Coarse-grain SVM allocations: the three data arrays plus the struct
    // that carries their pointers into the kernel.
    int* pA = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
    int* pB = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
    int* pC = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0);
    BufPtrs* pBuf = (BufPtrs*) clSVMAlloc(c, CL_MEM_READ_WRITE, sizeof(BufPtrs), 0);

    bool failed = false;
    // Inner scope so the clMemWrapper objects release their cl_mems before the
    // backing SVM allocations are freed below.
    {
        clMemWrapper ba,bb,bc,bBuf;
        ba = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pA, &error);
        test_error(error, "clCreateBuffer failed");
        bb = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pB, &error);
        test_error(error, "clCreateBuffer failed");
        bc = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pC, &error);
        test_error(error, "clCreateBuffer failed");
        bBuf = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, sizeof(BufPtrs), pBuf, &error);
        test_error(error, "clCreateBuffer failed");

        // Map for host initialization. For CL_MEM_USE_HOST_PTR buffers the
        // mapped pointer is the original host pointer, so pA/pB/pC/pBuf can be
        // written directly below while mapped.
        clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
        test_error(error, "clEnqueueMapBuffer failed");
        clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
        test_error(error, "clEnqueueMapBuffer failed");
        clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
        test_error(error, "clEnqueueMapBuffer failed");
        clEnqueueMapBuffer(queues[0], bBuf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(BufPtrs), 0, NULL, NULL, &error);
        test_error(error, "clEnqueueMapBuffer failed");

        for(int i = 0; i < num_elements; i++) pA[i] = pB[i] = pC[i] = 0;

        // Stash the SVM pointers in the struct the kernel will dereference.
        pBuf->pA = pA;
        pBuf->pB = pB;
        pBuf->pC = pC;

        error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
        test_error(error, " clEnqueueUnmapMemObject failed.");
        error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
        test_error(error, " clEnqueueUnmapMemObject failed.");
        error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
        test_error(error, " clEnqueueUnmapMemObject failed.");
        error = clEnqueueUnmapMemObject(queues[0], bBuf, pBuf, 0, NULL, NULL);
        test_error(error, " clEnqueueUnmapMemObject failed.");

        error = clSetKernelArgSVMPointer(k, 0, pBuf);
        test_error(error, "clSetKernelArg failed");

        // The API under test: declare the SVM allocation the kernel accesses
        // indirectly (pBuf holds pA/pB/pC, which are not kernel arguments).
        error = clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS, sizeof(BufPtrs), pBuf);
        test_error(error, "clSetKernelExecInfo failed");

        size_t range = num_elements;
        error = clEnqueueNDRangeKernel(queues[0], k, 1, NULL, &range, NULL, 0, NULL, NULL);
        test_error(error,"clEnqueueNDRangeKernel failed");

        error = clFinish(queues[0]);
        test_error(error, "clFinish failed.");

        // Map again for host verification.
        clEnqueueMapBuffer(queues[0], ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
        test_error(error, "clEnqueueMapBuffer failed");
        clEnqueueMapBuffer(queues[0], bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
        test_error(error, "clEnqueueMapBuffer failed");
        clEnqueueMapBuffer(queues[0], bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error);
        test_error(error, "clEnqueueMapBuffer failed");

        // Each element started at 0 and was incremented once in each array.
        for(int i = 0; i < num_elements; i++)
        {
            if(pA[i] + pB[i] + pC[i] != 3)
                failed = true;
        }

        error = clEnqueueUnmapMemObject(queues[0], ba, pA, 0, NULL, NULL);
        test_error(error, " clEnqueueUnmapMemObject failed.");
        error = clEnqueueUnmapMemObject(queues[0], bb, pB, 0, NULL, NULL);
        test_error(error, " clEnqueueUnmapMemObject failed.");
        error = clEnqueueUnmapMemObject(queues[0], bc, pC, 0, NULL, NULL);
        test_error(error, " clEnqueueUnmapMemObject failed.");
    }

    error = clFinish(queues[0]);
    test_error(error, " clFinish failed.");

    clSVMFree(c, pA);
    clSVMFree(c, pB);
    clSVMFree(c, pC);
    clSVMFree(c, pBuf);

    if(failed) return -1;

    return 0;
}
|
||||
282
test_conformance/SVM/test_shared_address_space_coarse_grain.cpp
Normal file
282
test_conformance/SVM/test_shared_address_space_coarse_grain.cpp
Normal file
@@ -0,0 +1,282 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// Creates linked list using host code
//
// Maps the shared node array for host access (legacy cl_mem map/unmap when
// useNewAPI is CL_FALSE, clEnqueueSVMMap on the raw pointer otherwise), builds
// the lists via the shared create_linked_lists() helper, unmaps, and blocks
// until the queue drains. Returns CL_SUCCESS or the first failing error code.
cl_int create_linked_lists_on_host(cl_command_queue cmdq, cl_mem nodes, Node *pNodes2, cl_int ListLength, size_t numLists, cl_bool useNewAPI )
{
    cl_int error = CL_SUCCESS;

    log_info("SVM: creating linked list on host ");

    Node *pNodes;
    if (useNewAPI == CL_FALSE)
    {
        // Legacy path: the SVM region is wrapped in a cl_mem; map it.
        pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error);
        test_error2(error, pNodes, "clEnqMapBuffer failed");
    }
    else
    {
        // OpenCL 2.0 path: map the SVM pointer directly.
        pNodes = pNodes2;
        error = clEnqueueSVMMap(cmdq, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, pNodes2, sizeof(Node)*ListLength*numLists, 0, NULL,NULL);
        test_error2(error, pNodes, "clEnqueueSVMMap failed");
    }

    create_linked_lists(pNodes, numLists, ListLength);

    if (useNewAPI == CL_FALSE)
    {
        error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
        test_error(error, "clEnqueueUnmapMemObject failed.");
    }
    else
    {
        error = clEnqueueSVMUnmap(cmdq, pNodes2, 0, NULL, NULL);
        test_error(error, "clEnqueueSVMUnmap failed.");
    }

    error = clFinish(cmdq);
    test_error(error, "clFinish failed.");
    return error;
}
|
||||
|
||||
// Purpose: uses host code to verify correctness of the linked list
|
||||
cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes, Node *pNodes2, cl_int ListLength, size_t numLists, cl_bool useNewAPI )
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
cl_int correct_count;
|
||||
|
||||
Node *pNodes;
|
||||
if (useNewAPI == CL_FALSE)
|
||||
{
|
||||
pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
|
||||
test_error2(error, pNodes, "clEnqueueMapBuffer failed");
|
||||
}
|
||||
else
|
||||
{
|
||||
pNodes = pNodes2;
|
||||
error = clEnqueueSVMMap(cmdq, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, pNodes2, sizeof(Node)*ListLength * numLists, 0, NULL,NULL);
|
||||
test_error2(error, pNodes, "clEnqueueSVMMap failed");
|
||||
}
|
||||
|
||||
correct_count = 0;
|
||||
|
||||
error = verify_linked_lists(pNodes, numLists, ListLength);
|
||||
if(error) return -1;
|
||||
|
||||
if (useNewAPI == CL_FALSE)
|
||||
{
|
||||
error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed.");
|
||||
}
|
||||
else
|
||||
{
|
||||
error = clEnqueueSVMUnmap(cmdq, pNodes2, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueSVMUnmap failed.");
|
||||
}
|
||||
|
||||
error = clFinish(cmdq);
|
||||
test_error(error, "clFinish failed.");
|
||||
return error;
|
||||
}
|
||||
|
||||
// Resets the shared node-allocator index and runs the list-creation kernel on
// device |ci|. The allocator buffer is sizeof(cl_int) bytes; it is accessed as
// cl_int here (the previous code wrote through a size_t*, overrunning the
// 4-byte mapping on 64-bit hosts).
cl_int create_linked_lists_on_device(int ci, cl_command_queue cmdq, cl_mem allocator, cl_kernel kernel_create_lists, size_t numLists )
{
    cl_int error = CL_SUCCESS;
    log_info("SVM: creating linked list on device: %d ", ci);

    cl_int *pAllocator = (cl_int*) clEnqueueMapBuffer(cmdq, allocator, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
    test_error2(error, pAllocator, "clEnqueueMapBuffer failed");
    // reset allocator index
    *pAllocator = (cl_int)numLists; // the first numLists elements of the nodes array are already allocated (they hold the head of each list).
    error = clEnqueueUnmapMemObject(cmdq, allocator, pAllocator, 0,NULL,NULL);
    test_error(error, " clEnqueueUnmapMemObject failed.");

    error = clEnqueueNDRangeKernel(cmdq, kernel_create_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL);
    test_error(error, "clEnqueueNDRange failed.");
    error = clFinish(cmdq);
    test_error(error, "clFinish failed.");

    return error;
}
|
||||
|
||||
// Runs the verification kernel on device |vi| and checks that every node of
// every list was counted as correct (one count per good node in num_correct).
// Returns CL_SUCCESS on a fully correct list, -1 otherwise.
cl_int verify_linked_lists_on_device(int vi, cl_command_queue cmdq,cl_mem num_correct, cl_kernel kernel_verify_lists, cl_int ListLength, size_t numLists )
{
    cl_int error = CL_SUCCESS;

    log_info(" and verifying on device: %d ", vi);

    cl_int *pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
    test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");

    *pNumCorrect = 0; // reset numCorrect to zero

    error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed.");

    error = clEnqueueNDRangeKernel(cmdq, kernel_verify_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL);
    test_error(error,"clEnqueueNDRangeKernel failed");

    // Read back the count produced by the verify kernel.
    pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error);
    test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed");
    cl_int correct_count = *pNumCorrect;
    error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL);
    test_error(error, "clEnqueueUnmapMemObject failed");
    // Capture clFinish's result; previously its return value was discarded so
    // the following check examined a stale 'error'.
    error = clFinish(cmdq);
    test_error(error,"clFinish failed");

    if(correct_count != ListLength * (cl_uint)numLists)
    {
        error = -1;
        log_info("Failed\n");
    }
    else
        log_info("Passed\n");

    return error;
}
|
||||
|
||||
// This tests that all devices and the host share a common address space; using only the coarse-grain features.
// This is done by creating a linked list on a device and then verifying the correctness of the list
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
// the platform. The test passes only if every combination passes.
int shared_address_space_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements, cl_bool useNewAPI)
{
    clContextWrapper    context = NULL;
    clProgramWrapper    program = NULL;
    cl_uint     num_devices = 0;
    cl_int      error = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];

    error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
    if(error) return -1;

    size_t numLists = num_elements;
    cl_int ListLength = 32;

    clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
    test_error(error, "clCreateKernel failed");

    clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
    test_error(error, "clCreateKernel failed");

    // this buffer holds the linked list nodes.
    Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);

    // Inner scope so the clMemWrapper objects release their cl_mems before the
    // backing SVM allocation is freed below.
    {
        cl_bool usesSVMpointer = CL_FALSE;
        // Note: 'nodes' is only created on the legacy path; on the new-API
        // path it stays NULL and is unused (the SVM pointer is passed directly).
        clMemWrapper nodes;
        if (useNewAPI == CL_FALSE)
        {
            nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error);
            test_error(error, "clCreateBuffer failed.");

            // verify if buffer uses SVM pointer
            size_t paramSize = 0;
            error = clGetMemObjectInfo(nodes, CL_MEM_USES_SVM_POINTER, 0, 0, &paramSize);
            test_error(error, "clGetMemObjectInfo failed.");

            if (paramSize != sizeof(cl_bool))
            {
                log_error("clGetMemObjectInfo(CL_MEM_USES_SVM_POINTER) returned wrong size.");
                return -1;
            }

            error = clGetMemObjectInfo(nodes, CL_MEM_USES_SVM_POINTER, sizeof(cl_bool), &usesSVMpointer, 0);
            test_error(error, "clGetMemObjectInfo failed.");

            if (usesSVMpointer != CL_TRUE)
            {
                log_error("clGetMemObjectInfo(CL_MEM_USES_SVM_POINTER) returned CL_FALSE for buffer created from SVM pointer.");
                return -1;
            }
        }

        // this buffer holds an index into the nodes buffer, it is used for node allocation
        clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
        test_error(error, "clCreateBuffer failed.");

        // A non-SVM-backed buffer must report CL_FALSE for this query.
        error = clGetMemObjectInfo(allocator, CL_MEM_USES_SVM_POINTER, sizeof(cl_bool), &usesSVMpointer, 0);
        test_error(error, "clGetMemObjectInfo failed.");

        if (usesSVMpointer != CL_FALSE)
        {
            log_error("clGetMemObjectInfo(CL_MEM_USES_SVM_POINTER) returned CL_TRUE for non-SVM buffer.");
            return -1;
        }

        // this buffer holds the count of correct nodes, which is computed by the verify kernel.
        clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
        test_error(error, "clCreateBuffer failed.");

        // Argument 0 of the create kernel is the node array: a raw SVM pointer
        // on the new-API path, the wrapping cl_mem on the legacy path.
        if (useNewAPI == CL_TRUE)
            error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
        else
            error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes);

        error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &allocator);
        error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int), (void *) &ListLength);

        error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
        error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &num_correct);
        error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
        test_error(error, "clSetKernelArg failed");

        // Create linked list on one device and verify on another device (or the host).
        // Do this for all possible combinations of devices and host within the platform.
        for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
        {
            for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
            {
                if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
                {
                    error = create_linked_lists_on_host(queues[0], nodes, pNodes, ListLength, numLists, useNewAPI);
                    if(error) return -1;
                }
                else
                {
                    error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
                    if(error) return -1;
                }

                if(vi == num_devices)
                {
                    error = verify_linked_lists_on_host(vi, queues[0], nodes, pNodes, ListLength, numLists, useNewAPI);
                    if(error) return -1;
                }
                else
                {
                    error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
                    if(error) return -1;
                }
            }
        }
    }

    clSVMFree(context, pNodes);

    return 0;
}
|
||||
|
||||
// Entry point: run the coarse-grain shared-address-space test using the
// original map/unmap (cl_mem based) host-access API.
int test_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    const cl_bool useNewAPI = CL_FALSE;
    return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, useNewAPI);
}
|
||||
|
||||
// Entry point: run the coarse-grain shared-address-space test using the
// OpenCL 2.0 clEnqueueSVMMap/clEnqueueSVMUnmap host-access API.
int test_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
{
    const cl_bool useNewAPI = CL_TRUE;
    return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, useNewAPI);
}
|
||||
101
test_conformance/SVM/test_shared_address_space_fine_grain.cpp
Normal file
101
test_conformance/SVM/test_shared_address_space_fine_grain.cpp
Normal file
@@ -0,0 +1,101 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
|
||||
// This tests that all devices and the host share a common address space using fine-grain mode with no buffers.
|
||||
// This is done by creating a linked list on a device and then verifying the correctness of the list
|
||||
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
|
||||
// the platform. The test passes only if every combination passes.
|
||||
int test_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM);
|
||||
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
if(error < 0) return -1; // fail test.
|
||||
|
||||
size_t numLists = num_elements;
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
// this allocation holds the linked list nodes.
|
||||
// FIXME: remove the alignment once prototype can handle it
|
||||
Node* pNodes = (Node*) align_malloc(numLists*ListLength*sizeof(Node),128);
|
||||
test_error2(error, pNodes, "malloc failed");
|
||||
|
||||
// this allocation holds an index into the nodes buffer, it is used for node allocation
|
||||
size_t* pAllocator = (size_t*) align_malloc(sizeof(cl_int), 128);
|
||||
test_error2(error, pAllocator, "malloc failed");
|
||||
|
||||
// this allocation holds the count of correct nodes, which is computed by the verify kernel.
|
||||
cl_int* pNum_correct = (cl_int*) align_malloc(sizeof(cl_int), 128);
|
||||
test_error2(error, pNum_correct, "malloc failed");
|
||||
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),(void *) &ListLength);
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNum_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
{
|
||||
log_info("creating linked list on host ");
|
||||
create_linked_lists(pNodes, numLists, ListLength);
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
|
||||
if(vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists(pNodes, numLists, ListLength);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNum_correct, kernel_verify_lists, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
align_free(pNodes);
|
||||
align_free(pAllocator);
|
||||
align_free(pNum_correct);
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,138 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
|
||||
|
||||
|
||||
// Builds the linked lists directly on device 'ci' (fine-grain SVM: no map/unmap needed).
// pAllocator is the shared allocation cursor read by the kernel; it is reset here so that
// the first numLists nodes (the list heads) count as already allocated.
cl_int create_linked_lists_on_device_no_map(int ci, cl_command_queue cmdq, size_t* pAllocator, cl_kernel kernel_create_lists, size_t numLists )
{
    cl_int error = CL_SUCCESS;

    log_info("SVM: creating linked list on device: %d ", ci);

    // reset allocator index
    *pAllocator = numLists; // the first numLists elements of the nodes array are already allocated (they hold the head of each list).

    error = clEnqueueNDRangeKernel(cmdq, kernel_create_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL);
    test_error(error, "clEnqueueNDRange failed.");

    error = clFinish(cmdq);
    test_error(error, "clFinish failed.");

    return error;
}
|
||||
|
||||
// Verifies the linked lists on device 'vi' (fine-grain SVM: no map/unmap needed).
// pNumCorrect is a shared counter the verify kernel atomically increments once per
// correctly-linked node; the expected total is ListLength * numLists.
cl_int verify_linked_lists_on_device_no_map(int vi, cl_command_queue cmdq,cl_int* pNumCorrect, cl_kernel kernel_verify_lists, cl_int ListLength, size_t numLists )
{
    cl_int error = CL_SUCCESS;

    log_info(" and verifying on device: %d ", vi);

    *pNumCorrect = 0; // reset numCorrect to zero

    error = clEnqueueNDRangeKernel(cmdq, kernel_verify_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL);
    test_error(error,"clEnqueueNDRangeKernel failed");

    // BUGFIX: capture clFinish's return value; previously the stale enqueue status was
    // re-checked here, so a clFinish failure went unnoticed.
    error = clFinish(cmdq);
    test_error(error,"clFinish failed");

    cl_int correct_count = *pNumCorrect;
    if(correct_count != ListLength * (cl_uint)numLists)
    {
        error = -1;
        log_info("Failed\n");
    }
    else
        log_info("Passed\n");

    return error;
}
|
||||
|
||||
// This tests that all devices and the host share a common address space; using only the fine-grain with buffers mode.
|
||||
// This is done by creating a linked list on a device and then verifying the correctness of the list
|
||||
// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within
|
||||
// the platform. The test passes only if every combination passes.
|
||||
int test_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER);
|
||||
if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
|
||||
if(error < 0) return -1; // fail test.
|
||||
|
||||
size_t numLists = num_elements;
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
// this buffer holds the linked list nodes.
|
||||
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(Node)*ListLength*numLists, 0);
|
||||
|
||||
// this buffer holds an index into the nodes buffer, it is used for node allocation
|
||||
size_t *pAllocator = (size_t*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(size_t), 0);
|
||||
|
||||
// this buffer holds the count of correct nodes, which is computed by the verify kernel.
|
||||
cl_int *pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_int), 0);
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int), (void *) &ListLength);
|
||||
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes);
|
||||
error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNumCorrect);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
{
|
||||
log_info("SVM: creating linked list on host ");
|
||||
create_linked_lists(pNodes, numLists, ListLength);
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
|
||||
if(vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists(pNodes, numLists, ListLength);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNumCorrect, kernel_verify_lists, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clSVMFree(context, pNodes);
|
||||
clSVMFree(context, pAllocator);
|
||||
clSVMFree(context, pNumCorrect);
|
||||
|
||||
return 0;
|
||||
}
|
||||
241
test_conformance/SVM/test_shared_sub_buffers.cpp
Normal file
241
test_conformance/SVM/test_shared_sub_buffers.cpp
Normal file
@@ -0,0 +1,241 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "common.h"
|
||||
|
||||
// OpenCL C source for the shared sub-buffers test.
// Each work item builds a linked list of list_length nodes; nodes are drawn from two
// distinct parent buffers (passed to the kernels as four sub-buffers) so that the lists
// contain cross-buffer pointers. The verify kernel walks each list and counts nodes
// whose (global_id, position_in_list) pair is correct.
const char *shared_sub_buffers_test_kernel[] = {
    "typedef struct Node {\n"
    " int global_id;\n"
    " int position_in_list;\n"
    " __global struct Node* pNext;\n"
    "} Node;\n"

    // create linked lists that use nodes from 2 different buffers
    "__global Node* allocate_node(__global Node* pNodes1, __global Node* pNodes2, volatile __global int* allocation_index, size_t i)\n"
    "{\n"
    // mix things up, adjacent work items will allocate from different buffers
    " if(i & 0x1)\n"
    " return &pNodes1[atomic_inc(allocation_index)];\n"
    " else\n"
    " return &pNodes2[atomic_inc(allocation_index)];\n"
    "}\n"

    // The allocation_index parameter must be initialized on the host to N work-items
    // The first N nodes in pNodes will be the heads of the lists.
    // This tests passing 4 different sub-buffers that come from two parent buffers.
    // Note that we have arguments that appear to be unused, but they are required so that system knows to get all the sub-buffers on to the device
    "__kernel void create_linked_lists(__global Node* pNodes_sub1, __global Node* pNodes2_sub1, __global Node* pNodes_sub2, __global Node* pNodes2_sub2, volatile __global int* allocation_index, int list_length) \n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " __global Node *pNode = &pNodes_sub1[i];\n"
    " pNode->global_id = i;\n"
    " pNode->position_in_list = 0;\n"
    " __global Node *pNew;\n"
    " for(int j=1; j < list_length; j++) {\n"
    " pNew = allocate_node(pNodes_sub1, pNodes2_sub1, allocation_index, i);\n"
    " pNew->global_id = i;\n"
    " pNew->position_in_list = j;\n"
    " pNode->pNext = pNew; // link new node onto end of list\n"
    " pNode = pNew; // move to end of list\n"
    " }\n"
    "}\n"
    // Note that we have arguments that appear to be unused, but they are required so that system knows to get all the sub-buffers on to the device
    "__kernel void verify_linked_lists(__global Node* pNodes_sub1, __global Node* pNodes2_sub1, __global Node* pNodes_sub2, __global Node* pNodes2_sub2, volatile __global uint* num_correct, int list_length)\n"
    "{\n"
    " size_t i = get_global_id(0);\n"
    " __global Node *pNode = &pNodes_sub1[i];\n"
    " for(int j=0; j < list_length; j++) {\n"
    " if( pNode->global_id == i && pNode->position_in_list == j)\n"
    " atomic_inc(num_correct);\n"
    " else \n"
    " break;\n"
    " pNode = pNode->pNext;\n"
    " }\n"
    "}\n"
};
|
||||
|
||||
|
||||
// Creates linked list using host code.
|
||||
cl_int create_linked_lists_on_host_sb(cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
log_info("SVM: creating linked list on host ");
|
||||
|
||||
Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error);
|
||||
test_error2(error, pNodes, "clEnqueueMapBuffer failed");
|
||||
|
||||
Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error);
|
||||
test_error2(error, pNodes2, "clEnqueueMapBuffer failed");
|
||||
|
||||
create_linked_lists(pNodes, numLists, ListLength);
|
||||
|
||||
error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed");
|
||||
error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed");
|
||||
error = clFinish(cmdq);
|
||||
test_error(error, "clFinish failed");
|
||||
return error;
|
||||
}
|
||||
|
||||
// Verify correctness of the linked list using host code.
|
||||
cl_int verify_linked_lists_on_host_sb(int ci, cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
|
||||
{
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
//log_info(" and verifying on host ");
|
||||
|
||||
Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
|
||||
test_error2(error, pNodes, "clEnqueueMapBuffer failed");
|
||||
Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
|
||||
test_error2(error, pNodes, "clEnqueueMapBuffer failed");
|
||||
|
||||
error = verify_linked_lists(pNodes, numLists, ListLength);
|
||||
if(error) return -1;
|
||||
|
||||
error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed");
|
||||
error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
|
||||
test_error(error, "clEnqueueUnmapMemObject failed");
|
||||
error = clFinish(cmdq);
|
||||
test_error(error, "clFinish failed");
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// This tests that shared sub-buffers can be created and that they inherit the flags from the parent buffer when no flags are specified.
|
||||
// This tests that passing only the sub-buffers to a kernel works.
|
||||
// The test is derived from the cross-buffer pointers test which
|
||||
// tests that shared buffers are able to contain pointers that point to other shared buffers.
|
||||
// This tests that all devices and the host share a common address space; using only the coarse-grain features.
|
||||
// This is done by creating a linked list on a device and then verifying the correctness of the list
|
||||
// on another device or the host.
|
||||
// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
|
||||
// This basic test is performed for all combinations of devices and the host.
|
||||
int test_shared_sub_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clContextWrapper context = NULL;
|
||||
clProgramWrapper program = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_int error = CL_SUCCESS;
|
||||
clCommandQueueWrapper queues[MAXQ];
|
||||
|
||||
error = create_cl_objects(deviceID, &shared_sub_buffers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
|
||||
if(error) return -1;
|
||||
|
||||
size_t numLists = num_elements;
|
||||
if(numLists & 0x1) numLists++; // force even size, so we can easily create two sub-buffers of same size.
|
||||
|
||||
cl_int ListLength = 32;
|
||||
|
||||
clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
|
||||
test_error(error, "clCreateKernel failed");
|
||||
|
||||
size_t nodes_bufsize = sizeof(Node)*ListLength*numLists;
|
||||
Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
|
||||
Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
|
||||
|
||||
{
|
||||
// this buffer holds some of the linked list nodes.
|
||||
clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, nodes_bufsize, pNodes, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
cl_buffer_region r;
|
||||
r.origin = 0;
|
||||
r.size = nodes_bufsize / 2;
|
||||
// this should inherit the flag settings from nodes buffer
|
||||
clMemWrapper nodes_sb1 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
r.origin = nodes_bufsize / 2;
|
||||
clMemWrapper nodes_sb2 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
|
||||
|
||||
// this buffer holds some of the linked list nodes.
|
||||
clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
r.origin = 0;
|
||||
r.size = nodes_bufsize / 2;
|
||||
// this should inherit the flag settings from nodes buffer
|
||||
clMemWrapper nodes2_sb1 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
r.origin = nodes_bufsize / 2;
|
||||
clMemWrapper nodes2_sb2 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION,(void*)&r, &error);
|
||||
test_error(error, "clCreateSubBuffer");
|
||||
|
||||
|
||||
|
||||
// this buffer holds the index into the nodes buffer that is used for node allocation
|
||||
clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
// this buffer holds the count of correct nodes which is computed by the verify kernel.
|
||||
clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed.");
|
||||
|
||||
error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes_sb1);
|
||||
error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
|
||||
error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &nodes_sb2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
|
||||
error |= clSetKernelArg(kernel_create_lists, 4, sizeof(void*), (void *) &allocator);
|
||||
error |= clSetKernelArg(kernel_create_lists, 5, sizeof(cl_int),(void *) &ListLength);
|
||||
|
||||
error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes_sb1);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &nodes_sb2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 4, sizeof(void*), (void *) &num_correct);
|
||||
error |= clSetKernelArg(kernel_verify_lists, 5, sizeof(cl_int),(void *) &ListLength);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Create linked list on one device and verify on another device (or the host).
|
||||
// Do this for all possible combinations of devices and host within the platform.
|
||||
for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
|
||||
{
|
||||
for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
|
||||
{
|
||||
if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
|
||||
{
|
||||
error = create_linked_lists_on_host_sb(queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
|
||||
if(vi == num_devices)
|
||||
{
|
||||
error = verify_linked_lists_on_host_sb(vi, queues[0], nodes, nodes2, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
|
||||
if(error) return -1;
|
||||
}
|
||||
} // inner loop, vi
|
||||
} // outer loop, ci
|
||||
}
|
||||
clSVMFree(context, pNodes2);
|
||||
clSVMFree(context, pNodes);
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user