Initial open source release of OpenCL 2.0 CTS.

This commit is contained in:
Kedar Patil
2017-05-16 18:50:35 +05:30
parent 6911ba5116
commit 3a440d17c8
883 changed files with 318212 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
# CMake build rules for the OpenCL CTS "buffers" conformance test module.
set(MODULE_NAME BUFFERS)
# Module test sources plus the shared harness sources compiled directly into
# this target (the harness is not built as a separate library here).
set(${MODULE_NAME}_SOURCES
    main.c
    test_buffer_copy.c
    test_buffer_read.c
    test_buffer_write.c
    test_buffer_mem.c
    array_info.c
    test_buffer_map.c
    test_sub_buffers.cpp
    test_buffer_fill.c
    test_buffer_migrate.c
    test_image_migrate.c
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/threadTesting.c
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/typeWrappers.cpp
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/conversions.c
    ../../test_common/harness/msvc9.c
)
# Common target/flag setup shared by all conformance test modules.
include(../CMakeCommon.txt)

View File

@@ -0,0 +1,24 @@
# Boost.Build (Jamfile) rules for the buffers conformance test executable.
project
    # Compile the .c test sources as C++ (gcc: -xc++, msvc: /TP), matching
    # how the other build systems treat these files.
    : requirements
      <toolset>gcc:<cflags>-xc++
      <toolset>msvc:<cflags>"/TP"
    ;
# NOTE(review): unlike the CMake and Makefile builds, this source list omits
# test_sub_buffers.cpp, test_buffer_migrate.c, test_image_migrate.c and the
# harness sources -- confirm whether this Jamfile is stale.
exe test_buffers
    : array_info.c
      main.c
      test_buffer_copy.c
      test_buffer_map.c
      test_buffer_mem.c
      test_buffer_read.c
      test_buffer_write.c
      test_buffer_fill.c
    : <library>../..//glew
    ;
# Stage the built binary into the per-variant distribution tree.
install dist
    : test_buffers
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/buffers
      <variant>release:<location>$(DIST)/release/tests/test_conformance/buffers
    ;

View File

@@ -0,0 +1,49 @@
# Hand-written Makefile for the Apple (Mac OS X) build of the buffers
# conformance test; links against the OpenCL framework directly.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Test sources plus shared harness sources compiled into this binary.
SRCS = main.c test_buffer_copy.c test_buffer_read.c test_buffer_write.c \
	test_buffer_mem.c array_info.c test_buffer_map.c \
	test_sub_buffers.cpp test_buffer_fill.c \
	test_buffer_migrate.c test_image_migrate.c \
	../../test_common/harness/errorHelpers.c \
	../../test_common/harness/threadTesting.c \
	../../test_common/harness/testHarness.c \
	../../test_common/harness/kernelHelpers.c \
	../../test_common/harness/conversions.c \
	../../test_common/harness/mt19937.c \
	../../test_common/harness/typeWrappers.cpp

DEFINES =
SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
# NOTE(review): FRAMEWORK is assigned the source list -- looks like a
# copy/paste slip; confirm this variable is actually unused.
FRAMEWORK = $(SOURCES)
HEADERS =
TARGET = test_buffers
INCLUDE =
# -Wshorten-64-to-32 is a clang/Apple-specific warning flag.
COMPILERFLAGS = -c -Wall -g -O0 -Wshorten-64-to-32
# Everything (including .c files) is compiled with the C++ driver.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%)
LIBRARIES = -framework OpenCL -framework AppKit ${ATF}
# Map both .c and .cpp sources to their object files.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}
TARGETOBJECT =

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Catch-all: report unknown targets instead of silently doing nothing.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,63 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/*
 * Verify that clGetMemObjectInfo(CL_MEM_SIZE) reports exactly the size a
 * buffer was created with (32*32*32 cl_ints).  Returns 0 on success, -1 on
 * any failure.  deviceID/queue/num_elements are unused (harness signature).
 */
int testBufferSize( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    const size_t width = 32, height = 32, depth = 32;
    const size_t expectedSize = sizeof( cl_int ) * width * height * depth;
    size_t reportedSize = 0;
    cl_int err;

    cl_mem memobj = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), expectedSize, NULL, &err );
    test_error(err, "clCreateBuffer failed.");

    err = clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof( size_t ), (void *)&reportedSize, NULL);
    if ( err ){
        log_error( "Error calling clGetMemObjectInfo(): %d\n", err );
        clReleaseMemObject(memobj);
        return -1;
    }

    if ( reportedSize != expectedSize ) {
        log_error( "Error in clGetMemObjectInfo() check of size\n" );
        clReleaseMemObject(memobj);
        return -1;
    }
    log_info( " CL_MEM_SIZE passed.\n" );

    // cleanup
    clReleaseMemObject(memobj);
    return err;
}   // end testBufferSize()

// FIXME: need to test other flags

View File

@@ -0,0 +1,246 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "procs.h"
#include "../../test_common/harness/testHarness.h"
// Table of test entry points handed to the conformance harness.
// ORDER MATTERS: entry i here must correspond to bufferfn_names[i] below
// (a ct_assert after the names table checks only that the lengths match).
// A 0 entry marks a test that is listed by name but not implemented.
basefn bufferfn_list[] = {
test_buffer_read_async_int,
test_buffer_read_async_uint,
test_buffer_read_async_long,
test_buffer_read_async_ulong,
test_buffer_read_async_short,
test_buffer_read_async_ushort,
test_buffer_read_async_char,
test_buffer_read_async_uchar,
test_buffer_read_async_float,
test_buffer_read_array_barrier_int,
test_buffer_read_array_barrier_uint,
test_buffer_read_array_barrier_long,
test_buffer_read_array_barrier_ulong,
test_buffer_read_array_barrier_short,
test_buffer_read_array_barrier_ushort,
test_buffer_read_array_barrier_char,
test_buffer_read_array_barrier_uchar,
test_buffer_read_array_barrier_float,
test_buffer_read_int,
test_buffer_read_uint,
test_buffer_read_long,
test_buffer_read_ulong,
test_buffer_read_short,
test_buffer_read_ushort,
test_buffer_read_float,
0, //test_buffer_read_half,
test_buffer_read_char,
test_buffer_read_uchar,
test_buffer_read_struct,
test_buffer_read_random_size,
test_buffer_map_read_int,
test_buffer_map_read_uint,
test_buffer_map_read_long,
test_buffer_map_read_ulong,
test_buffer_map_read_short,
test_buffer_map_read_ushort,
test_buffer_map_read_char,
test_buffer_map_read_uchar,
test_buffer_map_read_float,
test_buffer_map_read_struct,
test_buffer_map_write_int,
test_buffer_map_write_uint,
test_buffer_map_write_long,
test_buffer_map_write_ulong,
test_buffer_map_write_short,
test_buffer_map_write_ushort,
test_buffer_map_write_char,
test_buffer_map_write_uchar,
test_buffer_map_write_float,
test_buffer_map_write_struct,
test_buffer_write_int,
test_buffer_write_uint,
test_buffer_write_short,
test_buffer_write_ushort,
test_buffer_write_char,
test_buffer_write_uchar,
test_buffer_write_float,
0, //test_buffer_write_half,
test_buffer_write_long,
test_buffer_write_ulong,
test_buffer_write_struct,
test_buffer_write_async_int,
test_buffer_write_async_uint,
test_buffer_write_async_short,
test_buffer_write_async_ushort,
test_buffer_write_async_char,
test_buffer_write_async_uchar,
test_buffer_write_async_float,
test_buffer_write_async_long,
test_buffer_write_async_ulong,
test_buffer_copy,
test_buffer_partial_copy,
test_mem_read_write_flags,
test_mem_write_flags,
test_mem_read_flags,
test_mem_copy_host_flags,
0, //test_mem_alloc_ref_flags,
testBufferSize,
test_sub_buffers_read_write,
test_sub_buffers_read_write_dual_devices,
test_sub_buffers_overlapping,
test_buffer_fill_int,
test_buffer_fill_uint,
test_buffer_fill_short,
test_buffer_fill_ushort,
test_buffer_fill_char,
test_buffer_fill_uchar,
test_buffer_fill_long,
test_buffer_fill_ulong,
test_buffer_fill_float,
test_buffer_fill_struct,
test_buffer_migrate,
test_image_migrate,
};
// Printable test names, positionally parallel to bufferfn_list above.
const char *bufferfn_names[] = {
"buffer_read_async_int",
"buffer_read_async_uint",
"buffer_read_async_long",
"buffer_read_async_ulong",
"buffer_read_async_short",
"buffer_read_async_ushort",
"buffer_read_async_char",
"buffer_read_async_uchar",
"buffer_read_async_float",
"buffer_read_array_barrier_int",
"buffer_read_array_barrier_uint",
"buffer_read_array_barrier_long",
"buffer_read_array_barrier_ulong",
"buffer_read_array_barrier_short",
"buffer_read_array_barrier_ushort",
"buffer_read_array_barrier_char",
"buffer_read_array_barrier_uchar",
"buffer_read_array_barrier_float",
"buffer_read_int",
"buffer_read_uint",
"buffer_read_long",
"buffer_read_ulong",
"buffer_read_short",
"buffer_read_ushort",
"buffer_read_float",
"buffer_read_half",
"buffer_read_char",
"buffer_read_uchar",
"buffer_read_struct",
"buffer_read_random_size",
"buffer_map_read_int",
"buffer_map_read_uint",
"buffer_map_read_long",
"buffer_map_read_ulong",
"buffer_map_read_short",
"buffer_map_read_ushort",
"buffer_map_read_char",
"buffer_map_read_uchar",
"buffer_map_read_float",
"buffer_map_read_struct",
"buffer_map_write_int",
"buffer_map_write_uint",
"buffer_map_write_long",
"buffer_map_write_ulong",
"buffer_map_write_short",
"buffer_map_write_ushort",
"buffer_map_write_char",
"buffer_map_write_uchar",
"buffer_map_write_float",
"buffer_map_write_struct",
"buffer_write_int",
"buffer_write_uint",
"buffer_write_short",
"buffer_write_ushort",
"buffer_write_char",
"buffer_write_uchar",
"buffer_write_float",
"buffer_write_half",
"buffer_write_long",
"buffer_write_ulong",
"buffer_write_struct",
"buffer_write_async_int",
"buffer_write_async_uint",
"buffer_write_async_short",
"buffer_write_async_ushort",
"buffer_write_async_char",
"buffer_write_async_uchar",
"buffer_write_async_float",
"buffer_write_async_long",
"buffer_write_async_ulong",
"buffer_copy",
"buffer_partial_copy",
"mem_read_write_flags",
"mem_write_only_flags",
"mem_read_only_flags",
"mem_copy_host_flags",
"mem_alloc_ref_flags",
"array_info_size",
"sub_buffers_read_write",
"sub_buffers_read_write_dual_devices",
"sub_buffers_overlapping",
"buffer_fill_int",
"buffer_fill_uint",
"buffer_fill_short",
"buffer_fill_ushort",
"buffer_fill_char",
"buffer_fill_uchar",
"buffer_fill_long",
"buffer_fill_ulong",
"buffer_fill_float",
"buffer_fill_struct",
"buffer_migrate",
"image_migrate",
};
// Compile-time guard: the two tables must stay the same length.
ct_assert((sizeof(bufferfn_names) / sizeof(bufferfn_names[0])) == (sizeof(bufferfn_list) / sizeof(bufferfn_list[0])));
// Number of tests, as passed to the harness.
int num_bufferfns = sizeof(bufferfn_names) / sizeof(char *);
// cl_mem_flags combinations exercised by the buffer tests; the trailing 0
// entry means "no host-pointer flags".  flag_set_names[i] is the printable
// name of flag_set[i], and NUM_FLAGS (procs.h) must equal the entry count.
const cl_mem_flags flag_set[] = {
CL_MEM_ALLOC_HOST_PTR,
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_USE_HOST_PTR,
CL_MEM_COPY_HOST_PTR,
0
};
const char* flag_set_names[] = {
"CL_MEM_ALLOC_HOST_PTR",
"CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR",
"CL_MEM_USE_HOST_PTR",
"CL_MEM_COPY_HOST_PTR",
"0"
};
// Entry point: hand the test table to the shared conformance harness,
// which handles argument parsing, device selection and test dispatch.
int main( int argc, const char *argv[] )
{
    return runTestHarness( argc, argv, num_bufferfns, bufferfn_list, bufferfn_names,
                           false, false, 0 );
}

View File

@@ -0,0 +1,132 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef __PROCS_H__
#define __PROCS_H__
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/mt19937.h"
#include "../../test_common/harness/conversions.h"
#ifndef __APPLE__
#include <CL/cl.h>
#endif
// Shared table of cl_mem_flags combinations (defined in main.c);
// flag_set_names[i] is the printable name of flag_set[i].
extern const cl_mem_flags flag_set[];
extern const char* flag_set_names[];
// Number of entries in flag_set / flag_set_names (kept in sync manually).
#define NUM_FLAGS 5
extern int test_buffer_read_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_random_size( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_async_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_async_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_async_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_async_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_async_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_async_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_async_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_async_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_async_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_array_barrier_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_array_barrier_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_array_barrier_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_array_barrier_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_array_barrier_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_array_barrier_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_array_barrier_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_array_barrier_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_read_array_barrier_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_async_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_async_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_async_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_async_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_async_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_async_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_async_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_async_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_write_async_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_copy( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_partial_copy( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int testBufferSize( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_mem_read_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_mem_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_mem_read_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_mem_copy_host_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_mem_alloc_ref_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_read_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_read_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_read_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_read_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_read_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_read_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_read_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_read_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_read_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_read_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_write_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_write_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_write_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_write_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_write_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_write_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_write_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_write_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_write_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_map_write_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_sub_buffers_read_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_sub_buffers_overlapping( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_image_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_buffer_fill_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_fill_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_fill_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_fill_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_fill_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_fill_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_fill_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_fill_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_fill_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
#endif // #ifndef __PROCS_H__

View File

@@ -0,0 +1,295 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/errorHelpers.h"
/*
 * Element-wise comparison of two int arrays of length n.
 * Returns 0 when they match, -1 on the first mismatch.
 */
static int verify_copy_buffer(int *inptr, int *outptr, int n)
{
    int idx = 0;
    while (idx < n) {
        if (inptr[idx] != outptr[idx])
            return -1;
        ++idx;
    }
    return 0;
}
/*
 * Copy a num_elements cl_int buffer between every src/dst pair of
 * cl_mem_flags in flag_set and verify the destination contents.
 * Returns the number of failing flag combinations, or -1 on a hard error.
 */
static int test_copy( cl_command_queue queue, cl_context context, int num_elements, MTdata d )
{
    cl_mem buffers[2];
    cl_int *int_input_ptr, *int_output_ptr;
    cl_int err;
    int i;
    int src_flag_id, dst_flag_id;
    int errors = 0;
    size_t min_alignment = get_min_alignment(context);

    int_input_ptr = (cl_int*) align_malloc(sizeof(cl_int) * num_elements, min_alignment);
    // Check allocations, matching testPartialCopy() below (the original
    // dereferenced these unchecked).
    if ( ! int_input_ptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(int) * num_elements );
        return -1;
    }
    int_output_ptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment);
    if ( ! int_output_ptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(int) * num_elements );
        align_free( (void *)int_input_ptr );
        return -1;
    }

    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
        for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
            log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);

            // Fresh random input each round; destination seeded with a
            // sentinel so a missed copy is detected.
            for (i=0; i<num_elements; i++){
                int_input_ptr[i] = (int)genrand_int32( d );
                int_output_ptr[i] = 0xdeaddead; // seed with incorrect data
            }

            // USE/COPY_HOST_PTR flags require passing the host pointer at
            // creation time; otherwise create empty and write explicitly.
            if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
                buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], sizeof(cl_int) * num_elements, int_input_ptr, &err);
            else
                buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], sizeof(cl_int) * num_elements, NULL, &err);
            if ( err != CL_SUCCESS ){
                print_error(err, " clCreateBuffer failed\n" );
                align_free( (void *)int_input_ptr );
                align_free( (void *)int_output_ptr );
                return -1;
            }

            if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
                buffers[1] = clCreateBuffer(context, flag_set[dst_flag_id], sizeof(cl_int) * num_elements, int_output_ptr, &err);
            else
                buffers[1] = clCreateBuffer(context, flag_set[dst_flag_id], sizeof(cl_int) * num_elements, NULL, &err);
            if ( err != CL_SUCCESS ){
                print_error(err, " clCreateBuffer failed\n" );
                clReleaseMemObject( buffers[0] );
                align_free( (void *)int_input_ptr );
                align_free( (void *)int_output_ptr );
                return -1;
            }

            // Source buffer created empty: upload the input data now.
            if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
                err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)int_input_ptr, 0, NULL, NULL);
                if ( err != CL_SUCCESS ){
                    print_error( err, "clEnqueueWriteBuffer failed" );
                    clReleaseMemObject( buffers[0] );
                    clReleaseMemObject( buffers[1] );
                    align_free( (void *)int_output_ptr );
                    align_free( (void *)int_input_ptr );
                    return -1;
                }
            }

            err = clEnqueueCopyBuffer(queue, buffers[0], buffers[1], 0, 0, sizeof(cl_int)*num_elements, 0, NULL, NULL);
            if ( err != CL_SUCCESS ){
                print_error( err, "clCopyArray failed" );
                clReleaseMemObject( buffers[0] );
                clReleaseMemObject( buffers[1] );
                align_free( (void *)int_output_ptr );
                align_free( (void *)int_input_ptr );
                return -1;
            }

            // Blocking read so the host copy is complete before verifying.
            err = clEnqueueReadBuffer( queue, buffers[1], true, 0, sizeof(int)*num_elements, (void *)int_output_ptr, 0, NULL, NULL );
            if ( err != CL_SUCCESS ){
                print_error( err, "clEnqueueReadBuffer failed" );
                clReleaseMemObject( buffers[0] );
                clReleaseMemObject( buffers[1] );
                align_free( (void *)int_output_ptr );
                align_free( (void *)int_input_ptr );
                return -1;
            }

            if ( verify_copy_buffer(int_input_ptr, int_output_ptr, num_elements) ){
                log_error( " test failed\n" );
                errors++;
            }
            else{
                log_info( " test passed\n" );
            }

            // cleanup
            clReleaseMemObject( buffers[0] );
            clReleaseMemObject( buffers[1] );
        } // dst flags
    } // src flags

    // cleanup
    align_free( (void *)int_output_ptr );
    align_free( (void *)int_input_ptr );
    return errors;
}   // end test_copy()
/*
 * Copy a sub-range of `size` cl_ints from offset srcStart of a source
 * buffer to offset dstStart of a destination buffer, for every src/dst
 * cl_mem_flags pair in flag_set, and verify the copied range.
 * Returns the number of failing flag combinations, or -1 on a hard error.
 */
static int testPartialCopy( cl_command_queue queue, cl_context context, int num_elements, cl_uint srcStart, cl_uint dstStart, int size, MTdata d )
{
    cl_mem buffers[2];
    int *inptr, *outptr;
    cl_int err;
    int i;
    int src_flag_id, dst_flag_id;
    int errors = 0;
    size_t min_alignment = get_min_alignment(context);

    inptr = (int *)align_malloc( sizeof(int) * num_elements, min_alignment);
    if ( ! inptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(int) * num_elements );
        return -1;
    }
    outptr = (int *)align_malloc( sizeof(int) * num_elements, min_alignment);
    if ( ! outptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(int) * num_elements );
        align_free( (void *)inptr );
        return -1;
    }

    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
        for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
            log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);

            // Fresh random input each round; destination seeded with a
            // sentinel so a missed copy is detected.
            for (i=0; i<num_elements; i++){
                inptr[i] = (int)genrand_int32( d );
                outptr[i] = (int)0xdeaddead; // seed with incorrect data
            }

            // USE/COPY_HOST_PTR flags require the host pointer at creation
            // time; otherwise create empty and write explicitly below.
            if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
                buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], sizeof(cl_int) * num_elements, inptr, &err);
            else
                buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], sizeof(cl_int) * num_elements, NULL, &err);
            if ( err != CL_SUCCESS ){
                // BUG FIX: the original was missing the ';' after this call.
                print_error(err, " clCreateBuffer failed\n" );
                align_free( (void *)outptr );
                align_free( (void *)inptr );
                return -1;
            }

            if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
                buffers[1] = clCreateBuffer(context, flag_set[dst_flag_id], sizeof(cl_int) * num_elements, outptr, &err);
            else
                buffers[1] = clCreateBuffer(context, flag_set[dst_flag_id], sizeof(cl_int) * num_elements, NULL, &err);
            if ( err != CL_SUCCESS ){
                print_error(err, " clCreateBuffer failed\n" );
                clReleaseMemObject( buffers[0] );
                align_free( (void *)outptr );
                align_free( (void *)inptr );
                return -1;
            }

            // Source buffer created empty: upload the input data now.
            if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)){
                err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
                if ( err != CL_SUCCESS ){
                    print_error( err, "clEnqueueWriteBuffer failed" );
                    clReleaseMemObject( buffers[1] );
                    clReleaseMemObject( buffers[0] );
                    align_free( (void *)outptr );
                    align_free( (void *)inptr );
                    return -1;
                }
            }

            // Offsets/length are in bytes for clEnqueueCopyBuffer.
            err = clEnqueueCopyBuffer(queue, buffers[0], buffers[1], srcStart*sizeof(cl_int), dstStart*sizeof(cl_int), sizeof(cl_int)*size, 0, NULL, NULL);
            if ( err != CL_SUCCESS){
                print_error( err, "clEnqueueCopyBuffer failed" );
                clReleaseMemObject( buffers[1] );
                clReleaseMemObject( buffers[0] );
                align_free( (void *)outptr );
                align_free( (void *)inptr );
                return -1;
            }

            err = clEnqueueReadBuffer( queue, buffers[1], true, 0, sizeof(int)*num_elements, (void *)outptr, 0, NULL, NULL );
            if ( err != CL_SUCCESS){
                print_error( err, "clEnqueueReadBuffer failed" );
                clReleaseMemObject( buffers[1] );
                clReleaseMemObject( buffers[0] );
                align_free( (void *)outptr );
                align_free( (void *)inptr );
                return -1;
            }

            // Only the copied sub-range is compared.
            if ( verify_copy_buffer(inptr + srcStart, outptr + dstStart, size) ){
                log_error("buffer_COPY test failed\n");
                errors++;
            }
            else{
                log_info("buffer_COPY test passed\n");
            }

            // cleanup
            clReleaseMemObject( buffers[1] );
            clReleaseMemObject( buffers[0] );
        } // dst mem flags
    } // src mem flags

    // cleanup
    align_free( (void *)outptr );
    align_free( (void *)inptr );
    return errors;
}   // end testPartialCopy()
/*
 * Harness entry: run the full-buffer copy test at the preset element count
 * and at eight pseudo-random sizes.  Returns the number of failing runs.
 */
int test_buffer_copy( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    int failures = 0;
    MTdata rng = init_genrand( gRandomSeed );

    // test the preset size
    log_info( "set size: %d: ", num_elements );
    if (test_copy( queue, context, num_elements, rng ))
        failures++;

    // now test random sizes
    for ( int iter = 0; iter < 8; iter++ ){
        int sz = (int)get_random_float(2.f,131072.f, rng);
        log_info( "random size: %d: ", sz );
        if (test_copy( queue, context, sz, rng ))
            failures++;
    }

    free_mtdata(rng);
    return failures;
}   // end test_buffer_copy()
int test_buffer_partial_copy( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    // Exercise copies between randomly chosen sub-ranges of two buffers;
    // offsets and sizes are picked so every range stays inside num_elements.
    // Returns the number of failing runs.
    MTdata rng = init_genrand( gRandomSeed );
    int failures = 0;
    int iter;

    for ( iter = 0; iter < 8; iter++ ){
        cl_uint srcOffset = (cl_uint)get_random_float( 0.f, (float)(num_elements - 8), rng );
        int copySize = (int)get_random_float( 8.f, (float)(num_elements - srcOffset), rng );
        cl_uint dstOffset = (cl_uint)get_random_float( 0.f, (float)(num_elements - copySize), rng );
        log_info( "random partial copy from %d to %d, size: %d: ", (int)srcOffset, (int)dstOffset, copySize );
        if (testPartialCopy( queue, context, num_elements, srcOffset, dstOffset, copySize, rng ))
            failures++;
    }

    free_mtdata( rng );
    return failures;
}   // end test_buffer_partial_copy()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,703 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/errorHelpers.h"
// Reference values the kernels below store into every buffer element; the
// verify_read_* helpers compare read-back data against these.
#define TEST_PRIME_INT      ((1<<16)+1)
#define TEST_PRIME_UINT     ((1U<<16)+1U)
#define TEST_PRIME_LONG     ((1LL<<32)+1LL)
#define TEST_PRIME_ULONG    ((1ULL<<32)+1ULL)
// Fix: "1S" is not a valid integer-constant suffix in C/C++, so the original
// ((1S<<8)+1S) could never compile if the macro were expanded; cast instead.
#define TEST_PRIME_SHORT    ((short)((1<<8)+1))
#define TEST_PRIME_FLOAT    (float)3.40282346638528860e+38
#define TEST_PRIME_HALF     119.f
#define TEST_BOOL           true
#define TEST_PRIME_CHAR     0x77

// Host-side mirror of the TestStruct declared inside the struct kernel source.
#ifndef TestStruct
typedef struct{
    int     a;
    float   b;
} TestStruct;
#endif
//--- the code for the kernel executables
// Kernels that fill a buffer of int / int2 / int4 / int8 / int16 with
// TEST_PRIME_INT; paired one-to-one with int_kernel_name below.
static const char *buffer_read_int_kernel_code[] = {
"__kernel void test_buffer_read_int(__global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1<<16)+1);\n"
"}\n",
"__kernel void test_buffer_read_int2(__global int2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1<<16)+1);\n"
"}\n",
"__kernel void test_buffer_read_int4(__global int4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1<<16)+1);\n"
"}\n",
"__kernel void test_buffer_read_int8(__global int8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1<<16)+1);\n"
"}\n",
"__kernel void test_buffer_read_int16(__global int16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1<<16)+1);\n"
"}\n" };
static const char *int_kernel_name[] = { "test_buffer_read_int", "test_buffer_read_int2", "test_buffer_read_int4", "test_buffer_read_int8", "test_buffer_read_int16" };
// Kernels that fill a buffer of uint vector widths 1..16 with TEST_PRIME_UINT.
static const char *buffer_read_uint_kernel_code[] = {
"__kernel void test_buffer_read_uint(__global uint *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1U<<16)+1U);\n"
"}\n",
"__kernel void test_buffer_read_uint2(__global uint2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1U<<16)+1U);\n"
"}\n",
"__kernel void test_buffer_read_uint4(__global uint4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1U<<16)+1U);\n"
"}\n",
"__kernel void test_buffer_read_uint8(__global uint8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1U<<16)+1U);\n"
"}\n",
"__kernel void test_buffer_read_uint16(__global uint16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1U<<16)+1U);\n"
"}\n" };
static const char *uint_kernel_name[] = { "test_buffer_read_uint", "test_buffer_read_uint2", "test_buffer_read_uint4", "test_buffer_read_uint8", "test_buffer_read_uint16" };
// Kernels that fill a buffer of long vector widths 1..16 with TEST_PRIME_LONG.
// Skipped at run time when the device lacks 64-bit integer support (gHasLong).
static const char *buffer_read_long_kernel_code[] = {
"__kernel void test_buffer_read_long(__global long *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1L<<32)+1L);\n"
"}\n",
"__kernel void test_buffer_read_long2(__global long2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1L<<32)+1L);\n"
"}\n",
"__kernel void test_buffer_read_long4(__global long4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1L<<32)+1L);\n"
"}\n",
"__kernel void test_buffer_read_long8(__global long8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1L<<32)+1L);\n"
"}\n",
"__kernel void test_buffer_read_long16(__global long16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1L<<32)+1L);\n"
"}\n" };
static const char *long_kernel_name[] = { "test_buffer_read_long", "test_buffer_read_long2", "test_buffer_read_long4", "test_buffer_read_long8", "test_buffer_read_long16" };
// Kernels that fill a buffer of ulong vector widths 1..16 with TEST_PRIME_ULONG.
// Skipped at run time when the device lacks 64-bit integer support (gHasLong).
static const char *buffer_read_ulong_kernel_code[] = {
"__kernel void test_buffer_read_ulong(__global ulong *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1UL<<32)+1UL);\n"
"}\n",
"__kernel void test_buffer_read_ulong2(__global ulong2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1UL<<32)+1UL);\n"
"}\n",
"__kernel void test_buffer_read_ulong4(__global ulong4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1UL<<32)+1UL);\n"
"}\n",
"__kernel void test_buffer_read_ulong8(__global ulong8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1UL<<32)+1UL);\n"
"}\n",
"__kernel void test_buffer_read_ulong16(__global ulong16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1UL<<32)+1UL);\n"
"}\n" };
static const char *ulong_kernel_name[] = { "test_buffer_read_ulong", "test_buffer_read_ulong2", "test_buffer_read_ulong4", "test_buffer_read_ulong8", "test_buffer_read_ulong16" };
// Kernels that fill a buffer of short vector widths 1..16 with (short)257
// (the value TEST_PRIME_SHORT is meant to denote).
static const char *buffer_read_short_kernel_code[] = {
"__kernel void test_buffer_read_short(__global short *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (short)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_short2(__global short2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (short)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_short4(__global short4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (short)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_short8(__global short8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (short)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_short16(__global short16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (short)((1<<8)+1);\n"
"}\n" };
static const char *short_kernel_name[] = { "test_buffer_read_short", "test_buffer_read_short2", "test_buffer_read_short4", "test_buffer_read_short8", "test_buffer_read_short16" };
// Kernels that fill a buffer of ushort vector widths 1..16 with (ushort)257.
static const char *buffer_read_ushort_kernel_code[] = {
"__kernel void test_buffer_read_ushort(__global ushort *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (ushort)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_ushort2(__global ushort2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (ushort)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_ushort4(__global ushort4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (ushort)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_ushort8(__global ushort8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (ushort)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_ushort16(__global ushort16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (ushort)((1<<8)+1);\n"
"}\n" };
static const char *ushort_kernel_name[] = { "test_buffer_read_ushort", "test_buffer_read_ushort2", "test_buffer_read_ushort4", "test_buffer_read_ushort8", "test_buffer_read_ushort16" };
// Kernels that fill a buffer of float vector widths 1..16 with FLT_MAX
// (TEST_PRIME_FLOAT); the value is exactly representable, so == compare is safe.
static const char *buffer_read_float_kernel_code[] = {
"__kernel void test_buffer_read_float(__global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)3.40282346638528860e+38;\n"
"}\n",
"__kernel void test_buffer_read_float2(__global float2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)3.40282346638528860e+38;\n"
"}\n",
"__kernel void test_buffer_read_float4(__global float4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)3.40282346638528860e+38;\n"
"}\n",
"__kernel void test_buffer_read_float8(__global float8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)3.40282346638528860e+38;\n"
"}\n",
"__kernel void test_buffer_read_float16(__global float16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)3.40282346638528860e+38;\n"
"}\n" };
static const char *float_kernel_name[] = { "test_buffer_read_float", "test_buffer_read_float2", "test_buffer_read_float4", "test_buffer_read_float8", "test_buffer_read_float16" };
// Kernels that fill a buffer of char vector widths 1..16 with 'w'
// (0x77 == TEST_PRIME_CHAR).
static const char *buffer_read_char_kernel_code[] = {
"__kernel void test_buffer_read_char(__global char *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (char)'w';\n"
"}\n",
"__kernel void test_buffer_read_char2(__global char2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (char)'w';\n"
"}\n",
"__kernel void test_buffer_read_char4(__global char4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (char)'w';\n"
"}\n",
"__kernel void test_buffer_read_char8(__global char8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (char)'w';\n"
"}\n",
"__kernel void test_buffer_read_char16(__global char16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (char)'w';\n"
"}\n" };
static const char *char_kernel_name[] = { "test_buffer_read_char", "test_buffer_read_char2", "test_buffer_read_char4", "test_buffer_read_char8", "test_buffer_read_char16" };
// Kernels that fill a buffer of uchar vector widths 1..16 with 'w' (0x77).
// (The scalar variant omits the (uchar) cast the others carry; harmless,
// since 'w' converts implicitly.)
static const char *buffer_read_uchar_kernel_code[] = {
"__kernel void test_buffer_read_uchar(__global uchar *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = 'w';\n"
"}\n",
"__kernel void test_buffer_read_uchar2(__global uchar2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (uchar)'w';\n"
"}\n",
"__kernel void test_buffer_read_uchar4(__global uchar4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (uchar)'w';\n"
"}\n",
"__kernel void test_buffer_read_uchar8(__global uchar8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (uchar)'w';\n"
"}\n",
"__kernel void test_buffer_read_uchar16(__global uchar16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (uchar)'w';\n"
"}\n" };
static const char *uchar_kernel_name[] = { "test_buffer_read_uchar", "test_buffer_read_uchar2", "test_buffer_read_uchar4", "test_buffer_read_uchar8", "test_buffer_read_uchar16" };
// Single kernel that fills a buffer of TestStruct with the
// (TEST_PRIME_INT, TEST_PRIME_FLOAT) pair; device-side struct layout must
// match the host-side typedef above.
static const char *buffer_read_struct_kernel_code[] = {
"typedef struct{\n"
"int a;\n"
"float b;\n"
"} TestStruct;\n"
"__kernel void test_buffer_read_struct(__global TestStruct *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid].a = ((1<<16)+1);\n"
" dst[tid].b = (float)3.40282346638528860e+38;\n"
"}\n" };
static const char *struct_kernel_name[] = { "test_buffer_read_struct" };
//--- the verify functions
// Check that each of the n ints at ptr equals TEST_PRIME_INT.
// Returns 0 on success, -1 on the first mismatch.
static int verify_read_int(void *ptr, int n)
{
    const int *data = (const int *)ptr;
    int idx;
    for (idx = 0; idx < n; idx++)
        if (data[idx] != TEST_PRIME_INT)
            return -1;
    return 0;
}
// Check that each of the n cl_uints at ptr equals TEST_PRIME_UINT.
// Returns 0 on success, -1 on the first mismatch.
static int verify_read_uint(void *ptr, int n)
{
    const cl_uint *data = (const cl_uint *)ptr;
    int idx;
    for (idx = 0; idx < n; idx++)
        if (data[idx] != TEST_PRIME_UINT)
            return -1;
    return 0;
}
static int verify_read_long(void *ptr, int n)
{
int i;
cl_long *outptr = (cl_long *)ptr;
for (i=0; i<n; i++){
if ( outptr[i] != TEST_PRIME_LONG )
return -1;
}
return 0;
}
// Check that each of the n cl_ulongs at ptr equals TEST_PRIME_ULONG.
// Returns 0 on success, -1 on the first mismatch.
static int verify_read_ulong(void *ptr, int n)
{
    const cl_ulong *data = (const cl_ulong *)ptr;
    int idx;
    for (idx = 0; idx < n; idx++)
        if (data[idx] != TEST_PRIME_ULONG)
            return -1;
    return 0;
}
// Check that each of the n shorts at ptr equals (short)257 -- the value the
// short kernels store.  Returns 0 on success, -1 on the first mismatch.
static int verify_read_short(void *ptr, int n)
{
    const short *data = (const short *)ptr;
    int idx;
    for (idx = 0; idx < n; idx++)
        if (data[idx] != (short)((1<<8)+1))
            return -1;
    return 0;
}
// Check that each of the n cl_ushorts at ptr equals (cl_ushort)257.
// Returns 0 on success, -1 on the first mismatch.
static int verify_read_ushort(void *ptr, int n)
{
    const cl_ushort *data = (const cl_ushort *)ptr;
    int idx;
    for (idx = 0; idx < n; idx++)
        if (data[idx] != (cl_ushort)((1<<8)+1))
            return -1;
    return 0;
}
// Check that each of the n floats at ptr equals TEST_PRIME_FLOAT (FLT_MAX,
// exactly representable, so == is a valid comparison here).
// Returns 0 on success, -1 on the first mismatch.
static int verify_read_float( void *ptr, int n )
{
    const float *data = (const float *)ptr;
    int idx;
    for ( idx = 0; idx < n; idx++ )
        if ( data[idx] != TEST_PRIME_FLOAT )
            return -1;
    return 0;
}
// Check that each of the n chars at ptr equals TEST_PRIME_CHAR ('w').
// Returns 0 on success, -1 on the first mismatch.
static int verify_read_char(void *ptr, int n)
{
    const char *data = (const char *)ptr;
    int idx;
    for (idx = 0; idx < n; idx++)
        if (data[idx] != TEST_PRIME_CHAR)
            return -1;
    return 0;
}
// Check that each of the n cl_uchars at ptr equals TEST_PRIME_CHAR ('w').
// Returns 0 on success, -1 on the first mismatch.
static int verify_read_uchar( void *ptr, int n )
{
    const cl_uchar *data = (const cl_uchar *)ptr;
    int idx;
    for ( idx = 0; idx < n; idx++ )
        if ( data[idx] != TEST_PRIME_CHAR )
            return -1;
    return 0;
}
static int verify_read_struct( void *ptr, int n )
{
int i;
TestStruct *outptr = (TestStruct *)ptr;
for ( i = 0; i < n; i++ ){
if ( ( outptr[i].a != TEST_PRIME_INT ) ||
( outptr[i].b != TEST_PRIME_FLOAT ) )
return -1;
}
return 0;
}
//----- the test functions
// For each cl_mem_flags combination in flag_set and each vector width
// (1, 2, 4, 8, 16 -- "loops" of them), run the matching fill kernel, map the
// destination buffer for reading with a blocking clEnqueueMapBuffer, and
// verify the mapped contents via fn.  Returns the number of verification
// failures, or -1 on any allocation/API error.
static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
                                 const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
{
    cl_mem      buffers[5];
    void        *outptr[5];
    cl_program  program[5];
    cl_kernel   kernel[5];
    size_t      threads[3], localThreads[3];
    cl_int      err;
    int         i;
    size_t      ptrSizes[5];
    int         src_flag_id;
    int         total_errors = 0;
    void        *mappedPtr;
    size_t      min_alignment = get_min_alignment(context);

    threads[0] = (cl_uint)num_elements;

    // per-element sizes for the 1-, 2-, 4-, 8- and 16-wide kernel variants
    ptrSizes[0] = size;
    ptrSizes[1] = ptrSizes[0] << 1;
    ptrSizes[2] = ptrSizes[1] << 1;
    ptrSizes[3] = ptrSizes[2] << 1;
    ptrSizes[4] = ptrSizes[3] << 1;

    //embedded devices don't support long/ulong so skip over
    if (! gHasLong && strstr(type,"long"))
        return 0;

    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
        log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);

        for ( i = 0; i < loops; i++ ){
            outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
            if ( ! outptr[i] ){
                log_error( " unable to allocate %d bytes of memory\n", (int)ptrSizes[i] * num_elements );
                return -1;
            }

            // *_HOST_PTR flags require the host pointer at creation time
            if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, outptr[i], &err);
            else
                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, NULL, &err);
            // Fix: the original tested "! buffers[i] | err" -- a bitwise OR of
            // a logical value and an error code; use an explicit logical test.
            if ( ! buffers[i] || err != CL_SUCCESS ){
                print_error(err, "clCreateBuffer failed\n" );
                align_free( outptr[i] );
                return -1;
            }

            err = create_single_kernel_helper(context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
            if ( err ){
                log_error( " Error creating program for %s\n", type );
                clReleaseMemObject( buffers[i] );
                align_free( outptr[i] );
                return -1;
            }

            err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
            if ( err != CL_SUCCESS ){
                print_error( err, "clSetKernelArg failed\n" );
                clReleaseKernel( kernel[i] );
                clReleaseProgram( program[i] );
                clReleaseMemObject( buffers[i] );
                align_free( outptr[i] );
                return -1;
            }

            err = get_max_common_work_group_size( context, kernel[i], threads[0], &localThreads[0] );
            test_error( err, "Unable to get work group size to use" );

            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
            if ( err != CL_SUCCESS ){
                print_error( err, "clEnqueueNDRangeKernel failed\n" );
                clReleaseKernel( kernel[i] );
                clReleaseProgram( program[i] );
                clReleaseMemObject( buffers[i] );
                align_free( outptr[i] );
                return -1;
            }

            // Blocking map: the contents are valid as soon as the call returns.
            mappedPtr = clEnqueueMapBuffer(queue, buffers[i], CL_TRUE, CL_MAP_READ, 0, ptrSizes[i]*num_elements, 0, NULL, NULL, &err);
            if ( err != CL_SUCCESS ){
                print_error( err, "clEnqueueMapBuffer failed" );
                clReleaseKernel( kernel[i] );
                clReleaseProgram( program[i] );
                clReleaseMemObject( buffers[i] );
                align_free( outptr[i] );
                return -1;
            }

            // verify the full element count for this vector width
            if (fn(mappedPtr, num_elements*(1<<i))){
                log_error(" %s%d test failed\n", type, 1<<i);
                total_errors++;
            }
            else{
                log_info(" %s%d test passed\n", type, 1<<i);
            }

            err = clEnqueueUnmapMemObject(queue, buffers[i], mappedPtr, 0, NULL, NULL);
            test_error(err, "clEnqueueUnmapMemObject failed");

            // cleanup
            clReleaseKernel( kernel[i] );
            clReleaseProgram( program[i] );
            clReleaseMemObject( buffers[i] );
            // If we are using the outptr[i] as backing via USE_HOST_PTR we need to make sure we are done before freeing.
            if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR)) {
                err = clFinish(queue);
                test_error(err, "clFinish failed");
            }
            align_free( outptr[i] );
        }
    } // cl_mem_flags

    return total_errors;
}   // end test_buffer_map_read()
// Generates the per-type harness entry points (test_buffer_map_read_<type>);
// each instantiation wires the matching kernel sources, kernel names and
// verify callback into test_buffer_map_read() above, covering all five
// vector widths (loops == 5).
#define DECLARE_LOCK_TEST(type, realType) \
int test_buffer_map_read_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) \
{ \
return test_buffer_map_read( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
}
DECLARE_LOCK_TEST(int, cl_int)
DECLARE_LOCK_TEST(uint, cl_uint)
DECLARE_LOCK_TEST(long, cl_long)
DECLARE_LOCK_TEST(ulong, cl_ulong)
DECLARE_LOCK_TEST(short, cl_short)
DECLARE_LOCK_TEST(ushort, cl_ushort)
DECLARE_LOCK_TEST(char, cl_char)
DECLARE_LOCK_TEST(uchar, cl_uchar)
DECLARE_LOCK_TEST(float, cl_float)
// Entry point for the struct variant.  Only a scalar kernel exists for
// TestStruct, so a single loop iteration is requested (loops == 1).
int test_buffer_map_read_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_buffer_map_read( deviceID, context, queue, num_elements, sizeof( TestStruct ), (char*)"struct", 1,
                                 buffer_read_struct_kernel_code, struct_kernel_name, verify_read_struct );
}   // end test_buffer_map_read_struct()

View File

@@ -0,0 +1,524 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#ifndef uchar
typedef unsigned char uchar;
#endif
#define USE_LOCAL_WORK_GROUP 1
// Kernel sources for the cl_mem_flags tests below: each increments every
// element of an int buffer by one (in place, or src -> dst for the read test).
const char *mem_read_write_kernel_code =
"__kernel void test_mem_read_write(__global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = dst[tid]+1;\n"
"}\n";
const char *mem_read_kernel_code =
"__kernel void test_mem_read(__global int *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src[tid]+1;\n"
"}\n";
const char *mem_write_kernel_code =
"__kernel void test_mem_write(__global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = dst[tid]+1;\n"
"}\n";
// The kernels increment element i from its initial value i, so a correct
// result buffer holds i+1 at index i.  Returns 0 if all n elements match,
// -1 on the first mismatch.
static int verify_mem( int *outptr, int n )
{
    int idx = 0;
    while ( idx < n ){
        if ( outptr[idx] != idx + 1 )
            return -1;
        ++idx;
    }
    return 0;
}
// Creates a CL_MEM_READ_WRITE buffer, writes 0..n-1 into it, runs a kernel
// that increments each element in place, reads the result back, and verifies
// it with verify_mem().  Returns 0 on success, -1 on failure.
int test_mem_read_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_mem      buffers[1];
    cl_int      *inptr, *outptr;
    cl_program  program[1];
    cl_kernel   kernel[1];
    size_t      global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
    size_t      local_work_size[3];
#endif
    cl_int      err;
    int         i;
    size_t      min_alignment = get_min_alignment(context);

    global_work_size[0] = (cl_uint)num_elements;

    // Fix: these allocations were previously used without NULL checks,
    // unlike the sibling tests in this file.
    inptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment);
    if ( ! inptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
        return -1;
    }
    outptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment);
    if ( ! outptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
        align_free( (void *)inptr );
        return -1;
    }

    buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * num_elements, NULL, &err);
    if (err != CL_SUCCESS) {
        print_error( err, "clCreateBuffer failed");
        align_free( (void *)outptr );
        align_free( (void *)inptr );
        return -1;
    }

    // seed the buffer with 0..n-1
    for (i=0; i<num_elements; i++)
        inptr[i] = i;

    err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
    if (err != CL_SUCCESS) {
        print_error( err, "clEnqueueWriteBuffer failed");
        clReleaseMemObject( buffers[0] );
        align_free( (void *)outptr );
        align_free( (void *)inptr );
        return -1;
    }

    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_write_kernel_code, "test_mem_read_write" );
    if (err){
        clReleaseMemObject( buffers[0] );
        align_free( (void *)outptr );
        align_free( (void *)inptr );
        return -1;
    }

#ifdef USE_LOCAL_WORK_GROUP
    err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
    test_error( err, "Unable to get work group size to use" );
#endif

    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
    if ( err != CL_SUCCESS ){
        print_error( err, "clSetKernelArg failed" );
        clReleaseMemObject( buffers[0] );
        clReleaseKernel( kernel[0] );
        clReleaseProgram( program[0] );
        align_free( (void *)outptr );
        align_free( (void *)inptr );
        return -1;
    }

#ifdef USE_LOCAL_WORK_GROUP
    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
    if (err != CL_SUCCESS){
        log_error("clEnqueueNDRangeKernel failed\n");
        clReleaseMemObject( buffers[0] );
        clReleaseKernel( kernel[0] );
        clReleaseProgram( program[0] );
        align_free( (void *)outptr );
        align_free( (void *)inptr );
        return -1;
    }

    err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
    if ( err != CL_SUCCESS ){
        print_error( err, "clEnqueueReadBuffer failed" );
        clReleaseMemObject( buffers[0] );
        clReleaseKernel( kernel[0] );
        clReleaseProgram( program[0] );
        align_free( (void *)outptr );
        align_free( (void *)inptr );
        return -1;
    }

    if (verify_mem(outptr, num_elements)){
        log_error("buffer_MEM_READ_WRITE test failed\n");
        err = -1;
    }
    else{
        log_info("buffer_MEM_READ_WRITE test passed\n");
        err = 0;
    }

    // cleanup
    clReleaseMemObject( buffers[0] );
    clReleaseKernel( kernel[0] );
    clReleaseProgram( program[0] );
    align_free( (void *)outptr );
    align_free( (void *)inptr );

    return err;
}   // end test_mem_read_write_flags()
// Creates a CL_MEM_WRITE_ONLY buffer, pre-loads it with 0..n-1 from the host,
// runs a kernel that increments each element in place, and reads the result
// back.  Returns the final error code (0 on success, -1 on failure).
// NOTE(review): unlike the READ_WRITE variant, the read-back data is never
// verified here; the kernel also reads from a write-only buffer, so the
// result presumably isn't well-defined -- confirm intent before "fixing".
int test_mem_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_mem      buffers[1];
    int         *inptr, *outptr;
    cl_program  program[1];
    cl_kernel   kernel[1];
    size_t      global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
    size_t      local_work_size[3];
#endif
    cl_int      err;
    int         i;
    size_t      min_alignment = get_min_alignment(context);

    global_work_size[0] = (cl_uint)num_elements;

    inptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
    if ( ! inptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
        return -1;
    }
    outptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
    if ( ! outptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
        align_free( (void *)inptr );
        return -1;
    }

    buffers[0] = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int) * num_elements, NULL, &err);
    if (err != CL_SUCCESS)
    {
        print_error(err, "clCreateBuffer failed\n");
        align_free( (void *)outptr );
        align_free( (void *)inptr );
        return -1;
    }

    // seed the buffer with 0..n-1 (a host write to a WRITE_ONLY buffer is legal)
    for (i=0; i<num_elements; i++)
        inptr[i] = i;

    err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
    if (err != CL_SUCCESS){
        print_error( err, "clEnqueueWriteBuffer failed" );
        clReleaseMemObject( buffers[0] );
        align_free( (void *)outptr );
        align_free( (void *)inptr );
        return -1;
    }

    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_write_kernel_code, "test_mem_write" );
    if (err){
        clReleaseMemObject( buffers[0] );
        align_free( (void *)outptr );
        align_free( (void *)inptr );
        return -1;
    }

#ifdef USE_LOCAL_WORK_GROUP
    err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
    test_error( err, "Unable to get work group size to use" );
#endif

    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
    if ( err != CL_SUCCESS ){
        print_error( err, "clSetKernelArg failed");
        clReleaseMemObject( buffers[0] );
        clReleaseKernel( kernel[0] );
        clReleaseProgram( program[0] );
        align_free( (void *)outptr );
        align_free( (void *)inptr );
        return -1;
    }

#ifdef USE_LOCAL_WORK_GROUP
    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
    if ( err != CL_SUCCESS ){
        print_error( err, "clEnqueueNDRangeKernel failed" );
        clReleaseMemObject( buffers[0] );
        clReleaseKernel( kernel[0] );
        clReleaseProgram( program[0] );
        align_free( (void *)outptr );
        align_free( (void *)inptr );
        return -1;
    }

    err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
    if ( err != CL_SUCCESS ){
        print_error( err, "Error reading array" );
        clReleaseMemObject( buffers[0] );
        clReleaseKernel( kernel[0] );
        clReleaseProgram( program[0] );
        align_free( (void *)outptr );
        align_free( (void *)inptr );
        return -1;
    }

    // cleanup
    clReleaseMemObject( buffers[0] );
    clReleaseKernel( kernel[0] );
    clReleaseProgram( program[0] );
    align_free( (void *)outptr );
    align_free( (void *)inptr );

    return err;
}   // end test_mem_write()
// Creates a CL_MEM_READ_ONLY source buffer and a CL_MEM_READ_WRITE destination
// buffer, runs a kernel computing dst[i] = src[i]+1 with src seeded 0..n-1,
// then reads back and verifies the destination.  Returns 0 on success,
// -1 on failure.
int test_mem_read_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_mem      buffers[2];
    int         *inptr, *outptr;
    cl_program  program[1];
    cl_kernel   kernel[1];
    size_t      global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
    size_t      local_work_size[3];
#endif
    cl_int      err;
    int         i;
    size_t      min_alignment = get_min_alignment(context);

    global_work_size[0] = (cl_uint)num_elements;

    inptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
    if ( ! inptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
        return -1;
    }
    outptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
    if ( ! outptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
        align_free( (void *)inptr );
        return -1;
    }

    buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * num_elements, NULL, &err);
    if ( err != CL_SUCCESS ){
        print_error(err, " clCreateBuffer failed to create READ_ONLY array\n" );
        align_free( (void *)outptr );
        align_free( (void *)inptr );
        return -1;
    }

    for (i=0; i<num_elements; i++)
        inptr[i] = i;

    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err);
    if ( err != CL_SUCCESS ){
        // Fix: the old message referred to a nonexistent "MEM_ALLOC_GLOBAL_POOL"
        // flag; this buffer is plain CL_MEM_READ_WRITE.
        print_error(err, " clCreateBuffer failed to create READ_WRITE array\n" );
        clReleaseMemObject( buffers[0]) ;
        align_free( (void *)inptr );
        align_free( (void *)outptr );
        return -1;
    }

    err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
    if ( err != CL_SUCCESS ){
        print_error( err, "clEnqueueWriteBuffer() failed");
        clReleaseMemObject( buffers[1]) ;
        clReleaseMemObject( buffers[0]) ;
        align_free( (void *)inptr );
        align_free( (void *)outptr );
        return -1;
    }

    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_kernel_code, "test_mem_read" );
    if ( err ){
        clReleaseMemObject( buffers[1]) ;
        clReleaseMemObject( buffers[0]) ;
        align_free( (void *)inptr );
        align_free( (void *)outptr );
        return -1;
    }

#ifdef USE_LOCAL_WORK_GROUP
    err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
    test_error( err, "Unable to get work group size to use" );
#endif

    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
    err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&buffers[1] );
    if ( err != CL_SUCCESS ){
        print_error( err, "clSetKernelArgs failed" );
        clReleaseMemObject( buffers[1]) ;
        clReleaseMemObject( buffers[0]) ;
        clReleaseKernel( kernel[0] );
        clReleaseProgram( program[0] );
        align_free( (void *)inptr );
        align_free( (void *)outptr );
        return -1;
    }

#ifdef USE_LOCAL_WORK_GROUP
    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
    if (err != CL_SUCCESS){
        print_error( err, "clEnqueueNDRangeKernel failed" );
        clReleaseMemObject( buffers[1]) ;
        clReleaseMemObject( buffers[0]) ;
        clReleaseKernel( kernel[0] );
        clReleaseProgram( program[0] );
        align_free( (void *)inptr );
        align_free( (void *)outptr );
        return -1;
    }

    err = clEnqueueReadBuffer( queue, buffers[1], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
    if ( err != CL_SUCCESS ){
        print_error( err, "clEnqueueReadBuffer failed" );
        clReleaseMemObject( buffers[1]) ;
        clReleaseMemObject( buffers[0]) ;
        clReleaseKernel( kernel[0] );
        clReleaseProgram( program[0] );
        align_free( (void *)inptr );
        align_free( (void *)outptr );
        return -1;
    }

    if (verify_mem(outptr, num_elements)){
        log_error( " CL_MEM_READ_ONLY test failed\n" );
        err = -1;
    }
    else{
        log_info( " CL_MEM_READ_ONLY test passed\n" );
        err = 0;
    }

    // cleanup
    clReleaseMemObject( buffers[1]) ;
    clReleaseMemObject( buffers[0]) ;
    clReleaseKernel( kernel[0] );
    clReleaseProgram( program[0] );
    align_free( (void *)inptr );
    align_free( (void *)outptr );

    return err;
}   // end test_mem_read()
// Creates a CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE buffer initialized from a
// host array holding 0..n-1, runs the in-place increment kernel, reads the
// result back into the same host array and verifies it.  Returns 0 on
// success, -1 on failure.
int test_mem_copy_host_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_mem      buffers[1];
    int         *ptr;
    cl_program  program[1];
    cl_kernel   kernel[1];
    size_t      global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
    size_t      local_work_size[3];
#endif
    cl_int      err;
    int         i;
    size_t      min_alignment = get_min_alignment(context);

    global_work_size[0] = (cl_uint)num_elements;

    ptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
    if ( ! ptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
        return -1;
    }

    for (i=0; i<num_elements; i++)
        ptr[i] = i;

    // COPY_HOST_PTR: the implementation snapshots ptr at creation time
    buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * num_elements, (void *)ptr, &err);
    if (err != CL_SUCCESS){
        print_error(err, "clCreateBuffer failed for CL_MEM_COPY_HOST_PTR\n");
        align_free( (void *)ptr );
        return -1;
    }

    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_write_kernel_code, "test_mem_read_write" );
    if (err){
        clReleaseMemObject( buffers[0] );
        align_free( (void *)ptr );
        return -1;
    }

#ifdef USE_LOCAL_WORK_GROUP
    err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
    test_error( err, "Unable to get work group size to use" );
#endif

    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
    if (err != CL_SUCCESS){
        log_error("clSetKernelArgs failed\n");
        clReleaseMemObject( buffers[0] );
        clReleaseKernel( kernel[0] );
        clReleaseProgram( program[0] );
        align_free( (void *)ptr );
        return -1;
    }

#ifdef USE_LOCAL_WORK_GROUP
    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
    if (err != CL_SUCCESS){
        log_error("clEnqueueNDRangeKernel failed\n");
        clReleaseMemObject( buffers[0] );
        clReleaseKernel( kernel[0] );
        clReleaseProgram( program[0] );
        align_free( (void *)ptr );
        return -1;
    }

    err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)ptr, 0, NULL, NULL );
    if (err != CL_SUCCESS){
        // Fix: the old message referred to a nonexistent
        // "CL_MEM_ALLOC_CONSTANT_POOL" flag; report the failing call instead.
        log_error("clEnqueueReadBuffer failed for CL_MEM_COPY_HOST_PTR buffer.\n");
        clReleaseMemObject( buffers[0] );
        clReleaseKernel( kernel[0] );
        clReleaseProgram( program[0] );
        align_free( (void *)ptr );
        return -1;
    }

    if ( verify_mem( ptr, num_elements ) ){
        log_error("CL_MEM_COPY_HOST_PTR test failed\n");
        err = -1;
    }
    else{
        log_info("CL_MEM_COPY_HOST_PTR test passed\n");
        err = 0;
    }

    // cleanup
    clReleaseMemObject( buffers[0] );
    clReleaseKernel( kernel[0] );
    clReleaseProgram( program[0] );
    align_free( (void *)ptr );

    return err;
}   // end test_mem_copy_host_flags()

View File

@@ -0,0 +1,417 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <stdio.h>
#include <stdlib.h>
#include "procs.h"
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/testHarness.h"
// NOTE(review): MAX_SUB_DEVICES is not referenced anywhere in this file's
// visible code (arrays are sized from CL_DEVICE_MAX_COMPUTE_UNITS instead);
// confirm before removing.
#define MAX_SUB_DEVICES 16 // Limit the sub-devices to ensure no out of resource errors.
// Number of cl_uint elements in each test buffer.
#define BUFFER_SIZE 1024
// Kernel source code: each work item XORs one element of the two input
// buffers with the scalar x and stores the result in dst.
static const char *buffer_migrate_kernel_code =
"__kernel void test_buffer_migrate(__global uint *dst, __global uint *src1, __global uint *src2, uint x)\n"
"{\n"
"  int tid = get_global_id(0);\n"
" dst[tid] = src1[tid] ^ src2[tid] ^ x;\n"
"}\n";
// Migration policies exercised by the test; see migrateMemObject().
enum migrations { MIGRATE_PREFERRED,       // migrate to the preferred sub-device
                  MIGRATE_NON_PREFERRED,   // migrate to a randomly chosen non-preferred sub-device
                  MIGRATE_RANDOM,          // migrate to a randomly chosen sub-device with randomly chosen flags
                  NUMBER_OF_MIGRATIONS };
// Blocking-write BUFFER_SIZE cl_uints of initialization data into `buffer`.
// A NULL buffer is passed through untouched, so this can wrap a possibly
// failed clCreateBuffer() call; the handle is returned unchanged either way.
static cl_mem init_buffer(cl_command_queue cmd_q, cl_mem buffer, cl_uint *data)
{
    cl_int status;

    if (buffer == NULL)
        return NULL;

    status = clEnqueueWriteBuffer(cmd_q, buffer, CL_TRUE, 0,
                                  BUFFER_SIZE * sizeof(cl_uint),
                                  data, 0, NULL, NULL);
    if (status != CL_SUCCESS)
        print_error(status, "Failed on enqueue write of buffer data.");

    return buffer;
}
// Enqueue a migration of each of the num_devices memory objects to a
// destination queue chosen per the requested policy:
//   MIGRATE_PREFERRED     - same-index (preferred) sub-device
//   MIGRATE_NON_PREFERRED - a different sub-device when more than one exists
//   MIGRATE_RANDOM        - random sub-device with random migration flags
// flags[i] records the flags used for object i so the caller can tell
// whether its content became undefined (see restoreBuffer()).
//
// Returns CL_SUCCESS if every migration was enqueued successfully, or the
// first error encountered. (Previously `err` was overwritten every
// iteration, so a later success could mask an earlier failure.)
static cl_int migrateMemObject(enum migrations migrate, cl_command_queue *queues, cl_mem *mem_objects, cl_uint num_devices, cl_mem_migration_flags *flags, MTdata d)
{
    cl_uint i, j;
    cl_int err;
    cl_int first_err = CL_SUCCESS;

    for (i=0; i<num_devices; i++) {
        // Start from a randomly chosen destination queue.
        j = genrand_int32(d) % num_devices;
        flags[i] = 0;
        switch (migrate) {
            case MIGRATE_PREFERRED:
                // Force the device to be preferred
                j = i;
                break;
            case MIGRATE_NON_PREFERRED:
                // Coerce the device to be non-preferred
                if ((j == i) && (num_devices > 1)) j = (j+1) % num_devices;
                break;
            case MIGRATE_RANDOM:
                // Choose a random set of flags
                flags[i] = (cl_mem_migration_flags)(genrand_int32(d) & (CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED));
                break;
        }
        err = clEnqueueMigrateMemObjects(queues[j], 1, (const cl_mem *)(&mem_objects[i]), flags[i], 0, NULL, NULL);
        if (err != CL_SUCCESS) {
            print_error(err, "Failed migrating memory object.");
            // Remember the first failure but keep going so every flags[i]
            // entry is initialized for the caller.
            if (first_err == CL_SUCCESS) first_err = err;
        }
    }
    return first_err;
}
// Re-initialize any buffer whose content became undefined.
//
// After a migration performed with CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED
// the buffer's contents are unspecified, so blocking-write the reference
// data in `buffer` back into each affected cl_mem before the next round.
// Returns CL_SUCCESS, or the first write error encountered.
// (The unused local `j` from the original declaration has been removed.)
static cl_int restoreBuffer(cl_command_queue *queues, cl_mem *buffers, cl_uint num_devices, cl_mem_migration_flags *flags, cl_uint *buffer)
{
    cl_uint i;
    cl_int err;

    // If the buffer was previously migrated with undefined content, reload the content.
    for (i=0; i<num_devices; i++) {
        if (flags[i] & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) {
            if ((err = clEnqueueWriteBuffer(queues[i], buffers[i], CL_TRUE, 0, sizeof(cl_uint)*BUFFER_SIZE, buffer, 0, NULL, NULL)) != CL_SUCCESS) {
                print_error(err, "Failed on restoration enqueue write of buffer data.");
                return err;
            }
        }
    }
    return CL_SUCCESS;
}
// Exercise clEnqueueMigrateMemObjects() on buffers shared between sub-devices.
//
// The device is partitioned once per supported affinity domain (and one
// final pass with domain == 0 runs on the unpartitioned device).  For each
// partitioning, three buffers per sub-device (two inputs A/B, one output C)
// are created; then for every combination of migration policies for A, C
// and B the buffers are migrated, a simple XOR kernel runs on each
// sub-device's queue, and the output is verified whenever neither input was
// migrated with CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED.  Buffers left
// undefined by a random migration are rewritten before the next round.
//
// Returns 0 on success, -1 on failure.
int test_buffer_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int failed = 0;
    cl_uint i, j;
    cl_int err;
    cl_uint max_sub_devices = 0;
    cl_uint num_devices, num_devices_limited;
    cl_uint A[BUFFER_SIZE], B[BUFFER_SIZE], C[BUFFER_SIZE];
    cl_uint test_number = 1;
    cl_device_affinity_domain domain, domains;
    cl_device_id *devices;
    cl_command_queue *queues;
    cl_mem_migration_flags *flagsA, *flagsB, *flagsC;
    cl_device_partition_property property[] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, 0, 0};
    cl_mem *bufferA, *bufferB, *bufferC;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_context ctx = NULL; // context for all sub-devices
    enum migrations migrateA, migrateB, migrateC;
    MTdata d = init_genrand(gRandomSeed);
    const size_t wgs[1] = {BUFFER_SIZE};

    /* Allocate arrays whose size varies according to the maximum number of sub-devices */
    // The compute-unit count serves as an upper bound on how many
    // sub-devices any partitioning can produce.
    if ((err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_sub_devices), &max_sub_devices, NULL)) != CL_SUCCESS) {
        print_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_COMPUTE_UNITS) failed");
        return -1;
    }
    if (max_sub_devices < 1) {
        log_error("ERROR: Invalid number of compute units returned.\n");
        return -1;
    }

    devices = (cl_device_id *)malloc(max_sub_devices * sizeof(cl_device_id));
    queues = (cl_command_queue *)malloc(max_sub_devices * sizeof(cl_command_queue));
    flagsA = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
    flagsB = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
    flagsC = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
    bufferA = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));
    bufferB = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));
    bufferC = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));

    if ((devices == NULL) || (queues == NULL) ||
        (flagsA == NULL) || (flagsB == NULL) || (flagsC == NULL) ||
        (bufferA == NULL) || (bufferB == NULL) || (bufferC == NULL)) {
        log_error("ERROR: Failed to successfully allocate required local buffers.\n");
        failed = -1;
        goto cleanup_allocations;
    }

    // NULL everything so the goto-based cleanup paths can safely release
    // only what was actually created.
    for (i=0; i<max_sub_devices; i++) {
        devices[i] = NULL;
        queues [i] = NULL;
        bufferA[i] = bufferB[i] = bufferC[i] = NULL;
    }

    // Reference input data for the two source buffers.
    for (i=0; i<BUFFER_SIZE; i++) {
        A[i] = genrand_int32(d);
        B[i] = genrand_int32(d);
    }

    // Attempt to partition the device along each of the allowed affinity domain.
    // NOTE(review): returning here leaks the eight arrays allocated above;
    // consider `failed = -1; goto cleanup_allocations;` instead.
    if ((err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(domains), &domains, NULL)) != CL_SUCCESS) {
        print_error(err, "clGetDeviceInfo(CL_PARTITION_AFFINITY_DOMAIN) failed");
        return -1;
    }

    domains &= (CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE | CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE |
                CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE | CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE | CL_DEVICE_AFFINITY_DOMAIN_NUMA);

    // One pass per supported affinity domain; the final pass (domain == 0)
    // exercises the APIs on the whole, unpartitioned device.
    do {
        if (domains) {
            // Extract the lowest remaining domain bit and clear it.
            for (domain = 1; (domain & domains) == 0; domain <<= 1) {};
            domains &= ~domain;
        } else {
            domain = 0;
        }

        // Determine the number of partitions for the device given the specific domain.
        if (domain) {
            property[1] = domain;
            err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, -1, NULL, &num_devices);
            if ((err != CL_SUCCESS) || (num_devices == 0)) {
                print_error(err, "Obtaining the number of partions by affinity failed.");
                failed = 1;
                goto cleanup;
            }
        } else {
            num_devices = 1;
        }

        if (num_devices > 1) {
            // Create each of the sub-devices and a corresponding context.
            if ((err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, num_devices, devices, &num_devices)) != CL_SUCCESS) {
                print_error(err, "Failed creating sub devices.");
                failed = 1;
                goto cleanup;
            }

            // Create a context containing all the sub-devices
            ctx = clCreateContext(NULL, num_devices, devices, notify_callback, NULL, &err);
            if (ctx == NULL) {
                print_error(err, "Failed creating context containing the sub-devices.");
                failed = 1;
                goto cleanup;
            }

            // Create a command queue for each sub-device
            for (i=0; i<num_devices; i++) {
                if (devices[i]) {
                    if ((queues[i] = clCreateCommandQueueWithProperties(ctx, devices[i], 0, &err)) == NULL) {
                        print_error(err, "Failed creating command queues.");
                        failed = 1;
                        goto cleanup;
                    }
                }
            }
        } else {
            // No partitioning available. Just exercise the APIs on a single device.
            devices[0] = deviceID;
            queues[0] = queue;
            ctx = context;
        }

        // Build the kernel program.
        // (The assignment inside the `if` is intentional: any non-zero
        // result from the helper is an error.)
        if (err = create_single_kernel_helper(ctx, &program, &kernel, 1, &buffer_migrate_kernel_code, "test_buffer_migrate")) {
            print_error(err, "Failed creating kernel.");
            failed = 1;
            goto cleanup;
        }

        num_devices_limited = num_devices;

        // Allocate memory buffers. 3 buffers (2 input, 1 output) for each sub-device.
        // If we run out of memory, then restrict the number of sub-devices to be tested.
        for (i=0; i<num_devices; i++) {
            bufferA[i] = init_buffer(queues[i], clCreateBuffer(ctx, (CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR), sizeof(cl_uint) * BUFFER_SIZE, NULL, &err), A);
            bufferB[i] = init_buffer(queues[i], clCreateBuffer(ctx, (CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR), sizeof(cl_uint) * BUFFER_SIZE, NULL, &err), B);
            bufferC[i] = clCreateBuffer(ctx, (CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR), sizeof(cl_uint) * BUFFER_SIZE, NULL, &err);

            if ((bufferA[i] == NULL) || (bufferB[i] == NULL) || (bufferC[i] == NULL)) {
                if (i == 0) {
                    log_error("Failed to allocate even 1 set of buffers.\n");
                    failed = 1;
                    goto cleanup;
                }
                num_devices_limited = i;
                break;
            }
        }

        // For each partition, we will execute the test kernel with each of the 3 buffers migrated to one of the migrate options
        for (migrateA=(enum migrations)(0); migrateA<NUMBER_OF_MIGRATIONS; migrateA = (enum migrations)((int)migrateA + 1)) {
            if (migrateMemObject(migrateA, queues, bufferA, num_devices_limited, flagsA, d) != CL_SUCCESS) {
                failed = 1;
                goto cleanup;
            }
            for (migrateC=(enum migrations)(0); migrateC<NUMBER_OF_MIGRATIONS; migrateC = (enum migrations)((int)migrateC + 1)) {
                if (migrateMemObject(migrateC, queues, bufferC, num_devices_limited, flagsC, d) != CL_SUCCESS) {
                    failed = 1;
                    goto cleanup;
                }
                for (migrateB=(enum migrations)(0); migrateB<NUMBER_OF_MIGRATIONS; migrateB = (enum migrations)((int)migrateB + 1)) {
                    if (migrateMemObject(migrateB, queues, bufferB, num_devices_limited, flagsB, d) != CL_SUCCESS) {
                        failed = 1;
                        goto cleanup;
                    }
                    // Run the test on each of the partitions.
                    for (i=0; i<num_devices_limited; i++) {
                        cl_uint x;
                        x = i + test_number;
                        if ((err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (const void *)&bufferC[i])) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 0.");
                            failed = 1;
                            goto cleanup;
                        }
                        if ((err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (const void *)&bufferA[i])) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 1.");
                            failed = 1;
                            goto cleanup;
                        }
                        if ((err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (const void *)&bufferB[i])) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 2.");
                            failed = 1;
                            goto cleanup;
                        }
                        if ((err = clSetKernelArg(kernel, 3, sizeof(cl_uint), (const void *)&x)) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 3.");
                            failed = 1;
                            goto cleanup;
                        }
                        if ((err = clEnqueueNDRangeKernel(queues[i], kernel, 1, NULL, wgs, NULL, 0, NULL, NULL)) != CL_SUCCESS) {
                            print_error(err, "Failed enqueueing the NDRange kernel.");
                            failed = 1;
                            goto cleanup;
                        }
                    }
                    // Verify the results as long as neither input is an undefined migration
                    for (i=0; i<num_devices_limited; i++, test_number++) {
                        if (((flagsA[i] | flagsB[i]) & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) == 0) {
                            if ((err = clEnqueueReadBuffer(queues[i], bufferC[i], CL_TRUE, 0, sizeof(cl_uint)*BUFFER_SIZE, C, 0, NULL, NULL)) != CL_SUCCESS) {
                                print_error(err, "Failed reading output buffer.");
                                failed = 1;
                                goto cleanup;
                            }
                            for (j=0; j<BUFFER_SIZE; j++) {
                                cl_uint expected;
                                expected = A[j] ^ B[j] ^ test_number;
                                if (C[j] != expected) {
                                    log_error("Failed on device %d, work item %4d, expected 0x%08x got 0x%08x (0x%08x ^ 0x%08x ^ 0x%08x)\n", i, j, expected, C[j], A[j], B[j], test_number);
                                    failed = 1;
                                }
                            }
                            if (failed) goto cleanup;
                        }
                    }
                    // Reload any B buffer whose content was left undefined by
                    // a random migration before the next iteration.
                    if (restoreBuffer(queues, bufferB, num_devices_limited, flagsB, B) != CL_SUCCESS) {
                        failed = 1;
                        goto cleanup;
                    }
                }
            }
            if (restoreBuffer(queues, bufferA, num_devices_limited, flagsA, A) != CL_SUCCESS) {
                failed = 1;
                goto cleanup;
            }
        }

    cleanup:
        // Clean up all the allocted resources create by the test. This includes sub-devices,
        // command queues, and memory buffers.
        // NOTE(review): released buffer/queue handles are not reset to NULL
        // here (only devices[i] is); if a later domain iteration creates
        // fewer objects, the stale handles would be released again on the
        // next pass through cleanup — confirm and consider NULLing them.
        for (i=0; i<max_sub_devices; i++) {
            // Memory buffer cleanup
            if (bufferA[i]) {
                if ((err = clReleaseMemObject(bufferA[i])) != CL_SUCCESS) {
                    print_error(err, "Failed releasing memory object.");
                    failed = 1;
                }
            }
            if (bufferB[i]) {
                if ((err = clReleaseMemObject(bufferB[i])) != CL_SUCCESS) {
                    print_error(err, "Failed releasing memory object.");
                    failed = 1;
                }
            }
            if (bufferC[i]) {
                if ((err = clReleaseMemObject(bufferC[i])) != CL_SUCCESS) {
                    print_error(err, "Failed releasing memory object.");
                    failed = 1;
                }
            }
            if (num_devices > 1) {
                // Command queue cleanup
                if (queues[i]) {
                    if ((err = clReleaseCommandQueue(queues[i])) != CL_SUCCESS) {
                        print_error(err, "Failed releasing command queue.");
                        failed = 1;
                    }
                }
                // Sub-device cleanup
                if (devices[i]) {
                    if ((err = clReleaseDevice(devices[i])) != CL_SUCCESS) {
                        print_error(err, "Failed releasing sub device.");
                        failed = 1;
                    }
                }
                devices[i] = 0;
            }
        }

        // Context, program, and kernel cleanup
        if (program) {
            if ((err = clReleaseProgram(program)) != CL_SUCCESS) {
                print_error(err, "Failed releasing program.");
                failed = 1;
            }
            program = NULL;
        }
        if (kernel) {
            if ((err = clReleaseKernel(kernel)) != CL_SUCCESS) {
                print_error(err, "Failed releasing kernel.");
                failed = 1;
            }
            kernel = NULL;
        }
        // Only release the context if it was created here; the caller owns
        // the original `context`.
        if (ctx && (ctx != context)) {
            if ((err = clReleaseContext(ctx)) != CL_SUCCESS) {
                print_error(err, "Failed releasing context.");
                failed = 1;
            }
        }
        ctx = NULL;

        if (failed) goto cleanup_allocations;
    } while (domains);

cleanup_allocations:
    if (devices) free(devices);
    if (queues) free(queues);
    if (flagsA) free(flagsA);
    if (flagsB) free(flagsB);
    if (flagsC) free(flagsC);
    if (bufferA) free(bufferA);
    if (bufferB) free(bufferB);
    if (bufferC) free(bufferC);
    return ((failed) ? -1 : 0);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,487 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <stdio.h>
#include <stdlib.h>
#include "procs.h"
#include "../../test_common/harness/errorHelpers.h"
// NOTE(review): MAX_SUB_DEVICES is not referenced anywhere in this file's
// visible code (arrays are sized from CL_DEVICE_MAX_COMPUTE_UNITS instead);
// confirm before removing.
#define MAX_SUB_DEVICES 16 // Limit the sub-devices to ensure no out of resource errors.
// Per-image element count: IMAGE_DIM x IMAGE_DIM pixels, 4 cl_uint channels
// each (CL_RGBA / CL_UNSIGNED_INT32) -> 16*16*4 = 1024.
#define MEM_OBJ_SIZE    1024
#define IMAGE_DIM       16
// Kernel source code: each work item XORs the sampled texels of the two
// input images with the scalar x and writes the result to dst.
static const char *image_migrate_kernel_code =
"__kernel void test_image_migrate(write_only image2d_t dst, read_only image2d_t src1,\n"
" read_only image2d_t src2, sampler_t sampler, uint x)\n"
"{\n"
" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
" int2 coords = (int2) {tidX, tidY};\n"
" uint4 val = read_imageui(src1, sampler, coords) ^\n"
" read_imageui(src2, sampler, coords) ^\n"
" x;\n"
" write_imageui(dst, coords, val);\n"
"}\n";
// Migration policies exercised by the test; see migrateMemObject().
enum migrations { MIGRATE_PREFERRED,       // migrate to the preferred sub-device
                  MIGRATE_NON_PREFERRED,   // migrate to a randomly chosen non-preferred sub-device
                  MIGRATE_RANDOM,          // migrate to a randomly chosen sub-device with randomly chosen flags
                  NUMBER_OF_MIGRATIONS };
// Blocking-write the full IMAGE_DIM x IMAGE_DIM initialization data into
// `image`.  A NULL image is passed through untouched, so this can wrap a
// possibly failed create_image_2d() call; the handle is returned unchanged.
static cl_mem init_image(cl_command_queue cmd_q, cl_mem image, cl_uint *data)
{
    size_t zero_origin[3] = {0, 0, 0};
    size_t full_region[3] = {IMAGE_DIM, IMAGE_DIM, 1};
    cl_int status;

    if (image == NULL)
        return NULL;

    status = clEnqueueWriteImage(cmd_q, image, CL_TRUE,
                                 zero_origin, full_region, 0, 0,
                                 data, 0, NULL, NULL);
    if (status != CL_SUCCESS)
        print_error(status, "Failed on enqueue write of image data.");

    return image;
}
// Enqueue a migration of each of the num_devices images to a destination
// queue chosen per the requested policy:
//   MIGRATE_PREFERRED     - same-index (preferred) sub-device
//   MIGRATE_NON_PREFERRED - a different sub-device when more than one exists
//   MIGRATE_RANDOM        - random sub-device with random migration flags
// flags[i] records the flags used for object i so the caller can tell
// whether its content became undefined (see restoreImage()).
//
// Returns CL_SUCCESS if every migration was enqueued successfully, or the
// first error encountered. (Previously `err` was overwritten every
// iteration, so a later success could mask an earlier failure.)
static cl_int migrateMemObject(enum migrations migrate, cl_command_queue *queues, cl_mem *mem_objects,
                               cl_uint num_devices, cl_mem_migration_flags *flags, MTdata d)
{
    cl_uint i, j;
    cl_int err;
    cl_int first_err = CL_SUCCESS;

    for (i=0; i<num_devices; i++) {
        // Start from a randomly chosen destination queue.
        j = genrand_int32(d) % num_devices;
        flags[i] = 0;
        switch (migrate) {
            case MIGRATE_PREFERRED:
                // Force the device to be preferred
                j = i;
                break;
            case MIGRATE_NON_PREFERRED:
                // Coerce the device to be non-preferred
                if ((j == i) && (num_devices > 1)) j = (j+1) % num_devices;
                break;
            case MIGRATE_RANDOM:
                // Choose a random set of flags
                flags[i] = (cl_mem_migration_flags)(genrand_int32(d) & (CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED));
                break;
        }
        err = clEnqueueMigrateMemObjects(queues[j], 1, (const cl_mem *)(&mem_objects[i]),
                                         flags[i], 0, NULL, NULL);
        if (err != CL_SUCCESS) {
            print_error(err, "Failed migrating memory object.");
            // Remember the first failure but keep going so every flags[i]
            // entry is initialized for the caller.
            if (first_err == CL_SUCCESS) first_err = err;
        }
    }
    return first_err;
}
// Re-initialize any image whose content became undefined.
//
// After a migration performed with CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED
// the image's contents are unspecified, so blocking-write the reference
// data in `buffer` back into each affected image before the next round.
// Returns CL_SUCCESS, or the first write error encountered.
static cl_int restoreImage(cl_command_queue *queues, cl_mem *mem_objects, cl_uint num_devices,
                           cl_mem_migration_flags *flags, cl_uint *buffer)
{
    const size_t zero_origin[3] = {0, 0, 0};
    const size_t full_region[3] = {IMAGE_DIM, IMAGE_DIM, 1};
    cl_uint dev;
    cl_int status;

    for (dev = 0; dev < num_devices; dev++) {
        // Only images migrated with undefined content need reloading.
        if ((flags[dev] & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) == 0)
            continue;

        status = clEnqueueWriteImage(queues[dev], mem_objects[dev], CL_TRUE,
                                     zero_origin, full_region, 0, 0,
                                     buffer, 0, NULL, NULL);
        if (status != CL_SUCCESS) {
            print_error(status, "Failed on restoration enqueue write of image data.");
            return status;
        }
    }
    return CL_SUCCESS;
}
// Declaration moved out of protected scope/goto
// Sampler creation properties used by test_image_migrate():
// unnormalized coordinates, clamp addressing, nearest filtering.
// Kept at file scope so the goto-based cleanup paths in the test do not
// jump over a local initialization.
cl_sampler_properties properties[] = {
    CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
    CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP,
    CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
    0
};
// Exercise clEnqueueMigrateMemObjects() on 2D images shared between
// sub-devices.  Mirrors test_buffer_migrate: the device is partitioned once
// per supported affinity domain (plus one unpartitioned pass), three RGBA
// uint32 images per sub-device (two inputs A/B, one output C) are created,
// and for every combination of migration policies the images are migrated,
// an XOR kernel runs on each sub-device's queue, and the output is verified
// whenever neither input was migrated with
// CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED.
//
// Returns 0 on success (or if the device lacks image support), -1 on failure.
int test_image_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int failed = 0;
    cl_uint i, j;
    cl_int err;
    cl_uint max_sub_devices = 0;
    cl_uint num_devices, num_devices_limited;
    cl_uint A[MEM_OBJ_SIZE], B[MEM_OBJ_SIZE], C[MEM_OBJ_SIZE];
    cl_uint test_number = 1;
    cl_device_affinity_domain domain, domains;
    cl_device_id *devices;
    cl_command_queue *queues;
    cl_mem_migration_flags *flagsA, *flagsB, *flagsC;
    cl_device_partition_property property[] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, 0, 0};
    cl_mem *imageA, *imageB, *imageC;
    cl_mem_flags flags;
    cl_image_format format;
    cl_sampler sampler = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_context ctx = NULL; // context for all sub-devices
    enum migrations migrateA, migrateB, migrateC;
    MTdata d = init_genrand(gRandomSeed);
    const size_t wgs[2] = {IMAGE_DIM, IMAGE_DIM};
    const size_t wls[2] = {1, 1};

    // Check for image support.
    if(checkForImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED) {
        log_info("Device does not support images. Skipping test.\n");
        return 0;
    }

    // Allocate arrays whose size varies according to the maximum number of sub-devices.
    // The compute-unit count serves as an upper bound on how many
    // sub-devices any partitioning can produce.
    if ((err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_sub_devices), &max_sub_devices, NULL)) != CL_SUCCESS) {
        print_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_COMPUTE_UNITS) failed");
        return -1;
    }
    if (max_sub_devices < 1) {
        log_error("ERROR: Invalid number of compute units returned.\n");
        return -1;
    }

    devices = (cl_device_id *)malloc(max_sub_devices * sizeof(cl_device_id));
    queues = (cl_command_queue *)malloc(max_sub_devices * sizeof(cl_command_queue));
    flagsA = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
    flagsB = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
    flagsC = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
    imageA = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));
    imageB = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));
    imageC = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));

    if ((devices == NULL) || (queues == NULL) ||
        (flagsA == NULL) || (flagsB == NULL) || (flagsC == NULL) ||
        (imageA == NULL) || (imageB == NULL) || (imageC == NULL)) {
        log_error("ERROR: Failed to successfully allocate required local buffers.\n");
        failed = -1;
        goto cleanup_allocations;
    }

    // NULL everything so the goto-based cleanup paths can safely release
    // only what was actually created.
    for (i=0; i<max_sub_devices; i++) {
        devices[i] = NULL;
        queues [i] = NULL;
        imageA[i] = imageB[i] = imageC[i] = NULL;
    }

    // Reference input data for the two source images.
    for (i=0; i<MEM_OBJ_SIZE; i++) {
        A[i] = genrand_int32(d);
        B[i] = genrand_int32(d);
    }

    // Set image format.
    format.image_channel_order = CL_RGBA;
    format.image_channel_data_type = CL_UNSIGNED_INT32;

    // Attempt to partition the device along each of the allowed affinity domain.
    // NOTE(review): returning here leaks the eight arrays allocated above;
    // consider `failed = -1; goto cleanup_allocations;` instead.
    if ((err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(domains), &domains, NULL)) != CL_SUCCESS) {
        print_error(err, "clGetDeviceInfo(CL_PARTITION_AFFINITY_DOMAIN) failed");
        return -1;
    }

    domains &= (CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE | CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE |
                CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE | CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE | CL_DEVICE_AFFINITY_DOMAIN_NUMA);

    // One pass per supported affinity domain; the final pass (domain == 0)
    // exercises the APIs on the whole, unpartitioned device.
    do {
        if (domains) {
            // Extract the lowest remaining domain bit and clear it.
            for (domain = 1; (domain & domains) == 0; domain <<= 1) {};
            domains &= ~domain;
        } else {
            domain = 0;
        }

        // Determine the number of partitions for the device given the specific domain.
        if (domain) {
            property[1] = domain;
            err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, -1, NULL, &num_devices);
            if ((err != CL_SUCCESS) || (num_devices == 0)) {
                print_error(err, "Obtaining the number of partions by affinity failed.");
                failed = 1;
                goto cleanup;
            }
        } else {
            num_devices = 1;
        }

        if (num_devices > 1) {
            // Create each of the sub-devices and a corresponding context.
            if ((err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, num_devices, devices, &num_devices)) != CL_SUCCESS) {
                print_error(err, "Failed creating sub devices.");
                failed = 1;
                goto cleanup;
            }

            // Create a context containing all the sub-devices
            ctx = clCreateContext(NULL, num_devices, devices, notify_callback, NULL, &err);
            if (ctx == NULL) {
                print_error(err, "Failed creating context containing the sub-devices.");
                failed = 1;
                goto cleanup;
            }

            // Create a command queue for each sub-device
            for (i=0; i<num_devices; i++) {
                if (devices[i]) {
                    if ((queues[i] = clCreateCommandQueueWithProperties(ctx, devices[i], 0, &err)) == NULL) {
                        print_error(err, "Failed creating command queues.");
                        failed = 1;
                        goto cleanup;
                    }
                }
            }
        } else {
            // No partitioning available. Just exercise the APIs on a single device.
            devices[0] = deviceID;
            queues[0] = queue;
            ctx = context;
        }

        // Build the kernel program.
        // (The assignment inside the `if` is intentional: any non-zero
        // result from the helper is an error.)
        if (err = create_single_kernel_helper(ctx, &program, &kernel, 1, &image_migrate_kernel_code, "test_image_migrate")) {
            print_error(err, "Failed creating kernel.");
            failed = 1;
            goto cleanup;
        }

        // Create sampler.
        sampler = clCreateSamplerWithProperties(ctx, properties, &err );
        if ((err != CL_SUCCESS) || !sampler) {
            print_error(err, "Failed to create a sampler.");
            failed = 1;
            goto cleanup;
        }

        num_devices_limited = num_devices;

        // Allocate memory buffers. 3 buffers (2 input, 1 output) for each sub-device.
        // If we run out of memory, then restrict the number of sub-devices to be tested.
        for (i=0; i<num_devices; i++) {
            imageA[i] = init_image(queues[i], create_image_2d(ctx, (CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR),
                                                              &format, IMAGE_DIM, IMAGE_DIM, 0, NULL, &err), A);
            imageB[i] = init_image(queues[i], create_image_2d(ctx, (CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR),
                                                              &format, IMAGE_DIM, IMAGE_DIM, 0, NULL, &err), B);
            imageC[i] = create_image_2d(ctx, (CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR),
                                        &format, IMAGE_DIM, IMAGE_DIM, 0, NULL, &err);

            if ((imageA[i] == NULL) || (imageB[i] == NULL) || (imageC[i] == NULL)) {
                if (i == 0) {
                    log_error("Failed to allocate even 1 set of buffers.\n");
                    failed = 1;
                    goto cleanup;
                }
                num_devices_limited = i;
                break;
            }
        }

        // For each partition, we will execute the test kernel with each of the 3 buffers migrated to one of the migrate options
        for (migrateA=(enum migrations)(0); migrateA<NUMBER_OF_MIGRATIONS; migrateA = (enum migrations)((int)migrateA + 1)) {
            if (migrateMemObject(migrateA, queues, imageA, num_devices_limited, flagsA, d) != CL_SUCCESS) {
                failed = 1;
                goto cleanup;
            }
            for (migrateC=(enum migrations)(0); migrateC<NUMBER_OF_MIGRATIONS; migrateC = (enum migrations)((int)migrateC + 1)) {
                if (migrateMemObject(migrateC, queues, imageC, num_devices_limited, flagsC, d) != CL_SUCCESS) {
                    failed = 1;
                    goto cleanup;
                }
                for (migrateB=(enum migrations)(0); migrateB<NUMBER_OF_MIGRATIONS; migrateB = (enum migrations)((int)migrateB + 1)) {
                    if (migrateMemObject(migrateB, queues, imageB, num_devices_limited, flagsB, d) != CL_SUCCESS) {
                        failed = 1;
                        goto cleanup;
                    }
                    // Run the test on each of the partitions.
                    for (i=0; i<num_devices_limited; i++) {
                        cl_uint x;
                        x = i + test_number;
                        if ((err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (const void *)&imageC[i])) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 0.");
                            failed = 1;
                            goto cleanup;
                        }
                        if ((err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (const void *)&imageA[i])) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 1.");
                            failed = 1;
                            goto cleanup;
                        }
                        if ((err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (const void *)&imageB[i])) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 2.");
                            failed = 1;
                            goto cleanup;
                        }
                        if ((err = clSetKernelArg(kernel, 3, sizeof(cl_sampler), (const void *)&sampler)) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 3.");
                            failed = 1;
                            goto cleanup;
                        }
                        if ((err = clSetKernelArg(kernel, 4, sizeof(cl_uint), (const void *)&x)) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 4.");
                            failed = 1;
                            goto cleanup;
                        }
                        if ((err = clEnqueueNDRangeKernel(queues[i], kernel, 2, NULL, wgs, wls, 0, NULL, NULL)) != CL_SUCCESS) {
                            print_error(err, "Failed enqueueing the NDRange kernel.");
                            failed = 1;
                            goto cleanup;
                        }
                    }
                    // Verify the results as long as neither input is an undefined migration
                    const size_t origin[3] = {0, 0, 0};
                    const size_t region[3] = {IMAGE_DIM, IMAGE_DIM, 1};
                    for (i=0; i<num_devices_limited; i++, test_number++) {
                        if (((flagsA[i] | flagsB[i]) & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) == 0) {
                            if ((err = clEnqueueReadImage(queues[i], imageC[i], CL_TRUE,
                                                          origin, region, 0, 0, C, 0, NULL, NULL)) != CL_SUCCESS) {
                                print_error(err, "Failed reading output buffer.");
                                failed = 1;
                                goto cleanup;
                            }
                            for (j=0; j<MEM_OBJ_SIZE; j++) {
                                cl_uint expected;
                                expected = A[j] ^ B[j] ^ test_number;
                                if (C[j] != expected) {
                                    log_error("Failed on device %d, work item %4d, expected 0x%08x got 0x%08x (0x%08x ^ 0x%08x ^ 0x%08x)\n", i, j, expected, C[j], A[j], B[j], test_number);
                                    failed = 1;
                                }
                            }
                            if (failed) goto cleanup;
                        }
                    }
                    // Reload any B image whose content was left undefined by
                    // a random migration before the next iteration.
                    if (restoreImage(queues, imageB, num_devices_limited, flagsB, B) != CL_SUCCESS) {
                        failed = 1;
                        goto cleanup;
                    }
                }
            }
            if (restoreImage(queues, imageA, num_devices_limited, flagsA, A) != CL_SUCCESS) {
                failed = 1;
                goto cleanup;
            }
        }

    cleanup:
        // Clean up all the allocted resources create by the test. This includes sub-devices,
        // command queues, and memory buffers.
        // NOTE(review): released image/queue handles are not reset to NULL
        // here (only devices[i] is); if a later domain iteration creates
        // fewer objects, the stale handles would be released again on the
        // next pass through cleanup — confirm and consider NULLing them.
        for (i=0; i<max_sub_devices; i++) {
            // Memory buffer cleanup
            if (imageA[i]) {
                if ((err = clReleaseMemObject(imageA[i])) != CL_SUCCESS) {
                    print_error(err, "Failed releasing memory object.");
                    failed = 1;
                }
            }
            if (imageB[i]) {
                if ((err = clReleaseMemObject(imageB[i])) != CL_SUCCESS) {
                    print_error(err, "Failed releasing memory object.");
                    failed = 1;
                }
            }
            if (imageC[i]) {
                if ((err = clReleaseMemObject(imageC[i])) != CL_SUCCESS) {
                    print_error(err, "Failed releasing memory object.");
                    failed = 1;
                }
            }
            if (num_devices > 1) {
                // Command queue cleanup
                if (queues[i]) {
                    if ((err = clReleaseCommandQueue(queues[i])) != CL_SUCCESS) {
                        print_error(err, "Failed releasing command queue.");
                        failed = 1;
                    }
                }
                // Sub-device cleanup
                if (devices[i]) {
                    if ((err = clReleaseDevice(devices[i])) != CL_SUCCESS) {
                        print_error(err, "Failed releasing sub device.");
                        failed = 1;
                    }
                }
                devices[i] = 0;
            }
        }

        // Sampler cleanup
        if (sampler) {
            if ((err = clReleaseSampler(sampler)) != CL_SUCCESS) {
                print_error(err, "Failed releasing sampler.");
                failed = 1;
            }
            sampler = NULL;
        }

        // Context, program, and kernel cleanup
        if (program) {
            if ((err = clReleaseProgram(program)) != CL_SUCCESS) {
                print_error(err, "Failed releasing program.");
                failed = 1;
            }
            program = NULL;
        }
        if (kernel) {
            if ((err = clReleaseKernel(kernel)) != CL_SUCCESS) {
                print_error(err, "Failed releasing kernel.");
                failed = 1;
            }
            kernel = NULL;
        }
        // Only release the context if it was created here; the caller owns
        // the original `context`.
        if (ctx && (ctx != context)) {
            if ((err = clReleaseContext(ctx)) != CL_SUCCESS) {
                print_error(err, "Failed releasing context.");
                failed = 1;
            }
        }
        ctx = NULL;

        if (failed) goto cleanup_allocations;
    } while (domains);

cleanup_allocations:
    if (devices) free(devices);
    if (queues) free(queues);
    if (flagsA) free(flagsA);
    if (flagsB) free(flagsB);
    if (flagsC) free(flagsC);
    if (imageA) free(imageA);
    if (imageB) free(imageB);
    if (imageC) free(imageC);
    return ((failed) ? -1 : 0);
}

View File

@@ -0,0 +1,631 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"
// Design:
// To test sub buffers, we first create one main buffer. We then create several sub-buffers and
// queue Actions on each one. Each Action is encapsulated in a class so it can keep track of
// what results it expects, and so we can test scaling degrees of Actions on scaling numbers of
// sub-buffers.
// Wrapper (via clMemWrapper's RAII) around one sub-buffer, remembering the
// region of the parent buffer it covers so actions can mirror their effects
// into the host-side model of the parent.
class SubBufferWrapper : public clMemWrapper
{
public:
    cl_mem mParentBuffer;
    size_t mOrigin;
    size_t mSize;

    // Creates the sub-buffer over [origin, origin+size) of `parent` and
    // stores the handle in the base wrapper. Returns the OpenCL error code.
    cl_int Allocate( cl_mem parent, cl_mem_flags flags, size_t origin, size_t size )
    {
        mParentBuffer = parent;
        mOrigin = origin;
        mSize = size;

        cl_buffer_region region = { mOrigin, mSize };

        cl_int error;
        mMem = clCreateSubBuffer( mParentBuffer, flags, CL_BUFFER_CREATE_TYPE_REGION, &region, &error );
        return error;
    }
};
// Base class for one operation applied to sub-buffers of a shared parent
// buffer.  Each concrete Action both performs the operation through the
// OpenCL API and mirrors its expected effect into parentBufferState (a
// host-side model of the whole parent buffer) used for final verification.
class Action
{
public:
    virtual ~Action() {}

    // Perform the operation on buffer1 (and possibly buffer2), OR-ing `tag`
    // into every byte the action touches so each action leaves a
    // distinguishable mark.  Returns CL_SUCCESS or an OpenCL error code.
    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) = 0;

    virtual const char * GetName( void ) const = 0;

    // Random state shared by all actions, created lazily on first use.
    static MTdata d;
    static MTdata GetRandSeed( void )
    {
        if ( d == 0 )
            d = init_genrand( gRandomSeed );
        return d;
    }
    static void FreeRandSeed() {
        if ( d != 0 ) {
            free_mtdata(d);
            d = 0;
        }
    }
};
// Out-of-class definition of the shared random state.
MTdata Action::d = 0;
// Reads the whole sub-buffer to host memory, ORs `tag` into a random
// sub-range (mirroring the change into parentBufferState), then writes the
// data back.  The scratch buffer is now freed on the error paths as well
// (the original test_error early returns leaked it).
class ReadWriteAction : public Action
{
public:
    virtual ~ReadWriteAction() {}

    virtual const char * GetName( void ) const { return "ReadWrite";}

    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        cl_char *tempBuffer = (cl_char*)malloc(buffer1.mSize);
        if (!tempBuffer) {
            log_error("Out of memory\n");
            return -1;
        }

        cl_int error = clEnqueueReadBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
        if ( error != CL_SUCCESS )
        {
            free( tempBuffer );   // don't leak the scratch buffer on failure
            print_error( error, "Unable to enqueue buffer read" );
            return error;
        }

        // Tag a random sub-range and mirror it into the host-side model of
        // the parent buffer.
        size_t start = get_random_size_t( 0, buffer1.mSize / 2, GetRandSeed() );
        size_t end = get_random_size_t( start, buffer1.mSize, GetRandSeed() );
        for ( size_t i = start; i < end; i++ )
        {
            tempBuffer[ i ] |= tag;
            parentBufferState[ i + buffer1.mOrigin ] |= tag;
        }

        error = clEnqueueWriteBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
        if ( error != CL_SUCCESS )
        {
            free( tempBuffer );
            print_error( error, "Unable to enqueue buffer write" );
            return error;
        }

        free(tempBuffer);
        return CL_SUCCESS;
    }
};
// Classic max/min macros; note each argument is evaluated twice, so avoid
// side-effecting expressions as arguments.
#ifndef MAX
#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) )
#endif
#ifndef MIN
#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) )
#endif
// Copies a random-sized span from sub-buffer 1 to sub-buffer 2 at random
// offsets, mirroring the copy into the host-side model of the parent buffer.
class CopyAction : public Action
{
public:
    virtual ~CopyAction() {}

    virtual const char * GetName( void ) const { return "Copy";}

    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        // Pick a span that fits in both sub-buffers, then source and
        // destination offsets that keep it in range.
        // (Order of the three random draws matches the original so the
        // generated sequence of operations is identical.)
        size_t copyBytes = get_random_size_t( 0, MIN( buffer1.mSize, buffer2.mSize ), GetRandSeed() );
        size_t srcOffset = get_random_size_t( 0, buffer1.mSize - copyBytes, GetRandSeed() );
        size_t dstOffset = get_random_size_t( 0, buffer2.mSize - copyBytes, GetRandSeed() );

        cl_int error = clEnqueueCopyBuffer( queue, buffer1, buffer2, srcOffset, dstOffset, copyBytes, 0, NULL, NULL );
        test_error( error, "Unable to enqueue buffer copy" );

        // Mirror the device-side copy in the host model of the parent buffer.
        memcpy( parentBufferState + buffer2.mOrigin + dstOffset, parentBufferState + buffer1.mOrigin + srcOffset, copyBytes );
        return CL_SUCCESS;
    }
};
// Maps a random sub-range of the sub-buffer for read/write access, ORs `tag`
// into every mapped byte, mirrors that into the host reference state, and
// unmaps.
class MapAction : public Action
{
public:
    virtual ~MapAction() {}
    virtual const char * GetName( void ) const { return "Map";}
    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        // Random [mapStart, mapStart + mapLength) window within the sub-buffer.
        size_t mapLength = get_random_size_t( 0, buffer1.mSize, GetRandSeed() );
        size_t mapStart = get_random_size_t( 0, buffer1.mSize - mapLength, GetRandSeed() );

        cl_int error;
        void *mappedPtr = clEnqueueMapBuffer( queue, buffer1, CL_TRUE, (cl_map_flags)( CL_MAP_READ | CL_MAP_WRITE ),
                                              mapStart, mapLength, 0, NULL, NULL, &error );
        test_error( error, "Unable to map buffer" );

        cl_char *mappedBytes = (cl_char *)mappedPtr;
        for ( size_t idx = 0; idx < mapLength; idx++ )
        {
            mappedBytes[ idx ] |= tag;
            parentBufferState[ idx + mapStart + buffer1.mOrigin ] |= tag;
        }

        error = clEnqueueUnmapMemObject( queue, buffer1, mappedPtr, 0, NULL, NULL );
        test_error( error, "Unable to unmap buffer" );
        return CL_SUCCESS;
    }
};
// Runs a tiny kernel that ORs `tag` into every byte of the sub-buffer, then
// applies the same update to the host reference state.
class KernelReadWriteAction : public Action
{
public:
    virtual ~KernelReadWriteAction() {}
    virtual const char * GetName( void ) const { return "KernelReadWrite";}
    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        const char *kernelCode[] = {
            "__kernel void readTest( __global char *inBuffer, char tag )\n"
            "{\n"
            "    int tid = get_global_id(0);\n"
            "    inBuffer[ tid ] |= tag;\n"
            "}\n" };

        clProgramWrapper program;
        clKernelWrapper kernel;
        cl_int error;

        if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) )
            return -1;

        // One work-item per byte of the sub-buffer.
        size_t threads[1] = { buffer1.mSize };

        // NOTE(review): passing &buffer1 appears to rely on the wrapper
        // exposing a cl_mem* at that address (wrapper operator&/layout) —
        // confirm against typeWrappers.h.
        error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &buffer1 );
        test_error( error, "Unable to set kernel argument" );
        error = clSetKernelArg( kernel, 1, sizeof( tag ), &tag );
        test_error( error, "Unable to set kernel argument" );

        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
        test_error( error, "Unable to queue kernel" );

        // Mirror the kernel's effect on the host reference state.
        for ( size_t idx = 0; idx < buffer1.mSize; idx++ )
            parentBufferState[ idx + buffer1.mOrigin ] |= tag;

        return CL_SUCCESS;
    }
};
// Computes a main-buffer size that is big enough to be interesting but small
// enough to leave device memory headroom and keep the test runtime sane.
// Returns CL_SUCCESS and writes the size to outSize, or an OpenCL error.
cl_int get_reasonable_buffer_size( cl_device_id device, size_t &outSize )
{
    cl_ulong maxAllocSize;
    cl_int error;

    // Query the device's largest possible single allocation.
    error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
    test_error( error, "Unable to get max alloc size" );

    // Use a fifth of the maximum so other allocations still fit, capped at
    // 32MB so tests complete in a reasonable amount of time.
    outSize = (size_t)( maxAllocSize / 5 );
    if ( outSize > 32 << 20 )
        outSize = 32 << 20;

    return CL_SUCCESS;
}
// Returns the index of the sub-buffer whose [mOrigin, mOrigin + mSize) range
// contains parent-buffer byte `index`, or numSubBuffers if none does.
// Assumes subBuffers is sorted by mOrigin, which permits an early exit.
size_t find_subbuffer_by_index( SubBufferWrapper * subBuffers, size_t numSubBuffers, size_t index )
{
    for ( size_t sb = 0; sb < numSubBuffers; sb++ )
    {
        size_t begin = subBuffers[ sb ].mOrigin;
        if ( begin > index )
            break;  // Sorted by origin: no later sub-buffer can contain index
        if ( index < begin + subBuffers[ sb ].mSize )
            return sb;
    }
    return numSubBuffers;
}
// This tests the read/write capabilities of sub buffers (if we are read/write, the sub buffers
// can't overlap)
// Core of the read/write sub-buffer test: carves non-overlapping, aligned
// sub-buffers out of one parent buffer, applies random actions to them (split
// across queueA/queueB), and validates the parent buffer against a host-side
// reference copy. Returns the number of validation errors (0 = pass),
// or -1 / an OpenCL error code on setup failure.
int test_sub_buffers_read_write_core( cl_context context, cl_command_queue queueA, cl_command_queue queueB, size_t mainSize, size_t addressAlign )
{
    clMemWrapper mainBuffer;
    SubBufferWrapper subBuffers[ 8 ];
    size_t numSubBuffers;
    cl_int error;
    size_t i;
    MTdata m = init_genrand( 22 );
    // Host reference copy of the parent buffer plus a scratch buffer for
    // device read-backs. NOTE(review): these allocations are not null-checked
    // and are leaked on the test_error early-return paths below.
    cl_char * mainBufferContents = (cl_char*)calloc(1,mainSize);
    cl_char * actualResults = (cl_char*)calloc(1,mainSize);
    // Deterministic random fill (fixed seed 22), one cl_uint at a time; any
    // tail bytes past a multiple of 4 remain zero from calloc.
    for ( i = 0; i < mainSize / 4; i++ )
        ((cl_uint*) mainBufferContents)[i] = genrand_int32(m);
    free_mtdata( m );
    // Create the main buffer to test against
    mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, mainSize, mainBufferContents, &error );
    test_error( error, "Unable to create test main buffer" );
    // Create some sub-buffers to use. Since this test uses read/write
    // sub-buffers, they must not overlap: each one starts at an aligned
    // offset at or beyond the end of the previous one.
    size_t toStartFrom = 0;
    for ( numSubBuffers = 0; numSubBuffers < 8; numSubBuffers++ )
    {
        size_t endRange = toStartFrom + ( mainSize / 4 );
        if ( endRange > mainSize )
            endRange = mainSize;
        size_t offset = get_random_size_t( toStartFrom / addressAlign, endRange / addressAlign, Action::GetRandSeed() ) * addressAlign;
        size_t size = get_random_size_t( 1, ( MIN( mainSize / 8, mainSize - offset ) ) / addressAlign, Action::GetRandSeed() ) * addressAlign;
        error = subBuffers[ numSubBuffers ].Allocate( mainBuffer, CL_MEM_READ_WRITE, offset, size );
        test_error( error, "Unable to allocate sub buffer" );
        toStartFrom = offset + size;
        // Stop early when insufficient room remains for another sub-buffer.
        // NOTE(review): on this break the just-allocated sub-buffer is NOT
        // counted in numSubBuffers (increment skipped), so it is never
        // exercised — looks like an off-by-one; confirm intent.
        if ( toStartFrom > ( mainSize - ( addressAlign * 256 ) ) )
            break;
    }
    ReadWriteAction rwAction;
    MapAction mapAction;
    CopyAction copyAction;
    KernelReadWriteAction kernelAction;
    Action * actions[] = { &rwAction, &mapAction, &copyAction, &kernelAction };
    int numErrors = 0;
    // Do the following steps twice, to make sure the parent gets updated *and* we can
    // still work on the sub-buffers
    cl_command_queue prev_queue = queueA;
    for ( int time = 0; time < 2; time++ )
    {
        // Randomly apply actions to the set of sub buffers
        size_t i;
        for ( i = 0; i < 64; i++ )
        {
            // Pick a random action, queue, and pair of distinct sub-buffers.
            int which = random_in_range( 0, 3, Action::GetRandSeed() );
            int whichQueue = random_in_range( 0, 1, Action::GetRandSeed() );
            int whichBufferA = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
            int whichBufferB;
            do
            {
                whichBufferB = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
            } while ( whichBufferB == whichBufferA );
            cl_command_queue queue = ( whichQueue == 1 ) ? queueB : queueA;
            // When switching queues, finish the previous one first so work on
            // the shared parent buffer is ordered between the two queues.
            if (queue != prev_queue) {
                error = clFinish( prev_queue );
                test_error( error, "Error finishing other queue." );
                prev_queue = queue;
            }
            // The iteration counter doubles as the tag ORed into touched bytes.
            error = actions[ which ]->Execute( context, queue, (cl_int)i, subBuffers[ whichBufferA ], subBuffers[ whichBufferB ], mainBufferContents );
            test_error( error, "Unable to execute action against sub buffers" );
        }
        error = clFinish( queueA );
        test_error( error, "Error finishing queueA." );
        error = clFinish( queueB );
        test_error( error, "Error finishing queueB." );
        // Validate by reading the final contents of the main buffer and
        // validating against our ref copy we generated.
        // Compare in 64K chunks; only scan byte-by-byte when a chunk differs.
        error = clEnqueueReadBuffer( queueA, mainBuffer, CL_TRUE, 0, mainSize, actualResults, 0, NULL, NULL );
        test_error( error, "Unable to enqueue buffer read" );
        for ( i = 0; i < mainSize; i += 65536 )
        {
            size_t left = 65536;
            if ( ( i + left ) > mainSize )
                left = mainSize - i;
            if ( memcmp( actualResults + i, mainBufferContents + i, left ) == 0 )
                continue;
            // The fast compare failed, so we need to determine where exactly the failure is
            for ( size_t j = 0; j < left; j++ )
            {
                if ( actualResults[ i + j ] != mainBufferContents[ i + j ] )
                {
                    // Hit a failure; report the subbuffer at this address as having failed
                    size_t sbThatFailed = find_subbuffer_by_index( subBuffers, numSubBuffers, i + j );
                    if ( sbThatFailed == numSubBuffers )
                    {
                        log_error( "ERROR: Validation failure outside of a sub-buffer! (Shouldn't be possible, but it happened at index %ld out of %ld...)\n", i + j, mainSize );
                        // Since this is nonsensical, don't bother continuing to check
                        // (we will, however, print our map of sub-buffers for comparison)
                        for ( size_t k = 0; k < numSubBuffers; k++ )
                        {
                            log_error( "\tBuffer %ld: %ld to %ld (length %ld)\n", k, subBuffers[ k ].mOrigin, subBuffers[ k ].mOrigin + subBuffers[ k ].mSize, subBuffers[ k ].mSize );
                        }
                        return -1;
                    }
                    log_error( "ERROR: Validation failure on sub-buffer %ld (start: %ld, length: %ld)\n", sbThatFailed, subBuffers[ sbThatFailed ].mOrigin, subBuffers[ sbThatFailed ].mSize );
                    // Report one error per failed sub-buffer: advance the
                    // outer chunk index and inner offset so scanning resumes
                    // at the last byte of the failed sub-buffer.
                    size_t newPos = subBuffers[ sbThatFailed ].mOrigin + subBuffers[ sbThatFailed ].mSize - 1;
                    i = newPos & ~65535;
                    j = newPos - i;
                    numErrors++;
                }
            }
        }
    }
    free(mainBufferContents);
    free(actualResults);
    Action::FreeRandSeed();
    return numErrors;
}
// Entry point: runs the read/write sub-buffer test on a single device, using
// the same command queue for both queue slots of the core test.
int test_sub_buffers_read_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_int error;
    size_t mainSize;
    cl_uint addressAlignBits;

    // Pick a main buffer size appropriate for this device.
    error = get_reasonable_buffer_size( deviceID, mainSize );
    test_error( error, "Unable to get reasonable buffer size" );

    // The device reports its minimum base-address alignment in bits; convert
    // to bytes so sub-buffer origins can be kept valid.
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlignBits ), &addressAlignBits, NULL );
    test_error( error, "Unable to get device's address alignment" );
    size_t alignBytes = addressAlignBits / 8;

    return test_sub_buffers_read_write_core( context, queue, queue, mainSize, alignBytes );
}
// This test performs the same basic operations as sub_buffers_read_write, but instead of a single
// device, it creates a context and buffer shared between two devices, then executes commands
// on queues for each device to ensure that everything still operates as expected.
// Dual-device variant: creates a context and buffer shared between two
// devices and runs the core read/write test with one queue per device.
// Skips (returns 0) if only one device is available.
int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_int error;
    // First obtain the second device
    cl_device_id otherDevice = GetOpposingDevice( deviceID );
    if ( otherDevice == NULL )
    {
        log_error( "ERROR: Unable to obtain a second device for sub-buffer dual-device test.\n" );
        return -1;
    }
    if ( otherDevice == deviceID )
    {
        log_info( "Note: Unable to run dual-device sub-buffer test (only one device available). Skipping test (implicitly passing).\n" );
        return 0;
    }
    // Log the second device's name. The name length is unbounded, so size a
    // heap allocation from a first query. (The previous code used a
    // non-standard VLA on one path and _malloca without _freea on the MSVC
    // path; plain malloc/free is portable and leak-free.)
    size_t param_size;
    error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, 0, NULL, &param_size );
    test_error( error, "Error obtaining device name" );
    char *device_name = (char *)malloc( param_size );
    if ( device_name == NULL )
    {
        log_error( "ERROR: Unable to allocate space for device name\n" );
        return -1;
    }
    error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, param_size, device_name, NULL );
    if ( error != CL_SUCCESS )
    {
        // Free before test_error's early return.
        free( device_name );
        test_error( error, "Error obtaining device name" );
    }
    log_info( "\tOther device obtained for dual device test is type %s\n", device_name );
    free( device_name );
    // Create a shared context for these two devices
    cl_device_id devices[ 2 ] = { deviceID, otherDevice };
    clContextWrapper testingContext = clCreateContext( NULL, 2, devices, NULL, NULL, &error );
    test_error( error, "Unable to create shared context" );
    // Create two queues (can't use the existing one, because it's on the wrong context)
    clCommandQueueWrapper queue1 = clCreateCommandQueueWithProperties( testingContext, deviceID, 0, &error );
    test_error( error, "Unable to create command queue on main device" );
    clCommandQueueWrapper queue2 = clCreateCommandQueueWithProperties( testingContext, otherDevice, 0, &error );
    test_error( error, "Unable to create command queue on secondary device" );
    // Use the smaller of the two devices' reasonable buffer sizes and the
    // stricter (larger) of their base-address alignments so the core test's
    // sub-buffers are valid on BOTH devices.
    size_t maxBuffer1, maxBuffer2;
    error = get_reasonable_buffer_size( deviceID, maxBuffer1 );
    test_error( error, "Unable to get buffer size for main device" );
    error = get_reasonable_buffer_size( otherDevice, maxBuffer2 );
    test_error( error, "Unable to get buffer size for secondary device" );
    maxBuffer1 = MIN( maxBuffer1, maxBuffer2 );
    cl_uint addressAlign1Bits, addressAlign2Bits;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign1Bits ), &addressAlign1Bits, NULL );
    test_error( error, "Unable to get main device's address alignment" );
    error = clGetDeviceInfo( otherDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign2Bits ), &addressAlign2Bits, NULL );
    test_error( error, "Unable to get secondary device's address alignment" );
    cl_uint addressAlign1 = MAX( addressAlign1Bits, addressAlign2Bits ) / 8;
    // Finally time to run!
    return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 );
}
// Copies `length` bytes from `buffer` into a fresh device buffer via a simple
// kernel, then reads that device buffer back into outResults. Used to
// validate sub-buffer contents through the kernel path rather than through
// clEnqueueReadBuffer directly.
cl_int read_buffer_via_kernel( cl_context context, cl_command_queue queue, cl_mem buffer, size_t length, cl_char *outResults )
{
    const char *kernelCode[] = {
        "__kernel void readTest( __global char *inBuffer, __global char *outBuffer )\n"
        "{\n"
        "    int tid = get_global_id(0);\n"
        "    outBuffer[ tid ] = inBuffer[ tid ];\n"
        "}\n" };

    clProgramWrapper program;
    clKernelWrapper kernel;
    cl_int error;

    if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) )
        return -1;

    // Destination buffer the kernel copies into.
    clMemWrapper outStream = clCreateBuffer( context, CL_MEM_READ_WRITE, length, NULL, &error );
    test_error( error, "Unable to create output stream" );

    error = clSetKernelArg( kernel, 0, sizeof( buffer ), &buffer );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( outStream ), &outStream );
    test_error( error, "Unable to set kernel argument" );

    // One work-item per byte, followed by a blocking read of the result.
    size_t threads[1] = { length };
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    test_error( error, "Unable to queue kernel" );
    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, length, outResults, 0, NULL, NULL );
    test_error( error, "Unable to read results from kernel" );

    return CL_SUCCESS;
}
// Tests deliberately-overlapping read-only sub-buffers: fills the parent with
// random data, then reads each sub-buffer back (half the time via a kernel)
// and checks it matches the corresponding slice of the parent contents.
// Returns the number of sub-buffers that failed validation (0 = pass).
int test_sub_buffers_overlapping( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_int error;
    size_t mainSize;
    cl_uint addressAlign;
    clMemWrapper mainBuffer;
    SubBufferWrapper subBuffers[ 16 ];
    // Create the main buffer to test against
    error = get_reasonable_buffer_size( deviceID, mainSize );
    test_error( error, "Unable to get reasonable buffer size" );
    mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, mainSize, NULL, &error );
    test_error( error, "Unable to create test main buffer" );
    // Determine the alignment of the device so we can make sure sub buffers are valid.
    // NOTE(review): CL_DEVICE_MEM_BASE_ADDR_ALIGN reports *bits*; this value is
    // used directly as a byte alignment (no /8 as in the read/write test). That is
    // stricter than required but still produces valid origins — confirm intent.
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign ), &addressAlign, NULL );
    test_error( error, "Unable to get device's address alignment" );
    // Create some sub-buffers to use. Note: they don't have to not overlap (we actually *want* them to overlap)
    for ( size_t i = 0; i < 16; i++ )
    {
        size_t offset = get_random_size_t( 0, mainSize / addressAlign, Action::GetRandSeed() ) * addressAlign;
        size_t size = get_random_size_t( 1, ( mainSize - offset ) / addressAlign, Action::GetRandSeed() ) * addressAlign;
        error = subBuffers[ i ].Allocate( mainBuffer, CL_MEM_READ_ONLY, offset, size );
        test_error( error, "Unable to allocate sub buffer" );
    }
    /// For logging, we determine the amount of overlap we just generated.
    // Build a fast in-out event map: each sub-buffer contributes two entries
    // kept sorted by position — entry j >= 0 means sub-buffer j starts at its
    // mOrigin; entry -(j+1) means sub-buffer j ends at mOrigin + mSize.
    int sbMap[ 32 ], mapSize = 0;
    for ( int i = 0; i < 16; i++ )
    {
        int j;
        // Insert the start event for sub-buffer i at its sorted position.
        for ( j = 0; j < mapSize; j++ )
        {
            // Position of existing entry j. BUGFIX: end events are stored as
            // -(idx+1), so decoding them must subtract 1; the previous code
            // indexed subBuffers[ -sbMap[j] ], reading the wrong sub-buffer's
            // extent and, for idx == 15, reading past the end of the array.
            size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] - 1 ].mOrigin + subBuffers[ -sbMap[ j ] - 1 ].mSize )
                                           : subBuffers[ sbMap[ j ] ].mOrigin;
            if ( subBuffers[ i ].mOrigin < pt )
            {
                // Origin is before this part of the map, so move map forward so we can insert
                memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
                sbMap[ j ] = i;
                mapSize++;
                break;
            }
        }
        if ( j == mapSize )
        {
            sbMap[ j ] = i;
            mapSize++;
        }
        // Insert the end event for sub-buffer i at its sorted position.
        size_t endPt = subBuffers[ i ].mOrigin + subBuffers[ i ].mSize;
        for ( j = 0; j < mapSize; j++ )
        {
            // Same -(idx+1) decoding fix as above.
            size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] - 1 ].mOrigin + subBuffers[ -sbMap[ j ] - 1 ].mSize )
                                           : subBuffers[ sbMap[ j ] ].mOrigin;
            if ( endPt < pt )
            {
                // End point is before this part of the map, so move map forward so we can insert
                memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
                sbMap[ j ] = -( i + 1 );
                mapSize++;
                break;
            }
        }
        if ( j == mapSize )
        {
            sbMap[ j ] = -( i + 1 );
            mapSize++;
        }
    }
    // Sweep the event map, tracking how many sub-buffers are open at once and
    // summing the total overlapped byte count into delta.
    long long delta = 0;
    size_t maxOverlap = 1, overlap = 0;
    for ( int i = 0; i < 32; i++ )
    {
        if ( sbMap[ i ] >= 0 )
        {
            overlap++;
            if ( overlap > 1 )
                delta -= (long long)( subBuffers[ sbMap[ i ] ].mOrigin );
            if ( overlap > maxOverlap )
                maxOverlap = overlap;
        }
        else
        {
            if ( overlap > 1 )
                delta += (long long)( subBuffers[ -sbMap[ i ] - 1 ].mOrigin + subBuffers[ -sbMap[ i ] - 1 ].mSize );
            overlap--;
        }
    }
    log_info( "\tTesting %d sub-buffers with %lld overlapping Kbytes (%d%%; as many as %ld buffers overlapping at once)\n",
              16, ( delta / 1024LL ), (int)( delta * 100LL / (long long)mainSize ), maxOverlap );
    // Write some random contents to the main buffer
    cl_char * contents = new cl_char[ mainSize ];
    generate_random_data( kChar, mainSize, Action::GetRandSeed(), contents );
    error = clEnqueueWriteBuffer( queue, mainBuffer, CL_TRUE, 0, mainSize, contents, 0, NULL, NULL );
    if ( error != CL_SUCCESS )
    {
        // Free before test_error's early return (previously leaked).
        delete [] contents;
        test_error( error, "Unable to write to main buffer" );
    }
    // Now read from each sub-buffer and check to make sure that they make sense w.r.t. the main contents
    cl_char * tempBuffer = new cl_char[ mainSize ];
    int numErrors = 0;
    for ( size_t i = 0; i < 16; i++ )
    {
        // Read from this buffer, randomly via a direct read or via a kernel
        int which = random_in_range( 0, 1, Action::GetRandSeed() );
        if ( which )
            error = clEnqueueReadBuffer( queue, subBuffers[ i ], CL_TRUE, 0, subBuffers[ i ].mSize, tempBuffer, 0, NULL, NULL );
        else
            error = read_buffer_via_kernel( context, queue, subBuffers[ i ], subBuffers[ i ].mSize, tempBuffer );
        if ( error != CL_SUCCESS )
        {
            // Free before test_error's early return (previously leaked).
            delete [] contents;
            delete [] tempBuffer;
            test_error( error, "Unable to read sub buffer contents" );
        }
        if ( memcmp( tempBuffer, contents + subBuffers[ i ].mOrigin, subBuffers[ i ].mSize ) != 0 )
        {
            log_error( "ERROR: Validation for sub-buffer %ld failed!\n", i );
            numErrors++;
        }
    }
    delete [] contents;
    delete [] tempBuffer;
    Action::FreeRandSeed();
    return numErrors;
}