Mirror of https://github.com/KhronosGroup/OpenCL-CTS.git (synced 2026-03-23 23:49:02 +00:00).
Commit: Initial open source release of the OpenCL 2.0 CTS. Files changed in this commit:
25
test_conformance/buffers/CMakeLists.txt
Normal file
25
test_conformance/buffers/CMakeLists.txt
Normal file
@@ -0,0 +1,25 @@
|
||||
# Build configuration for the OpenCL CTS "buffers" conformance test module.
set(MODULE_NAME BUFFERS)

# Module test sources plus the shared harness sources it links in directly.
set(${MODULE_NAME}_SOURCES
    main.c
    test_buffer_copy.c
    test_buffer_read.c
    test_buffer_write.c
    test_buffer_mem.c
    array_info.c
    test_buffer_map.c
    test_sub_buffers.cpp
    test_buffer_fill.c
    test_buffer_migrate.c
    test_image_migrate.c
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/threadTesting.c
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/typeWrappers.cpp
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/conversions.c
    ../../test_common/harness/msvc9.c
)

# Common target setup shared by all conformance modules.
include(../CMakeCommon.txt)
|
||||
24
test_conformance/buffers/Jamfile
Normal file
24
test_conformance/buffers/Jamfile
Normal file
@@ -0,0 +1,24 @@
|
||||
# Boost.Build (Jam) rules for the buffers conformance test.
# Sources are compiled as C++ (gcc -xc++ / msvc /TP).
# NOTE(review): this source list omits test_sub_buffers.cpp and the
# *_migrate.c files present in the CMake/Makefile builds — confirm whether
# the Jam build is intentionally reduced before relying on it.
project
    : requirements
      <toolset>gcc:<cflags>-xc++
      <toolset>msvc:<cflags>"/TP"
    ;

exe test_buffers
    : array_info.c
      main.c
      test_buffer_copy.c
      test_buffer_map.c
      test_buffer_mem.c
      test_buffer_read.c
      test_buffer_write.c
      test_buffer_fill.c
    : <library>../..//glew
    ;

install dist
    : test_buffers
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/buffers
      <variant>release:<location>$(DIST)/release/tests/test_conformance/buffers
    ;
|
||||
|
||||
49
test_conformance/buffers/Makefile
Normal file
49
test_conformance/buffers/Makefile
Normal file
@@ -0,0 +1,49 @@
|
||||
# Standalone (Apple-style) Makefile for the buffers conformance test.

# Optional Apple Test Framework support.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

SRCS = main.c test_buffer_copy.c test_buffer_read.c test_buffer_write.c \
       test_buffer_mem.c array_info.c test_buffer_map.c \
       test_sub_buffers.cpp test_buffer_fill.c \
       test_buffer_migrate.c test_image_migrate.c \
       ../../test_common/harness/errorHelpers.c \
       ../../test_common/harness/threadTesting.c \
       ../../test_common/harness/testHarness.c \
       ../../test_common/harness/kernelHelpers.c \
       ../../test_common/harness/conversions.c \
       ../../test_common/harness/mt19937.c \
       ../../test_common/harness/typeWrappers.cpp

DEFINES =

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
FRAMEWORK = $(SOURCES)
HEADERS =
TARGET = test_buffers
INCLUDE =
# -Wshorten-64-to-32 is a clang-specific warning flag.
COMPILERFLAGS = -c -Wall -g -O0 -Wshorten-64-to-32
# Everything is compiled with the C++ driver (matches the Jamfile's -xc++).
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%)
LIBRARIES = -framework OpenCL -framework AppKit ${ATF}

# Map both .c and .cpp sources to their object files.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =
all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
|
||||
63
test_conformance/buffers/array_info.c
Normal file
63
test_conformance/buffers/array_info.c
Normal file
@@ -0,0 +1,63 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
|
||||
|
||||
/*
 * Checks that clGetMemObjectInfo( CL_MEM_SIZE ) reports exactly the byte size
 * a buffer was created with (a 32x32x32 array of cl_int here).
 * Returns 0 on success, -1 on any failure.  deviceID, queue and num_elements
 * are unused; the signature matches the harness test-function convention.
 */
int testBufferSize( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_mem buffer;
    cl_int error;
    size_t width = 32, height = 32, depth = 32;
    size_t reportedSize;
    size_t elementSize = sizeof( cl_int );
    size_t expectedSize = elementSize * width * height * depth;

    buffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), expectedSize, NULL, &error );
    test_error( error, "clCreateBuffer failed." );

    error = clGetMemObjectInfo( buffer, CL_MEM_SIZE, sizeof( size_t ), (void *)&reportedSize, NULL );
    if ( error ) {
        log_error( "Error calling clGetMemObjectInfo(): %d\n", error );
        clReleaseMemObject( buffer );
        return -1;
    }

    if ( reportedSize != expectedSize ) {
        log_error( "Error in clGetMemObjectInfo() check of size\n" );
        clReleaseMemObject( buffer );
        return -1;
    }
    log_info( " CL_MEM_SIZE passed.\n" );

    /* cleanup */
    clReleaseMemObject( buffer );

    /* error is CL_SUCCESS (0) on this path */
    return error;

} /* end testBufferSize() */
|
||||
|
||||
|
||||
// FIXME: need to test other flags
|
||||
|
||||
246
test_conformance/buffers/main.c
Normal file
246
test_conformance/buffers/main.c
Normal file
@@ -0,0 +1,246 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
basefn bufferfn_list[] = {
|
||||
test_buffer_read_async_int,
|
||||
test_buffer_read_async_uint,
|
||||
test_buffer_read_async_long,
|
||||
test_buffer_read_async_ulong,
|
||||
test_buffer_read_async_short,
|
||||
test_buffer_read_async_ushort,
|
||||
test_buffer_read_async_char,
|
||||
test_buffer_read_async_uchar,
|
||||
test_buffer_read_async_float,
|
||||
test_buffer_read_array_barrier_int,
|
||||
test_buffer_read_array_barrier_uint,
|
||||
test_buffer_read_array_barrier_long,
|
||||
test_buffer_read_array_barrier_ulong,
|
||||
test_buffer_read_array_barrier_short,
|
||||
test_buffer_read_array_barrier_ushort,
|
||||
test_buffer_read_array_barrier_char,
|
||||
test_buffer_read_array_barrier_uchar,
|
||||
test_buffer_read_array_barrier_float,
|
||||
test_buffer_read_int,
|
||||
test_buffer_read_uint,
|
||||
test_buffer_read_long,
|
||||
test_buffer_read_ulong,
|
||||
test_buffer_read_short,
|
||||
test_buffer_read_ushort,
|
||||
test_buffer_read_float,
|
||||
0, //test_buffer_read_half,
|
||||
test_buffer_read_char,
|
||||
test_buffer_read_uchar,
|
||||
test_buffer_read_struct,
|
||||
test_buffer_read_random_size,
|
||||
test_buffer_map_read_int,
|
||||
test_buffer_map_read_uint,
|
||||
test_buffer_map_read_long,
|
||||
test_buffer_map_read_ulong,
|
||||
test_buffer_map_read_short,
|
||||
test_buffer_map_read_ushort,
|
||||
test_buffer_map_read_char,
|
||||
test_buffer_map_read_uchar,
|
||||
test_buffer_map_read_float,
|
||||
test_buffer_map_read_struct,
|
||||
|
||||
test_buffer_map_write_int,
|
||||
test_buffer_map_write_uint,
|
||||
test_buffer_map_write_long,
|
||||
test_buffer_map_write_ulong,
|
||||
test_buffer_map_write_short,
|
||||
test_buffer_map_write_ushort,
|
||||
test_buffer_map_write_char,
|
||||
test_buffer_map_write_uchar,
|
||||
test_buffer_map_write_float,
|
||||
test_buffer_map_write_struct,
|
||||
|
||||
test_buffer_write_int,
|
||||
test_buffer_write_uint,
|
||||
test_buffer_write_short,
|
||||
test_buffer_write_ushort,
|
||||
test_buffer_write_char,
|
||||
test_buffer_write_uchar,
|
||||
test_buffer_write_float,
|
||||
0, //test_buffer_write_half,
|
||||
test_buffer_write_long,
|
||||
test_buffer_write_ulong,
|
||||
test_buffer_write_struct,
|
||||
test_buffer_write_async_int,
|
||||
test_buffer_write_async_uint,
|
||||
test_buffer_write_async_short,
|
||||
test_buffer_write_async_ushort,
|
||||
test_buffer_write_async_char,
|
||||
test_buffer_write_async_uchar,
|
||||
test_buffer_write_async_float,
|
||||
test_buffer_write_async_long,
|
||||
test_buffer_write_async_ulong,
|
||||
test_buffer_copy,
|
||||
test_buffer_partial_copy,
|
||||
test_mem_read_write_flags,
|
||||
test_mem_write_flags,
|
||||
test_mem_read_flags,
|
||||
test_mem_copy_host_flags,
|
||||
0, //test_mem_alloc_ref_flags,
|
||||
testBufferSize,
|
||||
|
||||
test_sub_buffers_read_write,
|
||||
test_sub_buffers_read_write_dual_devices,
|
||||
test_sub_buffers_overlapping,
|
||||
|
||||
test_buffer_fill_int,
|
||||
test_buffer_fill_uint,
|
||||
test_buffer_fill_short,
|
||||
test_buffer_fill_ushort,
|
||||
test_buffer_fill_char,
|
||||
test_buffer_fill_uchar,
|
||||
test_buffer_fill_long,
|
||||
test_buffer_fill_ulong,
|
||||
test_buffer_fill_float,
|
||||
test_buffer_fill_struct,
|
||||
|
||||
test_buffer_migrate,
|
||||
test_image_migrate,
|
||||
};
|
||||
|
||||
/*
 * Human-readable names for the entries of bufferfn_list, index-aligned with
 * that table (disabled tests keep their name slot so indices stay in sync).
 */
const char *bufferfn_names[] = {
    /* asynchronous reads */
    "buffer_read_async_int",
    "buffer_read_async_uint",
    "buffer_read_async_long",
    "buffer_read_async_ulong",
    "buffer_read_async_short",
    "buffer_read_async_ushort",
    "buffer_read_async_char",
    "buffer_read_async_uchar",
    "buffer_read_async_float",
    /* reads with a barrier */
    "buffer_read_array_barrier_int",
    "buffer_read_array_barrier_uint",
    "buffer_read_array_barrier_long",
    "buffer_read_array_barrier_ulong",
    "buffer_read_array_barrier_short",
    "buffer_read_array_barrier_ushort",
    "buffer_read_array_barrier_char",
    "buffer_read_array_barrier_uchar",
    "buffer_read_array_barrier_float",
    /* blocking reads */
    "buffer_read_int",
    "buffer_read_uint",
    "buffer_read_long",
    "buffer_read_ulong",
    "buffer_read_short",
    "buffer_read_ushort",
    "buffer_read_float",
    "buffer_read_half",
    "buffer_read_char",
    "buffer_read_uchar",
    "buffer_read_struct",
    "buffer_read_random_size",
    /* mapped reads */
    "buffer_map_read_int",
    "buffer_map_read_uint",
    "buffer_map_read_long",
    "buffer_map_read_ulong",
    "buffer_map_read_short",
    "buffer_map_read_ushort",
    "buffer_map_read_char",
    "buffer_map_read_uchar",
    "buffer_map_read_float",
    "buffer_map_read_struct",

    /* mapped writes */
    "buffer_map_write_int",
    "buffer_map_write_uint",
    "buffer_map_write_long",
    "buffer_map_write_ulong",
    "buffer_map_write_short",
    "buffer_map_write_ushort",
    "buffer_map_write_char",
    "buffer_map_write_uchar",
    "buffer_map_write_float",
    "buffer_map_write_struct",

    /* blocking writes */
    "buffer_write_int",
    "buffer_write_uint",
    "buffer_write_short",
    "buffer_write_ushort",
    "buffer_write_char",
    "buffer_write_uchar",
    "buffer_write_float",
    "buffer_write_half",
    "buffer_write_long",
    "buffer_write_ulong",
    "buffer_write_struct",
    /* asynchronous writes */
    "buffer_write_async_int",
    "buffer_write_async_uint",
    "buffer_write_async_short",
    "buffer_write_async_ushort",
    "buffer_write_async_char",
    "buffer_write_async_uchar",
    "buffer_write_async_float",
    "buffer_write_async_long",
    "buffer_write_async_ulong",
    /* copies */
    "buffer_copy",
    "buffer_partial_copy",
    /* cl_mem_flags behavior */
    "mem_read_write_flags",
    "mem_write_only_flags",
    "mem_read_only_flags",
    "mem_copy_host_flags",
    "mem_alloc_ref_flags",
    "array_info_size",
    /* sub-buffers */
    "sub_buffers_read_write",
    "sub_buffers_read_write_dual_devices",
    "sub_buffers_overlapping",
    /* fills */
    "buffer_fill_int",
    "buffer_fill_uint",
    "buffer_fill_short",
    "buffer_fill_ushort",
    "buffer_fill_char",
    "buffer_fill_uchar",
    "buffer_fill_long",
    "buffer_fill_ulong",
    "buffer_fill_float",
    "buffer_fill_struct",
    /* migration */
    "buffer_migrate",
    "image_migrate",
};
|
||||
|
||||
ct_assert((sizeof(bufferfn_names) / sizeof(bufferfn_names[0])) == (sizeof(bufferfn_list) / sizeof(bufferfn_list[0])));
|
||||
|
||||
int num_bufferfns = sizeof(bufferfn_names) / sizeof(char *);
|
||||
|
||||
const cl_mem_flags flag_set[] = {
|
||||
CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_COPY_HOST_PTR,
|
||||
0
|
||||
};
|
||||
const char* flag_set_names[] = {
|
||||
"CL_MEM_ALLOC_HOST_PTR",
|
||||
"CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR",
|
||||
"CL_MEM_USE_HOST_PTR",
|
||||
"CL_MEM_COPY_HOST_PTR",
|
||||
"0"
|
||||
};
|
||||
|
||||
int main( int argc, const char *argv[] )
|
||||
{
|
||||
return runTestHarness( argc, argv, num_bufferfns, bufferfn_list, bufferfn_names,
|
||||
false, false, 0 );
|
||||
}
|
||||
132
test_conformance/buffers/procs.h
Normal file
132
test_conformance/buffers/procs.h
Normal file
@@ -0,0 +1,132 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef __PROCS_H__
|
||||
#define __PROCS_H__
|
||||
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/mt19937.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
|
||||
#ifndef __APPLE__
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
extern const cl_mem_flags flag_set[];
|
||||
extern const char* flag_set_names[];
|
||||
#define NUM_FLAGS 5
|
||||
|
||||
extern int test_buffer_read_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_random_size( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_async_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_async_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_async_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_async_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_async_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_async_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_async_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_async_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_async_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_array_barrier_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_array_barrier_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_array_barrier_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_array_barrier_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_array_barrier_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_array_barrier_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_array_barrier_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_array_barrier_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_read_array_barrier_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_async_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_async_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_async_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_async_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_async_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_async_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_async_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_async_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_write_async_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_copy( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_partial_copy( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int testBufferSize( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_mem_read_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_mem_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_mem_read_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_mem_copy_host_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_mem_alloc_ref_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_read_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_read_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_read_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_read_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_read_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_read_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_read_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_read_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_read_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_read_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
|
||||
extern int test_buffer_map_write_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_write_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_write_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_write_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_write_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_write_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_write_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_write_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_write_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_map_write_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
|
||||
extern int test_sub_buffers_read_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_sub_buffers_overlapping( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_image_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_buffer_fill_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_fill_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_fill_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_fill_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_fill_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_fill_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_fill_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_fill_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_fill_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
|
||||
#endif // #ifndef __PROCS_H__
|
||||
|
||||
295
test_conformance/buffers/test_buffer_copy.c
Normal file
295
test_conformance/buffers/test_buffer_copy.c
Normal file
@@ -0,0 +1,295 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
|
||||
/* Returns 0 when the first n ints of outptr equal inptr, -1 on any mismatch.
 * Non-positive n compares nothing and succeeds. */
static int verify_copy_buffer(int *inptr, int *outptr, int n)
{
    if (n <= 0)
        return 0;

    return memcmp(inptr, outptr, (size_t)n * sizeof(int)) ? -1 : 0;
}
|
||||
|
||||
|
||||
/*
 * Copies num_elements random cl_ints from a source buffer to a destination
 * buffer with clEnqueueCopyBuffer and verifies the data round-trips, for
 * every pairing of the cl_mem_flags in flag_set (NUM_FLAGS x NUM_FLAGS
 * combinations).  Buffers created with *_HOST_PTR flags are seeded through
 * the host pointer at creation; others via clEnqueueWriteBuffer.
 *
 * Returns the number of flag combinations whose data mismatched, or -1 on
 * an allocation or OpenCL API failure.
 */
static int test_copy( cl_command_queue queue, cl_context context, int num_elements, MTdata d )
{
    cl_mem buffers[2] = { NULL, NULL };
    cl_int *int_input_ptr, *int_output_ptr;
    cl_int err;
    int i;
    int src_flag_id, dst_flag_id;
    int errors = 0;

    size_t min_alignment = get_min_alignment(context);

    int_input_ptr = (cl_int *)align_malloc(sizeof(cl_int) * num_elements, min_alignment);
    int_output_ptr = (cl_int *)align_malloc(sizeof(cl_int) * num_elements, min_alignment);
    /* check the allocations, matching testPartialCopy below */
    if ( int_input_ptr == NULL || int_output_ptr == NULL ){
        log_error( " unable to allocate %d bytes of memory\n", (int)(sizeof(cl_int) * num_elements) );
        if ( int_input_ptr )
            align_free( (void *)int_input_ptr );
        if ( int_output_ptr )
            align_free( (void *)int_output_ptr );
        return -1;
    }

    for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) {
        for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
            log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);

            for (i = 0; i < num_elements; i++){
                int_input_ptr[i] = (int)genrand_int32( d );
                int_output_ptr[i] = 0xdeaddead;    /* seed with incorrect data */
            }

            /* *_HOST_PTR flags require the host pointer at creation time */
            if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
                buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], sizeof(cl_int) * num_elements, int_input_ptr, &err);
            else
                buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], sizeof(cl_int) * num_elements, NULL, &err);
            if ( err != CL_SUCCESS ){
                print_error(err, " clCreateBuffer failed\n" );
                goto error_exit;
            }

            if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
                buffers[1] = clCreateBuffer(context, flag_set[dst_flag_id], sizeof(cl_int) * num_elements, int_output_ptr, &err);
            else
                buffers[1] = clCreateBuffer(context, flag_set[dst_flag_id], sizeof(cl_int) * num_elements, NULL, &err);
            if ( err != CL_SUCCESS ){
                print_error(err, " clCreateBuffer failed\n" );
                goto error_exit;
            }

            /* buffers not seeded at creation get the data via a blocking write */
            if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
                err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)int_input_ptr, 0, NULL, NULL);
                if ( err != CL_SUCCESS ){
                    print_error( err, "clEnqueueWriteBuffer failed" );
                    goto error_exit;
                }
            }

            err = clEnqueueCopyBuffer(queue, buffers[0], buffers[1], 0, 0, sizeof(cl_int)*num_elements, 0, NULL, NULL);
            if ( err != CL_SUCCESS ){
                print_error( err, "clEnqueueCopyBuffer failed" );
                goto error_exit;
            }

            /* blocking read so int_output_ptr is valid immediately */
            err = clEnqueueReadBuffer( queue, buffers[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)int_output_ptr, 0, NULL, NULL );
            if ( err != CL_SUCCESS ){
                print_error( err, "clEnqueueReadBuffer failed" );
                goto error_exit;
            }

            if ( verify_copy_buffer(int_input_ptr, int_output_ptr, num_elements) ){
                log_error( " test failed\n" );
                errors++;
            }
            else{
                log_info( " test passed\n" );
            }

            /* per-combination cleanup */
            clReleaseMemObject( buffers[0] );
            clReleaseMemObject( buffers[1] );
            buffers[0] = buffers[1] = NULL;
        } // dst flags
    } // src flags

    /* cleanup */
    align_free( (void *)int_output_ptr );
    align_free( (void *)int_input_ptr );

    return errors;

error_exit:
    if ( buffers[0] )
        clReleaseMemObject( buffers[0] );
    if ( buffers[1] )
        clReleaseMemObject( buffers[1] );
    align_free( (void *)int_output_ptr );
    align_free( (void *)int_input_ptr );
    return -1;

} // end test_copy()
|
||||
|
||||
|
||||
// Copies a sub-range of one buffer into another with clEnqueueCopyBuffer and
// verifies the copied region, for every (src, dst) cl_mem_flags combination
// in flag_set[].
//
// queue/context      OpenCL objects to run against
// num_elements       total number of cl_ints in each buffer
// srcStart/dstStart  element offsets of the copied region in src/dst
// size               number of elements to copy
// d                  random-number state used to generate source data
//
// Returns 0 when every combination passes, a positive count of failing
// combinations, or -1 on an unrecoverable OpenCL/allocation error.
static int testPartialCopy( cl_command_queue queue, cl_context context, int num_elements, cl_uint srcStart, cl_uint dstStart, int size, MTdata d )
{
    cl_mem  buffers[2];
    int     *inptr, *outptr;
    cl_int  err;
    int     i;
    int     src_flag_id, dst_flag_id;
    int     errors = 0;

    size_t  min_alignment = get_min_alignment(context);

    inptr = (int *)align_malloc( sizeof(int) * num_elements, min_alignment);
    if ( ! inptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(int) * num_elements );
        return -1;
    }
    outptr = (int *)align_malloc( sizeof(int) * num_elements, min_alignment);
    if ( ! outptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(int) * num_elements );
        align_free( (void *)inptr );
        return -1;
    }

    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
        for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
            log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);

            // Fresh random source data; poison the destination so a missed
            // copy cannot pass verification by accident.
            for (i=0; i<num_elements; i++){
                inptr[i] = (int)genrand_int32( d );
                outptr[i] = (int)0xdeaddead; // seed with incorrect data
            }

            // *_HOST_PTR flags require the host pointer at creation time.
            if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
                buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], sizeof(cl_int) * num_elements, inptr, &err);
            else
                buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], sizeof(cl_int) * num_elements, NULL, &err);
            if ( err != CL_SUCCESS ){
                // Fixed: this print_error() call was missing its semicolon.
                print_error(err, " clCreateBuffer failed\n" );
                align_free( (void *)outptr );
                align_free( (void *)inptr );
                return -1;
            }

            if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
                buffers[1] = clCreateBuffer(context, flag_set[dst_flag_id], sizeof(cl_int) * num_elements, outptr, &err);
            else
                buffers[1] = clCreateBuffer(context, flag_set[dst_flag_id], sizeof(cl_int) * num_elements, NULL, &err);
            if ( err != CL_SUCCESS ){
                print_error(err, " clCreateBuffer failed\n" );
                clReleaseMemObject( buffers[0] );
                align_free( (void *)outptr );
                align_free( (void *)inptr );
                return -1;
            }

            // If the source buffer was not seeded through a host pointer,
            // upload the data explicitly.
            if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)){
                err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
                if ( err != CL_SUCCESS ){
                    print_error( err, "clEnqueueWriteBuffer failed" );
                    clReleaseMemObject( buffers[1] );
                    clReleaseMemObject( buffers[0] );
                    align_free( (void *)outptr );
                    align_free( (void *)inptr );
                    return -1;
                }
            }

            err = clEnqueueCopyBuffer(queue, buffers[0], buffers[1], srcStart*sizeof(cl_int), dstStart*sizeof(cl_int), sizeof(cl_int)*size, 0, NULL, NULL);
            if ( err != CL_SUCCESS){
                print_error( err, "clEnqueueCopyBuffer failed" );
                clReleaseMemObject( buffers[1] );
                clReleaseMemObject( buffers[0] );
                align_free( (void *)outptr );
                align_free( (void *)inptr );
                return -1;
            }

            // Blocking read of the whole destination buffer.
            err = clEnqueueReadBuffer( queue, buffers[1], true, 0, sizeof(int)*num_elements, (void *)outptr, 0, NULL, NULL );
            if ( err != CL_SUCCESS){
                print_error( err, "clEnqueueReadBuffer failed" );
                clReleaseMemObject( buffers[1] );
                clReleaseMemObject( buffers[0] );
                align_free( (void *)outptr );
                align_free( (void *)inptr );
                return -1;
            }

            // Compare only the copied window.
            if ( verify_copy_buffer(inptr + srcStart, outptr + dstStart, size) ){
                log_error("buffer_COPY test failed\n");
                errors++;
            }
            else{
                log_info("buffer_COPY test passed\n");
            }
            // cleanup
            clReleaseMemObject( buffers[1] );
            clReleaseMemObject( buffers[0] );
        } // dst mem flags
    } // src mem flags
    // cleanup
    align_free( (void *)outptr );
    align_free( (void *)inptr );

    return errors;

} // end testPartialCopy()
|
||||
|
||||
|
||||
// Entry point for the buffer-copy conformance test: runs the full copy test
// once at the preset element count, then at eight random sizes.
int test_buffer_copy( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    MTdata  rng = init_genrand( gRandomSeed );
    int     failures = 0;
    int     iteration;

    // First, the caller-supplied size.
    log_info( "set size: %d: ", num_elements );
    if ( test_copy( queue, context, num_elements, rng ) != 0 )
        failures++;

    // Then eight randomly chosen sizes in [2, 131072).
    for ( iteration = 0; iteration < 8; iteration++ ){
        int trial_size = (int)get_random_float(2.f,131072.f, rng);
        log_info( "random size: %d: ", trial_size );
        if ( test_copy( queue, context, trial_size, rng ) != 0 )
            failures++;
    }

    free_mtdata(rng);

    return failures;

} // end test_buffer_copy()
|
||||
|
||||
|
||||
// Entry point for the partial buffer-copy test: runs eight random
// (source offset, destination offset, length) copies that always stay
// inside the buffer bounds.
int test_buffer_partial_copy( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    MTdata  rng = init_genrand( gRandomSeed );
    int     failures = 0;
    int     iteration;

    for ( iteration = 0; iteration < 8; iteration++ ){
        cl_uint from = (cl_uint)get_random_float( 0.f, (float)(num_elements - 8), rng );
        int     span = (int)get_random_float( 8.f, (float)(num_elements - from), rng );
        cl_uint to   = (cl_uint)get_random_float( 0.f, (float)(num_elements - span), rng );

        log_info( "random partial copy from %d to %d, size: %d: ", (int)from, (int)to, span );
        if ( testPartialCopy( queue, context, num_elements, from, to, span, rng ) )
            failures++;
    }

    free_mtdata(rng);
    return failures;

} // end test_buffer_partial_copy()
|
||||
|
||||
1560
test_conformance/buffers/test_buffer_fill.c
Normal file
1560
test_conformance/buffers/test_buffer_fill.c
Normal file
File diff suppressed because it is too large
Load Diff
703
test_conformance/buffers/test_buffer_map.c
Normal file
703
test_conformance/buffers/test_buffer_map.c
Normal file
@@ -0,0 +1,703 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
|
||||
// Reference values written by the test kernels and checked on the host side.
#define TEST_PRIME_INT      ((1<<16)+1)
#define TEST_PRIME_UINT     ((1U<<16)+1U)
#define TEST_PRIME_LONG     ((1LL<<32)+1LL)
#define TEST_PRIME_ULONG    ((1ULL<<32)+1ULL)
// Fixed: "1S" is not a valid C integer-literal suffix; the macro would not
// compile if expanded. Use an explicit cast to short instead (matches the
// pattern the verify functions use).
#define TEST_PRIME_SHORT    ((short)((1<<8)+1))
#define TEST_PRIME_FLOAT    (float)3.40282346638528860e+38
#define TEST_PRIME_HALF     119.f
#define TEST_BOOL           true
#define TEST_PRIME_CHAR     0x77
|
||||
|
||||
|
||||
// Host-side mirror of the TestStruct declared inside the kernel source
// strings below; the two layouts must match for the struct read test to
// compare fields meaningfully.
#ifndef TestStruct
typedef struct{
    int a;
    float b;
} TestStruct;
#endif
|
||||
|
||||
|
||||
//--- the code for the kernel executables
|
||||
static const char *buffer_read_int_kernel_code[] = {
|
||||
"__kernel void test_buffer_read_int(__global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1<<16)+1);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_int2(__global int2 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1<<16)+1);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_int4(__global int4 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1<<16)+1);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_int8(__global int8 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1<<16)+1);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_int16(__global int16 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1<<16)+1);\n"
|
||||
"}\n" };
|
||||
|
||||
static const char *int_kernel_name[] = { "test_buffer_read_int", "test_buffer_read_int2", "test_buffer_read_int4", "test_buffer_read_int8", "test_buffer_read_int16" };
|
||||
|
||||
static const char *buffer_read_uint_kernel_code[] = {
|
||||
"__kernel void test_buffer_read_uint(__global uint *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1U<<16)+1U);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_uint2(__global uint2 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1U<<16)+1U);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_uint4(__global uint4 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1U<<16)+1U);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_uint8(__global uint8 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1U<<16)+1U);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_uint16(__global uint16 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1U<<16)+1U);\n"
|
||||
"}\n" };
|
||||
|
||||
static const char *uint_kernel_name[] = { "test_buffer_read_uint", "test_buffer_read_uint2", "test_buffer_read_uint4", "test_buffer_read_uint8", "test_buffer_read_uint16" };
|
||||
|
||||
static const char *buffer_read_long_kernel_code[] = {
|
||||
"__kernel void test_buffer_read_long(__global long *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1L<<32)+1L);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_long2(__global long2 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1L<<32)+1L);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_long4(__global long4 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1L<<32)+1L);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_long8(__global long8 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1L<<32)+1L);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_long16(__global long16 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1L<<32)+1L);\n"
|
||||
"}\n" };
|
||||
|
||||
static const char *long_kernel_name[] = { "test_buffer_read_long", "test_buffer_read_long2", "test_buffer_read_long4", "test_buffer_read_long8", "test_buffer_read_long16" };
|
||||
|
||||
static const char *buffer_read_ulong_kernel_code[] = {
|
||||
"__kernel void test_buffer_read_ulong(__global ulong *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1UL<<32)+1UL);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_ulong2(__global ulong2 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1UL<<32)+1UL);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_ulong4(__global ulong4 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1UL<<32)+1UL);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_ulong8(__global ulong8 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1UL<<32)+1UL);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_ulong16(__global ulong16 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = ((1UL<<32)+1UL);\n"
|
||||
"}\n" };
|
||||
|
||||
static const char *ulong_kernel_name[] = { "test_buffer_read_ulong", "test_buffer_read_ulong2", "test_buffer_read_ulong4", "test_buffer_read_ulong8", "test_buffer_read_ulong16" };
|
||||
|
||||
static const char *buffer_read_short_kernel_code[] = {
|
||||
"__kernel void test_buffer_read_short(__global short *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (short)((1<<8)+1);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_short2(__global short2 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (short)((1<<8)+1);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_short4(__global short4 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (short)((1<<8)+1);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_short8(__global short8 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (short)((1<<8)+1);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_short16(__global short16 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (short)((1<<8)+1);\n"
|
||||
"}\n" };
|
||||
|
||||
static const char *short_kernel_name[] = { "test_buffer_read_short", "test_buffer_read_short2", "test_buffer_read_short4", "test_buffer_read_short8", "test_buffer_read_short16" };
|
||||
|
||||
|
||||
static const char *buffer_read_ushort_kernel_code[] = {
|
||||
"__kernel void test_buffer_read_ushort(__global ushort *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (ushort)((1<<8)+1);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_ushort2(__global ushort2 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (ushort)((1<<8)+1);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_ushort4(__global ushort4 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (ushort)((1<<8)+1);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_ushort8(__global ushort8 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (ushort)((1<<8)+1);\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_ushort16(__global ushort16 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (ushort)((1<<8)+1);\n"
|
||||
"}\n" };
|
||||
|
||||
static const char *ushort_kernel_name[] = { "test_buffer_read_ushort", "test_buffer_read_ushort2", "test_buffer_read_ushort4", "test_buffer_read_ushort8", "test_buffer_read_ushort16" };
|
||||
|
||||
|
||||
static const char *buffer_read_float_kernel_code[] = {
|
||||
"__kernel void test_buffer_read_float(__global float *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (float)3.40282346638528860e+38;\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_float2(__global float2 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (float)3.40282346638528860e+38;\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_float4(__global float4 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (float)3.40282346638528860e+38;\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_float8(__global float8 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (float)3.40282346638528860e+38;\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_float16(__global float16 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (float)3.40282346638528860e+38;\n"
|
||||
"}\n" };
|
||||
|
||||
static const char *float_kernel_name[] = { "test_buffer_read_float", "test_buffer_read_float2", "test_buffer_read_float4", "test_buffer_read_float8", "test_buffer_read_float16" };
|
||||
|
||||
|
||||
static const char *buffer_read_char_kernel_code[] = {
|
||||
"__kernel void test_buffer_read_char(__global char *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (char)'w';\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_char2(__global char2 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (char)'w';\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_char4(__global char4 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (char)'w';\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_char8(__global char8 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (char)'w';\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_char16(__global char16 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (char)'w';\n"
|
||||
"}\n" };
|
||||
|
||||
static const char *char_kernel_name[] = { "test_buffer_read_char", "test_buffer_read_char2", "test_buffer_read_char4", "test_buffer_read_char8", "test_buffer_read_char16" };
|
||||
|
||||
|
||||
static const char *buffer_read_uchar_kernel_code[] = {
|
||||
"__kernel void test_buffer_read_uchar(__global uchar *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = 'w';\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_uchar2(__global uchar2 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (uchar)'w';\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_uchar4(__global uchar4 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (uchar)'w';\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_uchar8(__global uchar8 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (uchar)'w';\n"
|
||||
"}\n",
|
||||
|
||||
"__kernel void test_buffer_read_uchar16(__global uchar16 *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (uchar)'w';\n"
|
||||
"}\n" };
|
||||
|
||||
static const char *uchar_kernel_name[] = { "test_buffer_read_uchar", "test_buffer_read_uchar2", "test_buffer_read_uchar4", "test_buffer_read_uchar8", "test_buffer_read_uchar16" };
|
||||
|
||||
|
||||
static const char *buffer_read_struct_kernel_code[] = {
|
||||
"typedef struct{\n"
|
||||
"int a;\n"
|
||||
"float b;\n"
|
||||
"} TestStruct;\n"
|
||||
"__kernel void test_buffer_read_struct(__global TestStruct *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid].a = ((1<<16)+1);\n"
|
||||
" dst[tid].b = (float)3.40282346638528860e+38;\n"
|
||||
"}\n" };
|
||||
|
||||
static const char *struct_kernel_name[] = { "test_buffer_read_struct" };
|
||||
|
||||
|
||||
//--- the verify functions
|
||||
static int verify_read_int(void *ptr, int n)
|
||||
{
|
||||
int i;
|
||||
int *outptr = (int *)ptr;
|
||||
|
||||
for (i=0; i<n; i++){
|
||||
if ( outptr[i] != TEST_PRIME_INT )
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Returns 0 if all n cl_uints at ptr equal TEST_PRIME_UINT, -1 otherwise.
static int verify_read_uint(void *ptr, int n)
{
    int i;
    cl_uint *outptr = (cl_uint *)ptr;

    for (i=0; i<n; i++){
        if ( outptr[i] != TEST_PRIME_UINT )
            return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
// Returns 0 if all n cl_longs at ptr equal TEST_PRIME_LONG, -1 otherwise.
static int verify_read_long(void *ptr, int n)
{
    int i;
    cl_long *outptr = (cl_long *)ptr;

    for (i=0; i<n; i++){
        if ( outptr[i] != TEST_PRIME_LONG )
            return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
// Returns 0 if all n cl_ulongs at ptr equal TEST_PRIME_ULONG, -1 otherwise.
static int verify_read_ulong(void *ptr, int n)
{
    int i;
    cl_ulong *outptr = (cl_ulong *)ptr;

    for (i=0; i<n; i++){
        if ( outptr[i] != TEST_PRIME_ULONG )
            return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
// Returns 0 if all n shorts at ptr equal the expected pattern (1<<8)+1
// written by the short kernels, -1 otherwise.
static int verify_read_short(void *ptr, int n)
{
    const short *data = (const short *)ptr;
    int idx;

    for ( idx = 0; idx < n; idx++ )
    {
        if ( data[idx] != (short)((1<<8)+1) )
            return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
// Returns 0 if all n cl_ushorts at ptr equal the expected pattern (1<<8)+1
// written by the ushort kernels, -1 otherwise.
static int verify_read_ushort(void *ptr, int n)
{
    int i;
    cl_ushort *outptr = (cl_ushort *)ptr;

    for (i=0; i<n; i++){
        if ( outptr[i] != (cl_ushort)((1<<8)+1) )
            return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
// Returns 0 if all n floats at ptr equal TEST_PRIME_FLOAT (FLT_MAX written
// exactly by the kernels, so bitwise equality is expected), -1 otherwise.
static int verify_read_float( void *ptr, int n )
{
    int i;
    float *outptr = (float *)ptr;

    for (i=0; i<n; i++){
        if ( outptr[i] != TEST_PRIME_FLOAT )
            return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
// Returns 0 if all n chars at ptr equal TEST_PRIME_CHAR (0x77 == 'w', the
// value the char kernels write), -1 otherwise.
static int verify_read_char(void *ptr, int n)
{
    int i;
    char *outptr = (char *)ptr;

    for (i=0; i<n; i++){
        if ( outptr[i] != TEST_PRIME_CHAR )
            return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
// Returns 0 if all n cl_uchars at ptr equal TEST_PRIME_CHAR (0x77 == 'w',
// the value the uchar kernels write), -1 otherwise.
static int verify_read_uchar( void *ptr, int n )
{
    int i;
    cl_uchar *outptr = (cl_uchar *)ptr;

    for ( i = 0; i < n; i++ ){
        if ( outptr[i] != TEST_PRIME_CHAR )
            return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
// Returns 0 if every TestStruct at ptr holds the (a, b) pattern written by
// the struct kernel (TEST_PRIME_INT / TEST_PRIME_FLOAT), -1 otherwise.
static int verify_read_struct( void *ptr, int n )
{
    int i;
    TestStruct *outptr = (TestStruct *)ptr;

    for ( i = 0; i < n; i++ ){
        if ( ( outptr[i].a != TEST_PRIME_INT ) ||
             ( outptr[i].b != TEST_PRIME_FLOAT ) )
            return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
//----- the test functions
|
||||
// Runs the map-for-read test for one element type: for each cl_mem_flags
// variant it creates a buffer, launches a kernel that fills it with a known
// pattern, maps the buffer with CL_MAP_READ and verifies the mapped contents
// with fn().
//
// size        sizeof the scalar element; vector widths are derived from it
// type        type name used in log messages (and for the long/ulong skip)
// loops       number of vector widths to test (scalar, 2, 4, 8, 16 -> up to 5)
// kernelCode  one kernel source per vector width
// kernelName  matching kernel entry-point names
// fn          verifier: returns 0 when the data matches the expected pattern
//
// Returns the number of failing type/width combinations, or -1 on an
// unrecoverable setup error.
static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
                                 const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
{
    cl_mem      buffers[5];
    void        *outptr[5];
    cl_program  program[5];
    cl_kernel   kernel[5];
    size_t      threads[3], localThreads[3];
    cl_int      err;
    int         i;
    size_t      ptrSizes[5];
    int         src_flag_id;
    int         total_errors = 0;
    void        *mappedPtr;

    size_t      min_alignment = get_min_alignment(context);

    // Element sizes for the scalar and the 2/4/8/16-wide vector variants.
    ptrSizes[0] = size;
    ptrSizes[1] = ptrSizes[0] << 1;
    ptrSizes[2] = ptrSizes[1] << 1;
    ptrSizes[3] = ptrSizes[2] << 1;
    ptrSizes[4] = ptrSizes[3] << 1;

    //embedded devices don't support long/ulong so skip over
    if (! gHasLong && strstr(type,"long"))
        return 0;

    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
        log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);

        for ( i = 0; i < loops; i++ ){
            outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
            if ( ! outptr[i] ){
                log_error( " unable to allocate %d bytes of memory\n", (int)ptrSizes[i] * num_elements );
                return -1;
            }

            // *_HOST_PTR flags require the host pointer at creation time.
            if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, outptr[i], &err);
            else
                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, NULL, &err);

            // Fixed: the original combined a pointer test and the error code
            // with bitwise '|'; use a logical OR and an explicit compare.
            if ( ! buffers[i] || err != CL_SUCCESS ){
                print_error(err, "clCreateBuffer failed\n" );
                align_free( outptr[i] );
                return -1;
            }

            err = create_single_kernel_helper(context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
            if ( err ){
                log_error( " Error creating program for %s\n", type );
                clReleaseMemObject( buffers[i] );
                align_free( outptr[i] );
                return -1;
            }

            err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
            if ( err != CL_SUCCESS ){
                print_error( err, "clSetKernelArg failed\n" );
                clReleaseKernel( kernel[i] );
                clReleaseProgram( program[i] );
                clReleaseMemObject( buffers[i] );
                align_free( outptr[i] );
                return -1;
            }

            threads[0] = (cl_uint)num_elements;

            err = get_max_common_work_group_size( context, kernel[i], threads[0], &localThreads[0] );
            if ( err != CL_SUCCESS ){
                // Fixed: the original used test_error() here, leaking the
                // kernel, program, buffer and host allocation on failure.
                print_error( err, "Unable to get work group size to use" );
                clReleaseKernel( kernel[i] );
                clReleaseProgram( program[i] );
                clReleaseMemObject( buffers[i] );
                align_free( outptr[i] );
                return -1;
            }

            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
            if ( err != CL_SUCCESS ){
                print_error( err, "clEnqueueNDRangeKernel failed\n" );
                clReleaseKernel( kernel[i] );
                clReleaseProgram( program[i] );
                clReleaseMemObject( buffers[i] );
                align_free( outptr[i] );
                return -1;
            }

            // Blocking map so the kernel results are visible to the host.
            mappedPtr = clEnqueueMapBuffer(queue, buffers[i], CL_TRUE, CL_MAP_READ, 0, ptrSizes[i]*num_elements, 0, NULL, NULL, &err);
            if ( err != CL_SUCCESS ){
                print_error( err, "clEnqueueMapBuffer failed" );
                clReleaseKernel( kernel[i] );
                clReleaseProgram( program[i] );
                clReleaseMemObject( buffers[i] );
                align_free( outptr[i] );
                return -1;
            }

            // A width-(1<<i) vector buffer holds num_elements*(1<<i) scalars.
            if (fn(mappedPtr, num_elements*(1<<i))){
                log_error(" %s%d test failed\n", type, 1<<i);
                total_errors++;
            }
            else{
                log_info(" %s%d test passed\n", type, 1<<i);
            }

            err = clEnqueueUnmapMemObject(queue, buffers[i], mappedPtr, 0, NULL, NULL);
            if ( err != CL_SUCCESS ){
                // Fixed: test_error() here also leaked; release explicitly.
                print_error( err, "clEnqueueUnmapMemObject failed" );
                clReleaseKernel( kernel[i] );
                clReleaseProgram( program[i] );
                clReleaseMemObject( buffers[i] );
                align_free( outptr[i] );
                return -1;
            }

            // cleanup
            clReleaseKernel( kernel[i] );
            clReleaseProgram( program[i] );
            clReleaseMemObject( buffers[i] );

            // If we are using the outptr[i] as backing via USE_HOST_PTR we need to make sure we are done before freeing.
            if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR)) {
                err = clFinish(queue);
                test_error(err, "clFinish failed");
            }
            align_free( outptr[i] );
        }
    } // cl_mem_flags

    return total_errors;

} // end test_buffer_map_read()
|
||||
|
||||
|
||||
// Stamps out one map-read test entry point per scalar type. Each generated
// function runs test_buffer_map_read() over the scalar plus its 2/4/8/16
// vector variants (loops == 5), wiring up the matching kernel sources,
// kernel names and verify function by token pasting.
#define DECLARE_LOCK_TEST(type, realType) \
int test_buffer_map_read_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) \
{ \
    return test_buffer_map_read( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
                                 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
}

DECLARE_LOCK_TEST(int, cl_int)
DECLARE_LOCK_TEST(uint, cl_uint)
DECLARE_LOCK_TEST(long, cl_long)
DECLARE_LOCK_TEST(ulong, cl_ulong)
DECLARE_LOCK_TEST(short, cl_short)
DECLARE_LOCK_TEST(ushort, cl_ushort)
DECLARE_LOCK_TEST(char, cl_char)
DECLARE_LOCK_TEST(uchar, cl_uchar)
DECLARE_LOCK_TEST(float, cl_float)
||||
|
||||
// Map-read test for the struct type. Structs have no vector variants, so a
// single pass (loops == 1) with verify_read_struct as the checker suffices.
int test_buffer_map_read_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    return test_buffer_map_read( deviceID, context, queue, num_elements,
                                 sizeof( TestStruct ), (char*)"struct", 1,
                                 buffer_read_struct_kernel_code, struct_kernel_name,
                                 verify_read_struct );

} // end test_buffer_map_read_struct()
|
||||
|
||||
524
test_conformance/buffers/test_buffer_mem.c
Normal file
524
test_conformance/buffers/test_buffer_mem.c
Normal file
@@ -0,0 +1,524 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
#ifndef uchar
|
||||
typedef unsigned char uchar;
|
||||
#endif
|
||||
|
||||
#define USE_LOCAL_WORK_GROUP 1
|
||||
|
||||
|
||||
const char *mem_read_write_kernel_code =
|
||||
"__kernel void test_mem_read_write(__global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = dst[tid]+1;\n"
|
||||
"}\n";
|
||||
|
||||
const char *mem_read_kernel_code =
|
||||
"__kernel void test_mem_read(__global int *src, __global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = src[tid]+1;\n"
|
||||
"}\n";
|
||||
|
||||
const char *mem_write_kernel_code =
|
||||
"__kernel void test_mem_write(__global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = dst[tid]+1;\n"
|
||||
"}\n";
|
||||
|
||||
|
||||
// Returns 0 if each of the n ints equals its index plus one (the kernels
// increment the seeded values 0..n-1 in place), -1 on any mismatch.
static int verify_mem( int *outptr, int n )
{
    int idx;

    for ( idx = 0; idx < n; idx++ )
    {
        if ( outptr[idx] != idx + 1 )
            return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
|
||||
/* Exercise a CL_MEM_READ_WRITE buffer: upload 0..n-1, run a kernel that
 * increments each element in place, read it back, and verify.
 * Returns 0 on pass, -1 on any failure. */
int test_mem_read_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_mem      buffer = NULL;
    cl_program  program = NULL;
    cl_kernel   kernel = NULL;
    cl_int      *inptr = NULL;
    cl_int      *outptr = NULL;
    size_t      global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
    size_t      local_work_size[3];
#endif
    cl_int      err;
    int         i;
    int         result = -1;    // pessimistic default; set to 0 only on full success

    size_t      min_alignment = get_min_alignment(context);

    global_work_size[0] = (cl_uint)num_elements;

    // Host staging buffers, aligned to the device's minimum alignment.
    // The original did not check these allocations before use.
    inptr = (cl_int *)align_malloc(sizeof(cl_int) * num_elements, min_alignment);
    outptr = (cl_int *)align_malloc(sizeof(cl_int) * num_elements, min_alignment);
    if ( !inptr || !outptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
        goto cleanup;
    }

    buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * num_elements, NULL, &err);
    if (err != CL_SUCCESS) {
        print_error( err, "clCreateBuffer failed");
        goto cleanup;
    }

    for (i = 0; i < num_elements; i++)
        inptr[i] = i;

    err = clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, sizeof(cl_int) * num_elements, (void *)inptr, 0, NULL, NULL);
    if (err != CL_SUCCESS) {
        print_error( err, "clEnqueueWriteBuffer failed");
        goto cleanup;
    }

    err = create_single_kernel_helper( context, &program, &kernel, 1, &mem_read_write_kernel_code, "test_mem_read_write" );
    if (err)
        goto cleanup;

#ifdef USE_LOCAL_WORK_GROUP
    err = get_max_common_work_group_size( context, kernel, global_work_size[0], &local_work_size[0] );
    test_error( err, "Unable to get work group size to use" );
#endif

    err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *)&buffer );
    if ( err != CL_SUCCESS ){
        print_error( err, "clSetKernelArg failed" );
        goto cleanup;
    }

#ifdef USE_LOCAL_WORK_GROUP
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
    if (err != CL_SUCCESS){
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    // Blocking read (CL_TRUE, the proper cl_bool constant rather than the
    // C++-only `true`) so outptr is valid immediately afterwards.
    err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, sizeof(cl_int) * num_elements, (void *)outptr, 0, NULL, NULL );
    if ( err != CL_SUCCESS ){
        print_error( err, "clEnqueueReadBuffer failed" );
        goto cleanup;
    }

    if (verify_mem(outptr, num_elements)){
        log_error("buffer_MEM_READ_WRITE test failed\n");
        result = -1;
    }
    else{
        log_info("buffer_MEM_READ_WRITE test passed\n");
        result = 0;
    }

cleanup:
    // Single exit point releases whatever was actually created.
    if (buffer) clReleaseMemObject( buffer );
    if (kernel) clReleaseKernel( kernel );
    if (program) clReleaseProgram( program );
    if (outptr) align_free( (void *)outptr );
    if (inptr) align_free( (void *)inptr );

    return result;
} // end test_mem_read_write_flags()
|
||||
|
||||
|
||||
/* Exercise a CL_MEM_WRITE_ONLY buffer: upload data, run the write-only test
 * kernel, and read back.  The read-back data is not verified (a write-only
 * buffer makes no guarantee about prior contents) — this test only checks
 * that all API calls succeed.  Returns 0 on pass, -1 on any failure. */
int test_mem_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_mem      buffer = NULL;
    cl_program  program = NULL;
    cl_kernel   kernel = NULL;
    int         *inptr = NULL;
    int         *outptr = NULL;
    size_t      global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
    size_t      local_work_size[3];
#endif
    cl_int      err;
    int         i;
    int         result = -1;    // set to 0 only when every call succeeds

    size_t      min_alignment = get_min_alignment(context);

    global_work_size[0] = (cl_uint)num_elements;

    inptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
    outptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
    if ( !inptr || !outptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
        goto cleanup;
    }

    buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int) * num_elements, NULL, &err);
    if (err != CL_SUCCESS)
    {
        print_error(err, "clCreateBuffer failed\n");
        goto cleanup;
    }

    for (i = 0; i < num_elements; i++)
        inptr[i] = i;

    err = clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, sizeof(cl_int) * num_elements, (void *)inptr, 0, NULL, NULL);
    if (err != CL_SUCCESS){
        print_error( err, "clEnqueueWriteBuffer failed" );
        goto cleanup;
    }

    err = create_single_kernel_helper( context, &program, &kernel, 1, &mem_write_kernel_code, "test_mem_write" );
    if (err)
        goto cleanup;

#ifdef USE_LOCAL_WORK_GROUP
    err = get_max_common_work_group_size( context, kernel, global_work_size[0], &local_work_size[0] );
    test_error( err, "Unable to get work group size to use" );
#endif

    err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *)&buffer );
    if ( err != CL_SUCCESS ){
        print_error( err, "clSetKernelArg failed");
        goto cleanup;
    }

#ifdef USE_LOCAL_WORK_GROUP
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
    if ( err != CL_SUCCESS ){
        print_error( err, "clEnqueueNDRangeKernel failed" );
        goto cleanup;
    }

    // CL_TRUE (proper cl_bool constant, not the C++-only `true`) makes the
    // read blocking so outptr is populated before we return.
    err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, sizeof(cl_int) * num_elements, (void *)outptr, 0, NULL, NULL );
    if ( err != CL_SUCCESS ){
        print_error( err, "Error reading array" );
        goto cleanup;
    }

    result = 0;    // all API calls succeeded

cleanup:
    // Single exit point releases whatever was actually created.
    if (buffer) clReleaseMemObject( buffer );
    if (kernel) clReleaseKernel( kernel );
    if (program) clReleaseProgram( program );
    if (outptr) align_free( (void *)outptr );
    if (inptr) align_free( (void *)inptr );

    return result;
} // end test_mem_write_flags()
|
||||
|
||||
|
||||
/* Exercise a CL_MEM_READ_ONLY source buffer: upload 0..n-1 into a read-only
 * buffer, run a kernel that copies src[i]+1 into a separate read-write
 * destination buffer, read the destination back, and verify.
 * Returns 0 on pass, -1 on any failure. */
int test_mem_read_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_mem      src_buf = NULL;     // CL_MEM_READ_ONLY input
    cl_mem      dst_buf = NULL;     // CL_MEM_READ_WRITE output
    cl_program  program = NULL;
    cl_kernel   kernel = NULL;
    int         *inptr = NULL;
    int         *outptr = NULL;
    size_t      global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
    size_t      local_work_size[3];
#endif
    cl_int      err;
    int         i;
    int         result = -1;    // set only after verification

    size_t      min_alignment = get_min_alignment(context);

    global_work_size[0] = (cl_uint)num_elements;

    inptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
    outptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
    if ( !inptr || !outptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
        goto cleanup;
    }

    src_buf = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * num_elements, NULL, &err);
    if ( err != CL_SUCCESS ){
        print_error(err, " clCreateBuffer failed to create READ_ONLY array\n" );
        goto cleanup;
    }

    for (i = 0; i < num_elements; i++)
        inptr[i] = i;

    dst_buf = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err);
    if ( err != CL_SUCCESS ){
        print_error(err, " clCreateBuffer failed to create output array\n" );
        goto cleanup;
    }

    err = clEnqueueWriteBuffer(queue, src_buf, CL_TRUE, 0, sizeof(cl_int) * num_elements, (void *)inptr, 0, NULL, NULL);
    if ( err != CL_SUCCESS ){
        print_error( err, "clEnqueueWriteBuffer() failed");
        goto cleanup;
    }

    err = create_single_kernel_helper( context, &program, &kernel, 1, &mem_read_kernel_code, "test_mem_read" );
    if ( err )
        goto cleanup;

#ifdef USE_LOCAL_WORK_GROUP
    err = get_max_common_work_group_size( context, kernel, global_work_size[0], &local_work_size[0] );
    test_error( err, "Unable to get work group size to use" );
#endif

    err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *)&src_buf );
    err |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *)&dst_buf );
    if ( err != CL_SUCCESS ){
        print_error( err, "clSetKernelArgs failed" );
        goto cleanup;
    }

#ifdef USE_LOCAL_WORK_GROUP
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
    if (err != CL_SUCCESS){
        print_error( err, "clEnqueueNDRangeKernel failed" );
        goto cleanup;
    }

    // Blocking read (CL_TRUE — proper cl_bool constant, not the C++-only
    // `true`) from the destination buffer.
    err = clEnqueueReadBuffer( queue, dst_buf, CL_TRUE, 0, sizeof(cl_int) * num_elements, (void *)outptr, 0, NULL, NULL );
    if ( err != CL_SUCCESS ){
        print_error( err, "clEnqueueReadBuffer failed" );
        goto cleanup;
    }

    if (verify_mem(outptr, num_elements)){
        log_error( " CL_MEM_READ_ONLY test failed\n" );
        result = -1;
    }
    else{
        log_info( " CL_MEM_READ_ONLY test passed\n" );
        result = 0;
    }

cleanup:
    // Single exit point releases whatever was actually created.
    if (dst_buf) clReleaseMemObject( dst_buf );
    if (src_buf) clReleaseMemObject( src_buf );
    if (kernel) clReleaseKernel( kernel );
    if (program) clReleaseProgram( program );
    if (inptr) align_free( (void *)inptr );
    if (outptr) align_free( (void *)outptr );

    return result;
} // end test_mem_read_flags()
|
||||
|
||||
|
||||
/* Exercise CL_MEM_COPY_HOST_PTR: create a buffer pre-initialized from host
 * memory, run the read-write increment kernel on it, read it back into the
 * same host array, and verify.  Returns 0 on pass, -1 on any failure. */
int test_mem_copy_host_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_mem      buffer = NULL;
    cl_program  program = NULL;
    cl_kernel   kernel = NULL;
    int         *ptr = NULL;
    size_t      global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
    size_t      local_work_size[3];
#endif
    cl_int      err;
    int         i;
    int         result = -1;    // set only after verification

    size_t      min_alignment = get_min_alignment(context);

    global_work_size[0] = (cl_uint)num_elements;

    ptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
    if ( ! ptr ){
        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
        goto cleanup;
    }

    for (i = 0; i < num_elements; i++)
        ptr[i] = i;

    // CL_MEM_COPY_HOST_PTR snapshots ptr's contents at creation time.
    buffer = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * num_elements, (void *)ptr, &err);
    if (err != CL_SUCCESS){
        print_error(err, "clCreateBuffer failed for CL_MEM_COPY_HOST_PTR\n");
        goto cleanup;
    }

    err = create_single_kernel_helper( context, &program, &kernel, 1, &mem_read_write_kernel_code, "test_mem_read_write" );
    if (err)
        goto cleanup;

#ifdef USE_LOCAL_WORK_GROUP
    err = get_max_common_work_group_size( context, kernel, global_work_size[0], &local_work_size[0] );
    test_error( err, "Unable to get work group size to use" );
#endif

    err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *)&buffer );
    if (err != CL_SUCCESS){
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

#ifdef USE_LOCAL_WORK_GROUP
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
    if (err != CL_SUCCESS){
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    // Blocking read (CL_TRUE, proper cl_bool constant rather than the
    // C++-only `true`).  The original error message referenced
    // CL_MEM_ALLOC_CONSTANT_POOL, a flag this test never uses.
    err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, sizeof(cl_int) * num_elements, (void *)ptr, 0, NULL, NULL );
    if (err != CL_SUCCESS){
        log_error("clEnqueueReadBuffer failed for CL_MEM_COPY_HOST_PTR buffer.\n");
        goto cleanup;
    }

    if ( verify_mem( ptr, num_elements ) ){
        log_error("CL_MEM_COPY_HOST_PTR test failed\n");
        result = -1;
    }
    else{
        log_info("CL_MEM_COPY_HOST_PTR test passed\n");
        result = 0;
    }

cleanup:
    // Single exit point releases whatever was actually created.
    if (buffer) clReleaseMemObject( buffer );
    if (kernel) clReleaseKernel( kernel );
    if (program) clReleaseProgram( program );
    if (ptr) align_free( (void *)ptr );

    return result;
} // end test_mem_copy_host_flags()
|
||||
|
||||
417
test_conformance/buffers/test_buffer_migrate.c
Normal file
417
test_conformance/buffers/test_buffer_migrate.c
Normal file
@@ -0,0 +1,417 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
#define MAX_SUB_DEVICES 16 // Limit the sub-devices to ensure no out of resource errors.
|
||||
#define BUFFER_SIZE 1024
|
||||
|
||||
// Kernel source code
|
||||
static const char *buffer_migrate_kernel_code =
|
||||
"__kernel void test_buffer_migrate(__global uint *dst, __global uint *src1, __global uint *src2, uint x)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
" dst[tid] = src1[tid] ^ src2[tid] ^ x;\n"
|
||||
"}\n";
|
||||
|
||||
enum migrations { MIGRATE_PREFERRED, // migrate to the preferred sub-device
|
||||
MIGRATE_NON_PREFERRED, // migrate to a randomly chosen non-preferred sub-device
|
||||
MIGRATE_RANDOM, // migrate to a randomly chosen sub-device with randomly chosen flags
|
||||
NUMBER_OF_MIGRATIONS };
|
||||
|
||||
/* Pass-through initializer: fill `buffer` (if non-NULL) with BUFFER_SIZE
 * cl_uints from `data` via a blocking write, logging — but not failing on —
 * any enqueue error.  Always returns `buffer` so it can wrap a
 * clCreateBuffer call directly. */
static cl_mem init_buffer(cl_command_queue cmd_q, cl_mem buffer, cl_uint *data)
{
    if (buffer == NULL)
        return buffer;

    cl_int status = clEnqueueWriteBuffer(cmd_q, buffer, CL_TRUE, 0,
                                         sizeof(cl_uint) * BUFFER_SIZE,
                                         data, 0, NULL, NULL);
    if (status != CL_SUCCESS)
        print_error(status, "Failed on enqueue write of buffer data.");

    return buffer;
}
|
||||
|
||||
/* Migrate each of the num_devices memory objects to a sub-device's queue
 * chosen per the requested policy, recording the migration flags used for
 * object i in flags[i] (callers inspect these to know whether contents
 * became undefined).  Returns the last clEnqueueMigrateMemObjects error,
 * or CL_SUCCESS. */
static cl_int migrateMemObject(enum migrations migrate, cl_command_queue *queues, cl_mem *mem_objects, cl_uint num_devices, cl_mem_migration_flags *flags, MTdata d)
{
    cl_uint i, j;
    cl_int err = CL_SUCCESS;

    for (i = 0; i < num_devices; i++) {
        j = genrand_int32(d) % num_devices;
        flags[i] = 0;
        switch (migrate) {
            case MIGRATE_PREFERRED:
                // Force the device to be preferred
                j = i;
                break;
            case MIGRATE_NON_PREFERRED:
                // Coerce the device to be non-preferred
                if ((j == i) && (num_devices > 1)) j = (j + 1) % num_devices;
                break;
            case MIGRATE_RANDOM:
                // Choose a random set of flags (stray ";;" removed)
                flags[i] = (cl_mem_migration_flags)(genrand_int32(d) & (CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED));
                break;
            default:
                // NUMBER_OF_MIGRATIONS is a sentinel, never passed in
                break;
        }
        if ((err = clEnqueueMigrateMemObjects(queues[j], 1, (const cl_mem *)(&mem_objects[i]), flags[i], 0, NULL, NULL)) != CL_SUCCESS) {
            print_error(err, "Failed migrating memory object.");
        }
    }
    return err;
}
|
||||
|
||||
/* For every sub-device whose buffer was last migrated with
 * CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (its contents are no longer
 * guaranteed), rewrite the known-good host copy `buffer` into buffers[i]
 * with a blocking write.  Returns the first write error, else CL_SUCCESS.
 * (Unused local `j` from the original removed.) */
static cl_int restoreBuffer(cl_command_queue *queues, cl_mem *buffers, cl_uint num_devices, cl_mem_migration_flags *flags, cl_uint *buffer)
{
    cl_uint i;
    cl_int err;

    // If the buffer was previously migrated with undefined content, reload the content.
    for (i = 0; i < num_devices; i++) {
        if (flags[i] & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) {
            if ((err = clEnqueueWriteBuffer(queues[i], buffers[i], CL_TRUE, 0, sizeof(cl_uint) * BUFFER_SIZE, buffer, 0, NULL, NULL)) != CL_SUCCESS) {
                print_error(err, "Failed on restoration enqueue write of buffer data.");
                return err;
            }
        }
    }
    return CL_SUCCESS;
}
|
||||
|
||||
/* Partition the device along each supported affinity domain, then for every
 * combination of migration policies for buffers A, B, and C, run
 * dst = src1 ^ src2 ^ x on each sub-device and verify the result whenever
 * neither input was migrated with undefined content.
 * Fixes over the original: the MTdata RNG state is now freed on every exit
 * path (it leaked everywhere before); the affinity-domain query failure no
 * longer leaks the malloc'd tracking arrays; and the assignment-as-condition
 * on create_single_kernel_helper is made explicit.
 * Returns 0 on pass, -1 on failure. */
int test_buffer_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int failed = 0;
    cl_uint i, j;
    cl_int err;
    cl_uint max_sub_devices = 0;
    cl_uint num_devices, num_devices_limited;
    cl_uint A[BUFFER_SIZE], B[BUFFER_SIZE], C[BUFFER_SIZE];
    cl_uint test_number = 1;
    cl_device_affinity_domain domain, domains;
    cl_device_id *devices;
    cl_command_queue *queues;
    cl_mem_migration_flags *flagsA, *flagsB, *flagsC;
    cl_device_partition_property property[] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, 0, 0};
    cl_mem *bufferA, *bufferB, *bufferC;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_context ctx = NULL;      // context for all sub-devices
    enum migrations migrateA, migrateB, migrateC;
    MTdata d = init_genrand(gRandomSeed);
    const size_t wgs[1] = {BUFFER_SIZE};

    /* Allocate arrays whose size varies according to the maximum number of sub-devices */
    if ((err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_sub_devices), &max_sub_devices, NULL)) != CL_SUCCESS) {
        print_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_COMPUTE_UNITS) failed");
        free_mtdata(d);         // was leaked on this early-exit path
        return -1;
    }
    if (max_sub_devices < 1) {
        log_error("ERROR: Invalid number of compute units returned.\n");
        free_mtdata(d);         // was leaked on this early-exit path
        return -1;
    }

    devices = (cl_device_id *)malloc(max_sub_devices * sizeof(cl_device_id));
    queues = (cl_command_queue *)malloc(max_sub_devices * sizeof(cl_command_queue));
    flagsA = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
    flagsB = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
    flagsC = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
    bufferA = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));
    bufferB = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));
    bufferC = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));

    if ((devices == NULL) || (queues == NULL) ||
        (flagsA == NULL) || (flagsB == NULL) || (flagsC == NULL) ||
        (bufferA == NULL) || (bufferB == NULL) || (bufferC == NULL)) {
        log_error("ERROR: Failed to successfully allocate required local buffers.\n");
        failed = -1;
        goto cleanup_allocations;
    }

    for (i = 0; i < max_sub_devices; i++) {
        devices[i] = NULL;
        queues[i] = NULL;
        bufferA[i] = bufferB[i] = bufferC[i] = NULL;
    }

    for (i = 0; i < BUFFER_SIZE; i++) {
        A[i] = genrand_int32(d);
        B[i] = genrand_int32(d);
    }

    // Attempt to partition the device along each of the allowed affinity domains.
    if ((err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(domains), &domains, NULL)) != CL_SUCCESS) {
        print_error(err, "clGetDeviceInfo(CL_PARTITION_AFFINITY_DOMAIN) failed");
        failed = -1;            // was a direct return that leaked d and all arrays
        goto cleanup_allocations;
    }

    domains &= (CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE | CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE |
                CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE | CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE | CL_DEVICE_AFFINITY_DOMAIN_NUMA);

    do {
        // Pop the lowest set domain bit; when no domains remain (or none were
        // supported) run once un-partitioned with domain == 0.
        if (domains) {
            for (domain = 1; (domain & domains) == 0; domain <<= 1) {};
            domains &= ~domain;
        } else {
            domain = 0;
        }

        // Determine the number of partitions for the device given the specific domain.
        if (domain) {
            property[1] = domain;
            err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, -1, NULL, &num_devices);
            if ((err != CL_SUCCESS) || (num_devices == 0)) {
                print_error(err, "Obtaining the number of partions by affinity failed.");
                failed = 1;
                goto cleanup;
            }
        } else {
            num_devices = 1;
        }

        if (num_devices > 1) {
            // Create each of the sub-devices and a corresponding context.
            if ((err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, num_devices, devices, &num_devices)) != CL_SUCCESS) {
                print_error(err, "Failed creating sub devices.");
                failed = 1;
                goto cleanup;
            }

            // Create a context containing all the sub-devices
            ctx = clCreateContext(NULL, num_devices, devices, notify_callback, NULL, &err);
            if (ctx == NULL) {
                print_error(err, "Failed creating context containing the sub-devices.");
                failed = 1;
                goto cleanup;
            }

            // Create a command queue for each sub-device
            for (i = 0; i < num_devices; i++) {
                if (devices[i]) {
                    if ((queues[i] = clCreateCommandQueueWithProperties(ctx, devices[i], 0, &err)) == NULL) {
                        print_error(err, "Failed creating command queues.");
                        failed = 1;
                        goto cleanup;
                    }
                }
            }
        } else {
            // No partitioning available. Just exercise the APIs on a single device.
            devices[0] = deviceID;
            queues[0] = queue;
            ctx = context;
        }

        // Build the kernel program (explicit comparison; original used
        // `if (err = ...)`).
        err = create_single_kernel_helper(ctx, &program, &kernel, 1, &buffer_migrate_kernel_code, "test_buffer_migrate");
        if (err != CL_SUCCESS) {
            print_error(err, "Failed creating kernel.");
            failed = 1;
            goto cleanup;
        }

        num_devices_limited = num_devices;

        // Allocate memory buffers. 3 buffers (2 input, 1 output) for each sub-device.
        // If we run out of memory, then restrict the number of sub-devices to be tested.
        for (i = 0; i < num_devices; i++) {
            bufferA[i] = init_buffer(queues[i], clCreateBuffer(ctx, (CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR), sizeof(cl_uint) * BUFFER_SIZE, NULL, &err), A);
            bufferB[i] = init_buffer(queues[i], clCreateBuffer(ctx, (CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR), sizeof(cl_uint) * BUFFER_SIZE, NULL, &err), B);
            bufferC[i] = clCreateBuffer(ctx, (CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR), sizeof(cl_uint) * BUFFER_SIZE, NULL, &err);

            if ((bufferA[i] == NULL) || (bufferB[i] == NULL) || (bufferC[i] == NULL)) {
                if (i == 0) {
                    log_error("Failed to allocate even 1 set of buffers.\n");
                    failed = 1;
                    goto cleanup;
                }
                num_devices_limited = i;
                break;
            }
        }

        // For each partition, we will execute the test kernel with each of the 3 buffers migrated to one of the migrate options
        for (migrateA = (enum migrations)(0); migrateA < NUMBER_OF_MIGRATIONS; migrateA = (enum migrations)((int)migrateA + 1)) {
            if (migrateMemObject(migrateA, queues, bufferA, num_devices_limited, flagsA, d) != CL_SUCCESS) {
                failed = 1;
                goto cleanup;
            }
            for (migrateC = (enum migrations)(0); migrateC < NUMBER_OF_MIGRATIONS; migrateC = (enum migrations)((int)migrateC + 1)) {
                if (migrateMemObject(migrateC, queues, bufferC, num_devices_limited, flagsC, d) != CL_SUCCESS) {
                    failed = 1;
                    goto cleanup;
                }
                for (migrateB = (enum migrations)(0); migrateB < NUMBER_OF_MIGRATIONS; migrateB = (enum migrations)((int)migrateB + 1)) {
                    if (migrateMemObject(migrateB, queues, bufferB, num_devices_limited, flagsB, d) != CL_SUCCESS) {
                        failed = 1;
                        goto cleanup;
                    }
                    // Run the test on each of the partitions.
                    for (i = 0; i < num_devices_limited; i++) {
                        cl_uint x;

                        x = i + test_number;

                        if ((err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (const void *)&bufferC[i])) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 0.");
                            failed = 1;
                            goto cleanup;
                        }

                        if ((err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (const void *)&bufferA[i])) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 1.");
                            failed = 1;
                            goto cleanup;
                        }

                        if ((err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (const void *)&bufferB[i])) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 2.");
                            failed = 1;
                            goto cleanup;
                        }

                        if ((err = clSetKernelArg(kernel, 3, sizeof(cl_uint), (const void *)&x)) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 3.");
                            failed = 1;
                            goto cleanup;
                        }

                        if ((err = clEnqueueNDRangeKernel(queues[i], kernel, 1, NULL, wgs, NULL, 0, NULL, NULL)) != CL_SUCCESS) {
                            print_error(err, "Failed enqueueing the NDRange kernel.");
                            failed = 1;
                            goto cleanup;
                        }
                    }
                    // Verify the results as long as neither input is an undefined migration
                    for (i = 0; i < num_devices_limited; i++, test_number++) {
                        if (((flagsA[i] | flagsB[i]) & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) == 0) {
                            if ((err = clEnqueueReadBuffer(queues[i], bufferC[i], CL_TRUE, 0, sizeof(cl_uint) * BUFFER_SIZE, C, 0, NULL, NULL)) != CL_SUCCESS) {
                                print_error(err, "Failed reading output buffer.");
                                failed = 1;
                                goto cleanup;
                            }
                            for (j = 0; j < BUFFER_SIZE; j++) {
                                cl_uint expected;

                                expected = A[j] ^ B[j] ^ test_number;
                                if (C[j] != expected) {
                                    log_error("Failed on device %d, work item %4d, expected 0x%08x got 0x%08x (0x%08x ^ 0x%08x ^ 0x%08x)\n", i, j, expected, C[j], A[j], B[j], test_number);
                                    failed = 1;
                                }
                            }
                            if (failed) goto cleanup;
                        }
                    }

                    if (restoreBuffer(queues, bufferB, num_devices_limited, flagsB, B) != CL_SUCCESS) {
                        failed = 1;
                        goto cleanup;
                    }
                }
            }
            if (restoreBuffer(queues, bufferA, num_devices_limited, flagsA, A) != CL_SUCCESS) {
                failed = 1;
                goto cleanup;
            }
        }

    cleanup:
        // Clean up all the allocated resources created by the test. This includes
        // sub-devices, command queues, and memory buffers.

        for (i = 0; i < max_sub_devices; i++) {
            // Memory buffer cleanup
            if (bufferA[i]) {
                if ((err = clReleaseMemObject(bufferA[i])) != CL_SUCCESS) {
                    print_error(err, "Failed releasing memory object.");
                    failed = 1;
                }
            }
            if (bufferB[i]) {
                if ((err = clReleaseMemObject(bufferB[i])) != CL_SUCCESS) {
                    print_error(err, "Failed releasing memory object.");
                    failed = 1;
                }
            }
            if (bufferC[i]) {
                if ((err = clReleaseMemObject(bufferC[i])) != CL_SUCCESS) {
                    print_error(err, "Failed releasing memory object.");
                    failed = 1;
                }
            }

            if (num_devices > 1) {
                // Command queue cleanup
                if (queues[i]) {
                    if ((err = clReleaseCommandQueue(queues[i])) != CL_SUCCESS) {
                        print_error(err, "Failed releasing command queue.");
                        failed = 1;
                    }
                }

                // Sub-device cleanup
                if (devices[i]) {
                    if ((err = clReleaseDevice(devices[i])) != CL_SUCCESS) {
                        print_error(err, "Failed releasing sub device.");
                        failed = 1;
                    }
                }
                devices[i] = 0;
            }
        }

        // Context, program, and kernel cleanup
        if (program) {
            if ((err = clReleaseProgram(program)) != CL_SUCCESS) {
                print_error(err, "Failed releasing program.");
                failed = 1;
            }
            program = NULL;
        }

        if (kernel) {
            if ((err = clReleaseKernel(kernel)) != CL_SUCCESS) {
                print_error(err, "Failed releasing kernel.");
                failed = 1;
            }
            kernel = NULL;
        }

        // Only release a context we created ourselves (not the caller's).
        if (ctx && (ctx != context)) {
            if ((err = clReleaseContext(ctx)) != CL_SUCCESS) {
                print_error(err, "Failed releasing context.");
                failed = 1;
            }
        }
        ctx = NULL;

        if (failed) goto cleanup_allocations;
    } while (domains);

cleanup_allocations:
    free_mtdata(d);             // fix: RNG state was never freed
    if (devices) free(devices);
    if (queues) free(queues);
    if (flagsA) free(flagsA);
    if (flagsB) free(flagsB);
    if (flagsC) free(flagsC);
    if (bufferA) free(bufferA);
    if (bufferB) free(bufferB);
    if (bufferC) free(bufferC);

    return ((failed) ? -1 : 0);
}
|
||||
1463
test_conformance/buffers/test_buffer_read.c
Normal file
1463
test_conformance/buffers/test_buffer_read.c
Normal file
File diff suppressed because it is too large
Load Diff
1956
test_conformance/buffers/test_buffer_write.c
Normal file
1956
test_conformance/buffers/test_buffer_write.c
Normal file
File diff suppressed because it is too large
Load Diff
487
test_conformance/buffers/test_image_migrate.c
Normal file
487
test_conformance/buffers/test_image_migrate.c
Normal file
@@ -0,0 +1,487 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
#define MAX_SUB_DEVICES 16 // Limit the sub-devices to ensure no out of resource errors.
|
||||
#define MEM_OBJ_SIZE 1024
|
||||
#define IMAGE_DIM 16
|
||||
|
||||
// Kernel source code
|
||||
// Kernel source code.
// Per work-item: out(x,y) = src1(x,y) ^ src2(x,y) ^ x (uint4, component-wise),
// matching the host-side verification expected = A[j] ^ B[j] ^ test_number.
static const char *image_migrate_kernel_code =
"__kernel void test_image_migrate(write_only image2d_t dst, read_only image2d_t src1,\n"
" read_only image2d_t src2, sampler_t sampler, uint x)\n"
"{\n"
" int tidX = get_global_id(0), tidY = get_global_id(1);\n"
" int2 coords = (int2) {tidX, tidY};\n"
" uint4 val = read_imageui(src1, sampler, coords) ^\n"
" read_imageui(src2, sampler, coords) ^\n"
" x;\n"
" write_imageui(dst, coords, val);\n"
"}\n";

// Migration strategies exercised by the test; each cell of the 3-deep nested
// loop in test_image_migrate() picks one of these per image.
enum migrations { MIGRATE_PREFERRED, // migrate to the preferred sub-device
                  MIGRATE_NON_PREFERRED, // migrate to a randomly chosen non-preferred sub-device
                  MIGRATE_RANDOM, // migrate to a randomly chosen sub-device with randomly chosen flags
                  NUMBER_OF_MIGRATIONS };
|
||||
|
||||
// Upload the reference data into a freshly created 2D image (blocking write).
// Returns the image on success, or NULL if creation already failed upstream.
//
// Fix vs. original: if the blocking write fails, the image is released and
// NULL is returned so the caller treats it like an allocation failure,
// instead of silently receiving an image with undefined contents.
static cl_mem init_image(cl_command_queue cmd_q, cl_mem image, cl_uint *data)
{
    cl_int err;

    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {IMAGE_DIM, IMAGE_DIM, 1};

    if (image) {
        if ((err = clEnqueueWriteImage(cmd_q, image, CL_TRUE,
                                       origin, region, 0, 0, data, 0, NULL, NULL)) != CL_SUCCESS) {
            print_error(err, "Failed on enqueue write of image data.");
            clReleaseMemObject(image);
            return NULL;
        }
    }

    return image;
}
|
||||
|
||||
static cl_int migrateMemObject(enum migrations migrate, cl_command_queue *queues, cl_mem *mem_objects,
|
||||
cl_uint num_devices, cl_mem_migration_flags *flags, MTdata d)
|
||||
{
|
||||
cl_uint i, j;
|
||||
cl_int err = CL_SUCCESS;
|
||||
|
||||
for (i=0; i<num_devices; i++) {
|
||||
j = genrand_int32(d) % num_devices;
|
||||
flags[i] = 0;
|
||||
switch (migrate) {
|
||||
case MIGRATE_PREFERRED:
|
||||
// Force the device to be preferred
|
||||
j = i;
|
||||
break;
|
||||
case MIGRATE_NON_PREFERRED:
|
||||
// Coerce the device to be non-preferred
|
||||
if ((j == i) && (num_devices > 1)) j = (j+1) % num_devices;
|
||||
break;
|
||||
case MIGRATE_RANDOM:
|
||||
// Choose a random set of flags
|
||||
flags[i] = (cl_mem_migration_flags)(genrand_int32(d) & (CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED));
|
||||
break;
|
||||
}
|
||||
if ((err = clEnqueueMigrateMemObjects(queues[j], 1, (const cl_mem *)(&mem_objects[i]),
|
||||
flags[i], 0, NULL, NULL)) != CL_SUCCESS) {
|
||||
print_error(err, "Failed migrating memory object.");
|
||||
}
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static cl_int restoreImage(cl_command_queue *queues, cl_mem *mem_objects, cl_uint num_devices,
|
||||
cl_mem_migration_flags *flags, cl_uint *buffer)
|
||||
{
|
||||
cl_uint i;
|
||||
cl_int err;
|
||||
|
||||
const size_t origin[3] = {0, 0, 0};
|
||||
const size_t region[3] = {IMAGE_DIM, IMAGE_DIM, 1};
|
||||
|
||||
// If the image was previously migrated with undefined content, reload the content.
|
||||
|
||||
for (i=0; i<num_devices; i++) {
|
||||
if (flags[i] & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) {
|
||||
if ((err = clEnqueueWriteImage(queues[i], mem_objects[i], CL_TRUE,
|
||||
origin, region, 0, 0, buffer, 0, NULL, NULL)) != CL_SUCCESS) {
|
||||
print_error(err, "Failed on restoration enqueue write of image data.");
|
||||
return err;
|
||||
}
|
||||
}
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
// Declaration moved out of protected scope/goto
// Sampler configuration used by test_image_migrate: unnormalized integer
// coordinates, clamp addressing, nearest filtering (zero-terminated list).
cl_sampler_properties properties[] = {
    CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE,
    CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP,
    CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST,
    0
};
|
||||
|
||||
// Exercise clEnqueueMigrateMemObjects on 2D images. The device is partitioned
// by each supported affinity domain in turn; for every partition the test
// runs the XOR kernel with input/output images migrated under every
// combination of migration strategies (see enum migrations), then verifies
// the output whenever neither input's contents were invalidated.
// Returns 0 on success, -1 on any failure.
int test_image_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int failed = 0;
    cl_uint i, j;
    cl_int err;
    cl_uint max_sub_devices = 0;
    cl_uint num_devices, num_devices_limited;
    // Host-side reference data: A, B inputs; C receives device results.
    // MEM_OBJ_SIZE (1024) == IMAGE_DIM * IMAGE_DIM * 4 channels of cl_uint.
    cl_uint A[MEM_OBJ_SIZE], B[MEM_OBJ_SIZE], C[MEM_OBJ_SIZE];
    cl_uint test_number = 1;
    cl_device_affinity_domain domain, domains;
    cl_device_id *devices;
    cl_command_queue *queues;
    cl_mem_migration_flags *flagsA, *flagsB, *flagsC;
    cl_device_partition_property property[] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, 0, 0};
    cl_mem *imageA, *imageB, *imageC;
    cl_mem_flags flags; // NOTE(review): unused in this function
    cl_image_format format;
    cl_sampler sampler = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_context ctx = NULL;
    enum migrations migrateA, migrateB, migrateC;
    // NOTE(review): d is never freed with free_mtdata before return — leak.
    MTdata d = init_genrand(gRandomSeed);
    const size_t wgs[2] = {IMAGE_DIM, IMAGE_DIM};
    const size_t wls[2] = {1, 1};

    // Check for image support.
    if(checkForImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED) {
        log_info("Device does not support images. Skipping test.\n");
        return 0;
    }

    // Allocate arrays whose size varies according to the maximum number of sub-devices.
    if ((err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_sub_devices), &max_sub_devices, NULL)) != CL_SUCCESS) {
        print_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_COMPUTE_UNITS) failed");
        return -1;
    }
    if (max_sub_devices < 1) {
        log_error("ERROR: Invalid number of compute units returned.\n");
        return -1;
    }

    devices = (cl_device_id *)malloc(max_sub_devices * sizeof(cl_device_id));
    queues = (cl_command_queue *)malloc(max_sub_devices * sizeof(cl_command_queue));
    flagsA = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
    flagsB = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
    flagsC = (cl_mem_migration_flags *)malloc(max_sub_devices * sizeof(cl_mem_migration_flags));
    imageA = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));
    imageB = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));
    imageC = (cl_mem *)malloc(max_sub_devices * sizeof(cl_mem));

    if ((devices == NULL) || (queues == NULL) ||
        (flagsA == NULL) || (flagsB == NULL) || (flagsC == NULL) ||
        (imageA == NULL) || (imageB == NULL) || (imageC == NULL)) {
        log_error("ERROR: Failed to successfully allocate required local buffers.\n");
        failed = -1;
        goto cleanup_allocations;
    }

    // Null-initialize so the cleanup code can safely test every slot.
    for (i=0; i<max_sub_devices; i++) {
        devices[i] = NULL;
        queues [i] = NULL;
        imageA[i] = imageB[i] = imageC[i] = NULL;
    }

    // Fill the host reference inputs with random data.
    for (i=0; i<MEM_OBJ_SIZE; i++) {
        A[i] = genrand_int32(d);
        B[i] = genrand_int32(d);
    }

    // Set image format.
    format.image_channel_order = CL_RGBA;
    format.image_channel_data_type = CL_UNSIGNED_INT32;


    // Attempt to partition the device along each of the allowed affinity domain.
    if ((err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(domains), &domains, NULL)) != CL_SUCCESS) {
        print_error(err, "clGetDeviceInfo(CL_PARTITION_AFFINITY_DOMAIN) failed");
        // NOTE(review): early return here skips cleanup_allocations — leaks the 8 arrays.
        return -1;
    }

    domains &= (CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE | CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE |
                CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE | CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE | CL_DEVICE_AFFINITY_DOMAIN_NUMA);

    // Loop once per supported affinity domain; a final pass with domain == 0
    // exercises the un-partitioned (single device) case when domains is empty.
    do {
        if (domains) {
            // Extract the lowest set domain bit and clear it from the mask.
            for (domain = 1; (domain & domains) == 0; domain <<= 1) {};
            domains &= ~domain;
        } else {
            domain = 0;
        }

        // Determine the number of partitions for the device given the specific domain.
        if (domain) {
            property[1] = domain;
            // num_entries = -1 with devices == NULL: query the partition count only.
            err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, -1, NULL, &num_devices);
            if ((err != CL_SUCCESS) || (num_devices == 0)) {
                print_error(err, "Obtaining the number of partions by affinity failed.");
                failed = 1;
                goto cleanup;
            }
        } else {
            num_devices = 1;
        }

        if (num_devices > 1) {
            // Create each of the sub-devices and a corresponding context.
            if ((err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, num_devices, devices, &num_devices)) != CL_SUCCESS) {
                print_error(err, "Failed creating sub devices.");
                failed = 1;
                goto cleanup;
            }

            // Create a context containing all the sub-devices
            ctx = clCreateContext(NULL, num_devices, devices, notify_callback, NULL, &err);
            if (ctx == NULL) {
                print_error(err, "Failed creating context containing the sub-devices.");
                failed = 1;
                goto cleanup;
            }

            // Create a command queue for each sub-device
            for (i=0; i<num_devices; i++) {
                if (devices[i]) {
                    if ((queues[i] = clCreateCommandQueueWithProperties(ctx, devices[i], 0, &err)) == NULL) {
                        print_error(err, "Failed creating command queues.");
                        failed = 1;
                        goto cleanup;
                    }
                }
            }
        } else {
            // No partitioning available. Just exercise the APIs on a single device.
            devices[0] = deviceID;
            queues[0] = queue;
            ctx = context;
        }

        // Build the kernel program.
        // Note: assignment inside the condition is intentional; any nonzero
        // helper result is treated as failure.
        if (err = create_single_kernel_helper(ctx, &program, &kernel, 1, &image_migrate_kernel_code, "test_image_migrate")) {
            print_error(err, "Failed creating kernel.");
            failed = 1;
            goto cleanup;
        }

        // Create sampler (uses the file-scope `properties` list).
        sampler = clCreateSamplerWithProperties(ctx, properties, &err );
        if ((err != CL_SUCCESS) || !sampler) {
            print_error(err, "Failed to create a sampler.");
            failed = 1;
            goto cleanup;
        }

        num_devices_limited = num_devices;

        // Allocate memory buffers. 3 buffers (2 input, 1 output) for each sub-device.
        // If we run out of memory, then restrict the number of sub-devices to be tested.
        for (i=0; i<num_devices; i++) {
            imageA[i] = init_image(queues[i], create_image_2d(ctx, (CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR),
                                                             &format, IMAGE_DIM, IMAGE_DIM, 0, NULL, &err), A);
            imageB[i] = init_image(queues[i], create_image_2d(ctx, (CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR),
                                                             &format, IMAGE_DIM, IMAGE_DIM, 0, NULL, &err), B);
            imageC[i] = create_image_2d(ctx, (CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR),
                                        &format, IMAGE_DIM, IMAGE_DIM, 0, NULL, &err);

            if ((imageA[i] == NULL) || (imageB[i] == NULL) || (imageC[i] == NULL)) {
                if (i == 0) {
                    log_error("Failed to allocate even 1 set of buffers.\n");
                    failed = 1;
                    goto cleanup;
                }
                num_devices_limited = i;
                break;
            }
        }

        // For each partition, we will execute the test kernel with each of the 3 buffers migrated to one of the migrate options
        for (migrateA=(enum migrations)(0); migrateA<NUMBER_OF_MIGRATIONS; migrateA = (enum migrations)((int)migrateA + 1)) {
            if (migrateMemObject(migrateA, queues, imageA, num_devices_limited, flagsA, d) != CL_SUCCESS) {
                failed = 1;
                goto cleanup;
            }
            for (migrateC=(enum migrations)(0); migrateC<NUMBER_OF_MIGRATIONS; migrateC = (enum migrations)((int)migrateC + 1)) {
                if (migrateMemObject(migrateC, queues, imageC, num_devices_limited, flagsC, d) != CL_SUCCESS) {
                    failed = 1;
                    goto cleanup;
                }
                for (migrateB=(enum migrations)(0); migrateB<NUMBER_OF_MIGRATIONS; migrateB = (enum migrations)((int)migrateB + 1)) {
                    if (migrateMemObject(migrateB, queues, imageB, num_devices_limited, flagsB, d) != CL_SUCCESS) {
                        failed = 1;
                        goto cleanup;
                    }
                    // Run the test on each of the partitions.
                    for (i=0; i<num_devices_limited; i++) {
                        cl_uint x;

                        // Per-device XOR constant; also encodes the test pass.
                        x = i + test_number;

                        if ((err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (const void *)&imageC[i])) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 0.");
                            failed = 1;
                            goto cleanup;
                        }

                        if ((err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (const void *)&imageA[i])) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 1.");
                            failed = 1;
                            goto cleanup;
                        }

                        if ((err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (const void *)&imageB[i])) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 2.");
                            failed = 1;
                            goto cleanup;
                        }

                        if ((err = clSetKernelArg(kernel, 3, sizeof(cl_sampler), (const void *)&sampler)) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 3.");
                            failed = 1;
                            goto cleanup;
                        }

                        if ((err = clSetKernelArg(kernel, 4, sizeof(cl_uint), (const void *)&x)) != CL_SUCCESS) {
                            print_error(err, "Failed set kernel argument 4.");
                            failed = 1;
                            goto cleanup;
                        }

                        if ((err = clEnqueueNDRangeKernel(queues[i], kernel, 2, NULL, wgs, wls, 0, NULL, NULL)) != CL_SUCCESS) {
                            print_error(err, "Failed enqueueing the NDRange kernel.");
                            failed = 1;
                            goto cleanup;
                        }
                    }
                    // Verify the results as long as neither input is an undefined migration
                    const size_t origin[3] = {0, 0, 0};
                    const size_t region[3] = {IMAGE_DIM, IMAGE_DIM, 1};

                    for (i=0; i<num_devices_limited; i++, test_number++) {
                        if (((flagsA[i] | flagsB[i]) & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED) == 0) {
                            // Blocking read also serves as the completion sync for queues[i].
                            if ((err = clEnqueueReadImage(queues[i], imageC[i], CL_TRUE,
                                                          origin, region, 0, 0, C, 0, NULL, NULL)) != CL_SUCCESS) {
                                print_error(err, "Failed reading output buffer.");
                                failed = 1;
                                goto cleanup;
                            }
                            for (j=0; j<MEM_OBJ_SIZE; j++) {
                                cl_uint expected;

                                // Mirrors the kernel: val = src1 ^ src2 ^ x, with x == i + test_number at enqueue time.
                                expected = A[j] ^ B[j] ^ test_number;
                                if (C[j] != expected) {
                                    log_error("Failed on device %d, work item %4d, expected 0x%08x got 0x%08x (0x%08x ^ 0x%08x ^ 0x%08x)\n", i, j, expected, C[j], A[j], B[j], test_number);
                                    failed = 1;
                                }
                            }
                            if (failed) goto cleanup;
                        }
                    }

                    // B may have been migrated with undefined contents; reload before reuse.
                    if (restoreImage(queues, imageB, num_devices_limited, flagsB, B) != CL_SUCCESS) {
                        failed = 1;
                        goto cleanup;
                    }
                }
            }
            if (restoreImage(queues, imageA, num_devices_limited, flagsA, A) != CL_SUCCESS) {
                failed = 1;
                goto cleanup;
            }
        }

    cleanup:
        // Clean up all the allocated resources created by the test. This includes sub-devices,
        // command queues, and memory buffers.

        for (i=0; i<max_sub_devices; i++) {
            // Memory buffer cleanup
            if (imageA[i]) {
                if ((err = clReleaseMemObject(imageA[i])) != CL_SUCCESS) {
                    print_error(err, "Failed releasing memory object.");
                    failed = 1;
                }
            }
            if (imageB[i]) {
                if ((err = clReleaseMemObject(imageB[i])) != CL_SUCCESS) {
                    print_error(err, "Failed releasing memory object.");
                    failed = 1;
                }
            }
            if (imageC[i]) {
                if ((err = clReleaseMemObject(imageC[i])) != CL_SUCCESS) {
                    print_error(err, "Failed releasing memory object.");
                    failed = 1;
                }
            }

            if (num_devices > 1) {
                // Command queue cleanup
                if (queues[i]) {
                    if ((err = clReleaseCommandQueue(queues[i])) != CL_SUCCESS) {
                        print_error(err, "Failed releasing command queue.");
                        failed = 1;
                    }
                }

                // Sub-device cleanup
                if (devices[i]) {
                    if ((err = clReleaseDevice(devices[i])) != CL_SUCCESS) {
                        print_error(err, "Failed releasing sub device.");
                        failed = 1;
                    }
                }
                devices[i] = 0;
            }
        }

        // Sampler cleanup
        if (sampler) {
            if ((err = clReleaseSampler(sampler)) != CL_SUCCESS) {
                print_error(err, "Failed releasing sampler.");
                failed = 1;
            }
            sampler = NULL;
        }

        // Context, program, and kernel cleanup
        if (program) {
            if ((err = clReleaseProgram(program)) != CL_SUCCESS) {
                print_error(err, "Failed releasing program.");
                failed = 1;
            }
            program = NULL;
        }

        if (kernel) {
            if ((err = clReleaseKernel(kernel)) != CL_SUCCESS) {
                print_error(err, "Failed releasing kernel.");
                failed = 1;
            }
            kernel = NULL;
        }

        if (ctx && (ctx != context)) {
            if ((err = clReleaseContext(ctx)) != CL_SUCCESS) {
                print_error(err, "Failed releasing context.");
                failed = 1;
            }
        }
        ctx = NULL;

        if (failed) goto cleanup_allocations;
    } while (domains);

cleanup_allocations:
    if (devices) free(devices);
    if (queues) free(queues);
    if (flagsA) free(flagsA);
    if (flagsB) free(flagsB);
    if (flagsC) free(flagsC);
    if (imageA) free(imageA);
    if (imageB) free(imageB);
    if (imageC) free(imageC);

    return ((failed) ? -1 : 0);
}
|
||||
631
test_conformance/buffers/test_sub_buffers.cpp
Normal file
631
test_conformance/buffers/test_sub_buffers.cpp
Normal file
@@ -0,0 +1,631 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "procs.h"
|
||||
|
||||
// Design:
|
||||
// To test sub buffers, we first create one main buffer. We then create several sub-buffers and
|
||||
// queue Actions on each one. Each Action is encapsulated in a class so it can keep track of
|
||||
// what results it expects, and so we can test scaling degrees of Actions on scaling numbers of
|
||||
// sub-buffers.
|
||||
|
||||
class SubBufferWrapper : public clMemWrapper
|
||||
{
|
||||
public:
|
||||
cl_mem mParentBuffer;
|
||||
size_t mOrigin;
|
||||
size_t mSize;
|
||||
|
||||
cl_int Allocate( cl_mem parent, cl_mem_flags flags, size_t origin, size_t size )
|
||||
{
|
||||
mParentBuffer = parent;
|
||||
mOrigin = origin;
|
||||
mSize = size;
|
||||
|
||||
cl_buffer_region region;
|
||||
region.origin = mOrigin;
|
||||
region.size = mSize;
|
||||
|
||||
cl_int error;
|
||||
mMem = clCreateSubBuffer( mParentBuffer, flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error );
|
||||
return error;
|
||||
}
|
||||
};
|
||||
|
||||
class Action
|
||||
{
|
||||
public:
|
||||
virtual ~Action() {}
|
||||
virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) = 0;
|
||||
virtual const char * GetName( void ) const = 0;
|
||||
|
||||
static MTdata d;
|
||||
static MTdata GetRandSeed( void )
|
||||
{
|
||||
if ( d == 0 )
|
||||
d = init_genrand( gRandomSeed );
|
||||
return d;
|
||||
}
|
||||
static void FreeRandSeed() {
|
||||
if ( d != 0 ) {
|
||||
free_mtdata(d);
|
||||
d = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
MTdata Action::d = 0;
|
||||
|
||||
class ReadWriteAction : public Action
|
||||
{
|
||||
public:
|
||||
virtual ~ReadWriteAction() {}
|
||||
virtual const char * GetName( void ) const { return "ReadWrite";}
|
||||
|
||||
virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
|
||||
{
|
||||
cl_char *tempBuffer = (cl_char*)malloc(buffer1.mSize);
|
||||
if (!tempBuffer) {
|
||||
log_error("Out of memory\n");
|
||||
return -1;
|
||||
}
|
||||
cl_int error = clEnqueueReadBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
|
||||
test_error( error, "Unable to enqueue buffer read" );
|
||||
|
||||
size_t start = get_random_size_t( 0, buffer1.mSize / 2, GetRandSeed() );
|
||||
size_t end = get_random_size_t( start, buffer1.mSize, GetRandSeed() );
|
||||
|
||||
for ( size_t i = start; i < end; i++ )
|
||||
{
|
||||
tempBuffer[ i ] |= tag;
|
||||
parentBufferState[ i + buffer1.mOrigin ] |= tag;
|
||||
}
|
||||
|
||||
error = clEnqueueWriteBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
|
||||
test_error( error, "Unable to enqueue buffer write" );
|
||||
free(tempBuffer);
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
};
|
||||
|
||||
#ifndef MAX
|
||||
#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) )
|
||||
#endif
|
||||
#ifndef MIN
|
||||
#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) )
|
||||
#endif
|
||||
|
||||
class CopyAction : public Action
|
||||
{
|
||||
public:
|
||||
virtual ~CopyAction() {}
|
||||
virtual const char * GetName( void ) const { return "Copy";}
|
||||
|
||||
virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
|
||||
{
|
||||
// Copy from sub-buffer 1 to sub-buffer 2
|
||||
size_t size = get_random_size_t( 0, MIN( buffer1.mSize, buffer2.mSize ), GetRandSeed() );
|
||||
|
||||
size_t startOffset = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() );
|
||||
size_t endOffset = get_random_size_t( 0, buffer2.mSize - size, GetRandSeed() );
|
||||
|
||||
cl_int error = clEnqueueCopyBuffer( queue, buffer1, buffer2, startOffset, endOffset, size, 0, NULL, NULL );
|
||||
test_error( error, "Unable to enqueue buffer copy" );
|
||||
|
||||
memcpy( parentBufferState + buffer2.mOrigin + endOffset, parentBufferState + buffer1.mOrigin + startOffset, size );
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
};
|
||||
|
||||
class MapAction : public Action
|
||||
{
|
||||
public:
|
||||
virtual ~MapAction() {}
|
||||
virtual const char * GetName( void ) const { return "Map";}
|
||||
|
||||
virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
|
||||
{
|
||||
size_t size = get_random_size_t( 0, buffer1.mSize, GetRandSeed() );
|
||||
size_t start = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() );
|
||||
|
||||
cl_int error;
|
||||
void * mappedPtr = clEnqueueMapBuffer( queue, buffer1, CL_TRUE, (cl_map_flags)( CL_MAP_READ | CL_MAP_WRITE ),
|
||||
start, size, 0, NULL, NULL, &error );
|
||||
test_error( error, "Unable to map buffer" );
|
||||
|
||||
cl_char *cPtr = (cl_char *)mappedPtr;
|
||||
for ( size_t i = 0; i < size; i++ )
|
||||
{
|
||||
cPtr[ i ] |= tag;
|
||||
parentBufferState[ i + start + buffer1.mOrigin ] |= tag;
|
||||
}
|
||||
|
||||
error = clEnqueueUnmapMemObject( queue, buffer1, mappedPtr, 0, NULL, NULL );
|
||||
test_error( error, "Unable to unmap buffer" );
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
};
|
||||
|
||||
class KernelReadWriteAction : public Action
|
||||
{
|
||||
public:
|
||||
virtual ~KernelReadWriteAction() {}
|
||||
virtual const char * GetName( void ) const { return "KernelReadWrite";}
|
||||
|
||||
virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
|
||||
{
|
||||
const char *kernelCode[] = {
|
||||
"__kernel void readTest( __global char *inBuffer, char tag )\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
" inBuffer[ tid ] |= tag;\n"
|
||||
"}\n" };
|
||||
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
cl_int error;
|
||||
|
||||
if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t threads[1] = { buffer1.mSize };
|
||||
|
||||
error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &buffer1 );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
error = clSetKernelArg( kernel, 1, sizeof( tag ), &tag );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error( error, "Unable to queue kernel" );
|
||||
|
||||
for ( size_t i = 0; i < buffer1.mSize; i++ )
|
||||
parentBufferState[ i + buffer1.mOrigin ] |= tag;
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
};
|
||||
|
||||
// Choose a test buffer size: one fifth of the device's maximum allocation
// (leaving headroom for other allocations), capped at 32MB so the test
// completes in a reasonable amount of time. Result via outSize.
cl_int get_reasonable_buffer_size( cl_device_id device, size_t &outSize )
{
    cl_ulong maxAllocSize;

    cl_int error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
    test_error( error, "Unable to get max alloc size" );

    const size_t cap = (size_t)32 << 20;   // 32MB upper bound

    outSize = (size_t)( maxAllocSize / 5 );
    if ( outSize > cap )
        outSize = cap;

    return CL_SUCCESS;
}
|
||||
|
||||
// Return the index of the sub-buffer whose [origin, origin + size) range
// contains `index`, or numSubBuffers if no sub-buffer covers that byte.
// Relies on subBuffers being ordered by ascending, non-overlapping origin.
size_t find_subbuffer_by_index( SubBufferWrapper * subBuffers, size_t numSubBuffers, size_t index )
{
    for ( size_t i = 0; i < numSubBuffers; i++ )
    {
        size_t begin = subBuffers[ i ].mOrigin;
        size_t end = begin + subBuffers[ i ].mSize;

        if ( begin > index )
            break;              // sorted by origin: no later buffer can contain index
        if ( index < end )
            return i;
    }
    return numSubBuffers;
}
|
||||
|
||||
// This tests the read/write capabilities of sub buffers (if we are read/write, the sub buffers
|
||||
// can't overlap)
|
||||
// Core of the sub-buffer read/write test. Creates one main buffer, carves up
// to 8 non-overlapping sub-buffers out of it, applies 2x64 random Actions
// (read/write, map, copy, kernel) on random sub-buffers across queueA/queueB,
// then validates the main buffer against the host-side reference copy.
// Returns the number of validation errors (0 == pass), or -1 on fatal error.
int test_sub_buffers_read_write_core( cl_context context, cl_command_queue queueA, cl_command_queue queueB, size_t mainSize, size_t addressAlign )
{
    clMemWrapper mainBuffer;
    SubBufferWrapper subBuffers[ 8 ];
    size_t numSubBuffers;
    cl_int error;
    size_t i;
    // Fixed seed so the buffer contents are reproducible run-to-run.
    MTdata m = init_genrand( 22 );


    // Host reference copy of the buffer, and scratch for reading back results.
    // NOTE(review): calloc results are not checked, and both allocations leak
    // on the early returns taken by test_error below.
    cl_char * mainBufferContents = (cl_char*)calloc(1,mainSize);
    cl_char * actualResults = (cl_char*)calloc(1,mainSize);

    for ( i = 0; i < mainSize / 4; i++ )
        ((cl_uint*) mainBufferContents)[i] = genrand_int32(m);

    free_mtdata( m );

    // Create the main buffer to test against
    mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, mainSize, mainBufferContents, &error );
    test_error( error, "Unable to create test main buffer" );

    // Create some sub-buffers to use
    // Each sub-buffer starts at or after the end of the previous one (no
    // overlap, as required for read/write sub-buffers), aligned to addressAlign.
    size_t toStartFrom = 0;
    for ( numSubBuffers = 0; numSubBuffers < 8; numSubBuffers++ )
    {
        size_t endRange = toStartFrom + ( mainSize / 4 );
        if ( endRange > mainSize )
            endRange = mainSize;

        size_t offset = get_random_size_t( toStartFrom / addressAlign, endRange / addressAlign, Action::GetRandSeed() ) * addressAlign;
        size_t size = get_random_size_t( 1, ( MIN( mainSize / 8, mainSize - offset ) ) / addressAlign, Action::GetRandSeed() ) * addressAlign;
        error = subBuffers[ numSubBuffers ].Allocate( mainBuffer, CL_MEM_READ_WRITE, offset, size );
        test_error( error, "Unable to allocate sub buffer" );

        toStartFrom = offset + size;
        if ( toStartFrom > ( mainSize - ( addressAlign * 256 ) ) )
            break;
    }

    ReadWriteAction rwAction;
    MapAction mapAction;
    CopyAction copyAction;
    KernelReadWriteAction kernelAction;

    Action * actions[] = { &rwAction, &mapAction, &copyAction, &kernelAction };
    int numErrors = 0;

    // Do the following steps twice, to make sure the parent gets updated *and* we can
    // still work on the sub-buffers
    cl_command_queue prev_queue = queueA;
    for ( int time = 0; time < 2; time++ )
    {
        // Randomly apply actions to the set of sub buffers
        size_t i;
        for ( i = 0; i < 64; i++ )
        {
            int which = random_in_range( 0, 3, Action::GetRandSeed() );
            int whichQueue = random_in_range( 0, 1, Action::GetRandSeed() );
            int whichBufferA = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
            int whichBufferB;
            // Second buffer must differ from the first (used by CopyAction).
            do
            {
                whichBufferB = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
            } while ( whichBufferB == whichBufferA );

            cl_command_queue queue = ( whichQueue == 1 ) ? queueB : queueA;
            // Flush/finish the other queue before switching, so commands on
            // the two queues never race on the shared parent buffer.
            if (queue != prev_queue) {
                error = clFinish( prev_queue );
                test_error( error, "Error finishing other queue." );

                prev_queue = queue;
            }

            // The loop index doubles as the tag ORed into the data (i < 64,
            // so it fits in a cl_char).
            error = actions[ which ]->Execute( context, queue, (cl_int)i, subBuffers[ whichBufferA ], subBuffers[ whichBufferB ], mainBufferContents );
            test_error( error, "Unable to execute action against sub buffers" );
        }

        error = clFinish( queueA );
        test_error( error, "Error finishing queueA." );

        error = clFinish( queueB );
        test_error( error, "Error finishing queueB." );

        // Validate by reading the final contents of the main buffer and
        // validating against our ref copy we generated
        error = clEnqueueReadBuffer( queueA, mainBuffer, CL_TRUE, 0, mainSize, actualResults, 0, NULL, NULL );
        test_error( error, "Unable to enqueue buffer read" );

        // Compare in 64KB chunks; only byte-scan a chunk if its memcmp fails.
        for ( i = 0; i < mainSize; i += 65536 )
        {
            size_t left = 65536;
            if ( ( i + left ) > mainSize )
                left = mainSize - i;

            if ( memcmp( actualResults + i, mainBufferContents + i, left ) == 0 )
                continue;

            // The fast compare failed, so we need to determine where exactly the failure is

            for ( size_t j = 0; j < left; j++ )
            {
                if ( actualResults[ i + j ] != mainBufferContents[ i + j ] )
                {
                    // Hit a failure; report the subbuffer at this address as having failed
                    size_t sbThatFailed = find_subbuffer_by_index( subBuffers, numSubBuffers, i + j );
                    if ( sbThatFailed == numSubBuffers )
                    {
                        log_error( "ERROR: Validation failure outside of a sub-buffer! (Shouldn't be possible, but it happened at index %ld out of %ld...)\n", i + j, mainSize );
                        // Since this is a nonsensical, don't bother continuing to check
                        // (we will, however, print our map of sub-buffers for comparison)
                        for ( size_t k = 0; k < numSubBuffers; k++ )
                        {
                            log_error( "\tBuffer %ld: %ld to %ld (length %ld)\n", k, subBuffers[ k ].mOrigin, subBuffers[ k ].mOrigin + subBuffers[ k ].mSize, subBuffers[ k ].mSize );
                        }
                        return -1;
                    }
                    log_error( "ERROR: Validation failure on sub-buffer %ld (start: %ld, length: %ld)\n", sbThatFailed, subBuffers[ sbThatFailed ].mOrigin, subBuffers[ sbThatFailed ].mSize );
                    // Skip past the end of the failing sub-buffer: count one
                    // error per sub-buffer, not one per mismatching byte.
                    size_t newPos = subBuffers[ sbThatFailed ].mOrigin + subBuffers[ sbThatFailed ].mSize - 1;
                    i = newPos & ~65535;
                    j = newPos - i;
                    numErrors++;
                }
            }
        }
    }

    free(mainBufferContents);
    free(actualResults);
    Action::FreeRandSeed();

    return numErrors;
}
|
||||
|
||||
// Single-device variant of the sub-buffer read/write test: determines a
// reasonable buffer size and the device's base address alignment, then runs
// the core test using the one queue for both queue roles.
int test_sub_buffers_read_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    // Pick a buffer size appropriate for this device
    size_t bufferSize;
    cl_int error = get_reasonable_buffer_size( deviceID, bufferSize );
    test_error( error, "Unable to get reasonable buffer size" );

    // Query the device's base address alignment (reported in bits) so the
    // core test can generate legally-aligned sub-buffer origins
    cl_uint alignmentBits;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( alignmentBits ), &alignmentBits, NULL );
    test_error( error, "Unable to get device's address alignment" );

    // Same queue serves as both "A" and "B" queues in the core test
    return test_sub_buffers_read_write_core( context, queue, queue, bufferSize, alignmentBits / 8 );
}
|
||||
|
||||
// This test performs the same basic operations as sub_buffers_read_write, but instead of a single
|
||||
// device, it creates a context and buffer shared between two devices, then executes commands
|
||||
// on queues for each device to ensure that everything still operates as expected.
|
||||
int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
|
||||
{
|
||||
cl_int error;
|
||||
|
||||
|
||||
// First obtain the second device
|
||||
cl_device_id otherDevice = GetOpposingDevice( deviceID );
|
||||
if ( otherDevice == NULL )
|
||||
{
|
||||
log_error( "ERROR: Unable to obtain a second device for sub-buffer dual-device test.\n" );
|
||||
return -1;
|
||||
}
|
||||
if ( otherDevice == deviceID )
|
||||
{
|
||||
log_info( "Note: Unable to run dual-device sub-buffer test (only one device available). Skipping test (implicitly passing).\n" );
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Determine the device id.
|
||||
size_t param_size;
|
||||
error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, 0, NULL, ¶m_size );
|
||||
test_error( error, "Error obtaining device name" );
|
||||
|
||||
#if !(defined(_WIN32) && defined(_MSC_VER))
|
||||
char device_name[param_size];
|
||||
#else
|
||||
char* device_name = (char*)_malloca(param_size);
|
||||
#endif
|
||||
error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, param_size, &device_name[0], NULL );
|
||||
test_error( error, "Error obtaining device name" );
|
||||
|
||||
log_info( "\tOther device obtained for dual device test is type %s\n", device_name );
|
||||
|
||||
// Create a shared context for these two devices
|
||||
cl_device_id devices[ 2 ] = { deviceID, otherDevice };
|
||||
clContextWrapper testingContext = clCreateContext( NULL, 2, devices, NULL, NULL, &error );
|
||||
test_error( error, "Unable to create shared context" );
|
||||
|
||||
// Create two queues (can't use the existing one, because it's on the wrong context)
|
||||
clCommandQueueWrapper queue1 = clCreateCommandQueueWithProperties( testingContext, deviceID, 0, &error );
|
||||
test_error( error, "Unable to create command queue on main device" );
|
||||
|
||||
clCommandQueueWrapper queue2 = clCreateCommandQueueWithProperties( testingContext, otherDevice, 0, &error );
|
||||
test_error( error, "Unable to create command queue on secondary device" );
|
||||
|
||||
// Determine the reasonable buffer size and address alignment that applies to BOTH devices
|
||||
size_t maxBuffer1, maxBuffer2;
|
||||
error = get_reasonable_buffer_size( deviceID, maxBuffer1 );
|
||||
test_error( error, "Unable to get buffer size for main device" );
|
||||
|
||||
error = get_reasonable_buffer_size( otherDevice, maxBuffer2 );
|
||||
test_error( error, "Unable to get buffer size for secondary device" );
|
||||
maxBuffer1 = MIN( maxBuffer1, maxBuffer2 );
|
||||
|
||||
cl_uint addressAlign1Bits, addressAlign2Bits;
|
||||
error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign1Bits ), &addressAlign1Bits, NULL );
|
||||
test_error( error, "Unable to get main device's address alignment" );
|
||||
|
||||
error = clGetDeviceInfo( otherDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign2Bits ), &addressAlign2Bits, NULL );
|
||||
test_error( error, "Unable to get secondary device's address alignment" );
|
||||
|
||||
cl_uint addressAlign1 = MAX( addressAlign1Bits, addressAlign2Bits ) / 8;
|
||||
|
||||
// Finally time to run!
|
||||
return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 );
|
||||
}
|
||||
|
||||
// Reads `length` bytes out of `buffer` indirectly: a trivial copy kernel
// moves the data into a freshly created device buffer, which is then read
// back into outResults. This exercises kernel-side access to (sub-)buffers
// instead of a direct host read of the input buffer.
cl_int read_buffer_via_kernel( cl_context context, cl_command_queue queue, cl_mem buffer, size_t length, cl_char *outResults )
{
    const char *kernelCode[] = {
        "__kernel void readTest( __global char *inBuffer, __global char *outBuffer )\n"
        "{\n"
        " int tid = get_global_id(0);\n"
        " outBuffer[ tid ] = inBuffer[ tid ];\n"
        "}\n" };

    cl_int error;
    clProgramWrapper copyProgram;
    clKernelWrapper copyKernel;

    if ( create_single_kernel_helper( context, &copyProgram, &copyKernel, 1, kernelCode, "readTest" ) )
    {
        return -1;
    }

    // Destination buffer the kernel copies into
    clMemWrapper destStream = clCreateBuffer( context, CL_MEM_READ_WRITE, length, NULL, &error );
    test_error( error, "Unable to create output stream" );

    error = clSetKernelArg( copyKernel, 0, sizeof( buffer ), &buffer );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( copyKernel, 1, sizeof( destStream ), &destStream );
    test_error( error, "Unable to set kernel argument" );

    // One work-item per byte to copy
    size_t globalThreads[1] = { length };
    error = clEnqueueNDRangeKernel( queue, copyKernel, 1, NULL, globalThreads, NULL, 0, NULL, NULL );
    test_error( error, "Unable to queue kernel" );

    // Blocking read of the kernel's output back to the host
    error = clEnqueueReadBuffer( queue, destStream, CL_TRUE, 0, length, outResults, 0, NULL, NULL );
    test_error( error, "Unable to read results from kernel" );

    return CL_SUCCESS;
}
|
||||
|
||||
|
||||
// Tests deliberately-overlapping sub-buffers: fills the main buffer with
// random data, then reads each of 16 overlapping sub-buffers back (randomly
// alternating between clEnqueueReadBuffer and a copy kernel) and validates
// the contents against the host-side reference copy. Returns the number of
// sub-buffers that failed validation (0 == pass).
int test_sub_buffers_overlapping( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_int error;
    size_t mainSize;
    cl_uint addressAlign;

    clMemWrapper mainBuffer;
    SubBufferWrapper subBuffers[ 16 ];

    // Create the main buffer to test against
    error = get_reasonable_buffer_size( deviceID, mainSize );
    test_error( error, "Unable to get reasonable buffer size" );

    mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, mainSize, NULL, &error );
    test_error( error, "Unable to create test main buffer" );

    // Determine the alignment of the device so we can make sure sub buffers are valid.
    // NOTE(review): CL_DEVICE_MEM_BASE_ADDR_ALIGN is reported in *bits*; using the
    // raw value as a byte stride (unlike the /8 in the other tests here) is
    // stricter than required but still produces legal alignments — confirm intent.
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign ), &addressAlign, NULL );
    test_error( error, "Unable to get device's address alignment" );

    // Create some sub-buffers to use. Note: they don't have to not overlap (we actually *want* them to overlap)
    for ( size_t i = 0; i < 16; i++ )
    {
        size_t offset = get_random_size_t( 0, mainSize / addressAlign, Action::GetRandSeed() ) * addressAlign;
        size_t size = get_random_size_t( 1, ( mainSize - offset ) / addressAlign, Action::GetRandSeed() ) * addressAlign;

        error = subBuffers[ i ].Allocate( mainBuffer, CL_MEM_READ_ONLY, offset, size );
        test_error( error, "Unable to allocate sub buffer" );
    }

    /// For logging, we determine the amount of overlap we just generated.
    // Build a sorted in/out event map to help with generating the stats.
    // Encoding: an entry >= 0 marks the *start* of sub-buffer `entry`; an
    // entry < 0 marks the *end* of sub-buffer `-entry - 1` (the +1 bias keeps
    // sub-buffer 0's end distinguishable from its start).
    int sbMap[ 32 ], mapSize = 0;
    for ( int i = 0; i < 16; i++ )
    {
        int j;
        for ( j = 0; j < mapSize; j++ )
        {
            // Decode the boundary position of map entry j. FIX: end entries are
            // biased by +1, so they decode as -sbMap[j] - 1; the previous code
            // used subBuffers[ -sbMap[j] ], reading the wrong sub-buffer's extent
            // and indexing past subBuffers[15] when sub-buffer 15's end was hit.
            size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] - 1 ].mOrigin + subBuffers[ -sbMap[ j ] - 1 ].mSize )
                                           : subBuffers[ sbMap[ j ] ].mOrigin;
            if ( subBuffers[ i ].mOrigin < pt )
            {
                // Origin is before this part of the map, so move map forward so we can insert
                memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
                sbMap[ j ] = i;
                mapSize++;
                break;
            }
        }
        if ( j == mapSize )
        {
            sbMap[ j ] = i;
            mapSize++;
        }

        size_t endPt = subBuffers[ i ].mOrigin + subBuffers[ i ].mSize;
        for ( j = 0; j < mapSize; j++ )
        {
            // Same decode fix as above for the end-point insertion pass
            size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] - 1 ].mOrigin + subBuffers[ -sbMap[ j ] - 1 ].mSize )
                                           : subBuffers[ sbMap[ j ] ].mOrigin;
            if ( endPt < pt )
            {
                // End point is before this part of the map, so move map forward so we can insert
                memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
                sbMap[ j ] = -( i + 1 );
                mapSize++;
                break;
            }
        }
        if ( j == mapSize )
        {
            sbMap[ j ] = -( i + 1 );
            mapSize++;
        }
    }

    // Walk the event map, accumulating the total overlapped byte count (delta)
    // and the deepest simultaneous overlap observed
    long long delta = 0;
    size_t maxOverlap = 1, overlap = 0;
    for ( int i = 0; i < 32; i++ )
    {
        if ( sbMap[ i ] >= 0 )
        {
            overlap++;
            if ( overlap > 1 )
                delta -= (long long)( subBuffers[ sbMap[ i ] ].mOrigin );
            if ( overlap > maxOverlap )
                maxOverlap = overlap;
        }
        else
        {
            if ( overlap > 1 )
                delta += (long long)( subBuffers[ -sbMap[ i ] - 1 ].mOrigin + subBuffers[ -sbMap[ i ] - 1 ].mSize );
            overlap--;
        }
    }

    log_info( "\tTesting %d sub-buffers with %lld overlapping Kbytes (%d%%; as many as %ld buffers overlapping at once)\n",
             16, ( delta / 1024LL ), (int)( delta * 100LL / (long long)mainSize ), maxOverlap );

    // Write some random contents to the main buffer
    cl_char * contents = new cl_char[ mainSize ];
    generate_random_data( kChar, mainSize, Action::GetRandSeed(), contents );

    error = clEnqueueWriteBuffer( queue, mainBuffer, CL_TRUE, 0, mainSize, contents, 0, NULL, NULL );
    test_error( error, "Unable to write to main buffer" );

    // Now read from each sub-buffer and check to make sure that they make sense w.r.t. the main contents
    cl_char * tempBuffer = new cl_char[ mainSize ];

    // NOTE(review): the test_error macros return early without freeing
    // contents/tempBuffer; tolerable for a failing test process, but worth
    // confirming against the harness conventions.
    int numErrors = 0;
    for ( size_t i = 0; i < 16; i++ )
    {
        // Read this sub-buffer back, randomly choosing between a direct
        // buffer read and a kernel-mediated read
        int which = random_in_range( 0, 1, Action::GetRandSeed() );
        if ( which )
            error = clEnqueueReadBuffer( queue, subBuffers[ i ], CL_TRUE, 0, subBuffers[ i ].mSize, tempBuffer, 0, NULL, NULL );
        else
            error = read_buffer_via_kernel( context, queue, subBuffers[ i ], subBuffers[ i ].mSize, tempBuffer );
        test_error( error, "Unable to read sub buffer contents" );

        if ( memcmp( tempBuffer, contents + subBuffers[ i ].mOrigin, subBuffers[ i ].mSize ) != 0 )
        {
            log_error( "ERROR: Validation for sub-buffer %ld failed!\n", i );
            numErrors++;
        }
    }

    delete [] contents;
    delete [] tempBuffer;
    Action::FreeRandSeed();

    return numErrors;
}
|
||||
|
||||
Reference in New Issue
Block a user