mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Initial open source release of OpenCL 2.2 CTS.
This commit is contained in:
27
test_conformance/device_execution/CMakeLists.txt
Normal file
27
test_conformance/device_execution/CMakeLists.txt
Normal file
@@ -0,0 +1,27 @@
|
||||
# Build description of the device_execution conformance test module.
# NOTE(review): MODULE_NAME and <MODULE_NAME>_SOURCES are presumably consumed
# by ../CMakeCommon.txt to create the test executable - confirm there.
set(MODULE_NAME DEVICE_EXECUTION)

# Test sources of this module followed by the shared harness sources it needs.
set(DEVICE_EXECUTION_SOURCES
    device_info.cpp
    device_queue.cpp
    enqueue_block.cpp
    enqueue_flags.cpp
    enqueue_multi_queue.cpp
    enqueue_ndrange.cpp
    enqueue_wg_size.cpp
    execute_block.cpp
    host_multi_queue.cpp
    host_queue_order.cpp
    main.c
    nested_blocks.cpp
    utils.cpp
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/msvc9.c
    ../../test_common/harness/parseParameters.cpp
)

include(../CMakeCommon.txt)

# end of file #
|
||||
52
test_conformance/device_execution/Makefile
Normal file
52
test_conformance/device_execution/Makefile
Normal file
@@ -0,0 +1,52 @@
|
||||
# Makefile for the device_execution conformance test (links against the
# OpenCL/OpenGL/GLUT/AppKit frameworks, see LIBRARIES below).

# Optional ATF support: only enabled when BUILD_WITH_ATF is defined.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

# Test sources followed by the shared harness sources.
SRCS = main.c \
       device_info.cpp \
       device_queue.cpp \
       enqueue_block.cpp \
       enqueue_flags.cpp \
       enqueue_multi_queue.cpp \
       enqueue_ndrange.cpp \
       enqueue_wg_size.cpp \
       execute_block.cpp \
       host_multi_queue.cpp \
       host_queue_order.cpp \
       nested_blocks.cpp \
       utils.cpp \
       ../../test_common/harness/errorHelpers.c \
       ../../test_common/harness/testHarness.c \
       ../../test_common/harness/kernelHelpers.c \
       ../../test_common/harness/typeWrappers.cpp \
       ../../test_common/harness/mt19937.c

# Each entry is turned into a -D<name> compiler option (see CFLAGS/CXXFLAGS).
DEFINES = DONT_TEST_GARBAGE_POINTERS

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
HEADERS =
TARGET = test_device_execution
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
# The C++ driver is used for both .c and .cpp sources.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Map every .c / .cpp source to its object file.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =

# 'all' and 'clean' do not produce files of that name; declaring them phony
# keeps them working even if such a file appears in the directory.
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

# Fallback for any target that has no rule.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
106
test_conformance/device_execution/device_info.cpp
Normal file
106
test_conformance/device_execution/device_info.cpp
Normal file
@@ -0,0 +1,106 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
#include "procs.h"
|
||||
#include "utils.h"
|
||||
|
||||
// Lower bounds (in bytes) that test_device_info() requires from the device.
// Smallest acceptable CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE.
static const cl_uint MIN_DEVICE_PREFFERED_QUEUE_SIZE = 16 * 1024;
// Smallest acceptable CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE on a FULL_PROFILE device.
static const cl_uint MAX_DEVICE_QUEUE_SIZE = 256 * 1024;
// Smallest acceptable CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE on an EMBEDDED_PROFILE device.
static const cl_uint MAX_DEVICE_EMBEDDED_QUEUE_SIZE = 64 * 1024;

#ifdef CL_VERSION_2_0

// Verifies that the device reports the minimum device-side enqueue
// capabilities through clGetDeviceInfo():
//   - host queues support CL_QUEUE_PROFILING_ENABLE,
//   - device queues support profiling and out-of-order execution,
//   - preferred / maximum device queue sizes reach the limits above,
//   - at least 1 device queue and 1024 device events are available.
// context, queue and num_elements are not used by this test.
// Returns 0 on success, -1 when a capability is below the required minimum,
// or the OpenCL error code when a query itself fails (via test_error).
int test_device_info(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int err_ret;
    int embedded = 0;
    size_t ret_len;
    char profile[32] = {0};
    cl_command_queue_properties properties;
    cl_uint size;

    // The profile decides which maximum-queue-size limit applies below.
    err_ret = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_PROFILE) failed");
    if(ret_len < sizeof(profile) && strcmp(profile, "FULL_PROFILE") == 0) embedded = 0;
    else if(ret_len < sizeof(profile) && strcmp(profile, "EMBEDDED_PROFILE") == 0) embedded = 1;
    else
    {
        log_error("Unknown device profile: %s\n", profile);
        return -1;
    }

    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof(properties), &properties, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_HOST_PROPERTIES) failed");
    if(!(properties&CL_QUEUE_PROFILING_ENABLE))
    {
        log_error("Host command-queue does not support mandated minimum capability: CL_QUEUE_PROFILING_ENABLE\n");
        return -1;
    }

    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, sizeof(properties), &properties, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES) failed");
    if(!(properties&CL_QUEUE_PROFILING_ENABLE))
    {
        log_error("Device command-queue does not support mandated minimum capability: CL_QUEUE_PROFILING_ENABLE\n");
        return -1;
    }
    if(!(properties&CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE))
    {
        log_error("Device command-queue does not support mandated minimum capability: CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE\n");
        return -1;
    }

    // cl_uint values are unsigned, hence %u in the messages below.
    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, sizeof(size), &size, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE) failed");
    if(size < MIN_DEVICE_PREFFERED_QUEUE_SIZE)
    {
        log_error("Device command-queue preferred size is less than minimum %uK: %uK\n", MIN_DEVICE_PREFFERED_QUEUE_SIZE/1024, size/1024);
        return -1;
    }

    // The required maximum queue size depends on the device profile.
    const cl_uint required_max_size = embedded ? MAX_DEVICE_EMBEDDED_QUEUE_SIZE : MAX_DEVICE_QUEUE_SIZE;
    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(size), &size, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");
    if(size < required_max_size)
    {
        log_error("Device command-queue maximum size is less than minimum %uK: %uK\n", required_max_size/1024, size/1024);
        return -1;
    }

    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(size), &size, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");
    if(size < 1)
    {
        log_error("Maximum number of device queues is less than minimum 1: %u\n", size);
        return -1;
    }

    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_EVENTS, sizeof(size), &size, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_EVENTS) failed");
    if(size < 1024)
    {
        log_error("Maximum number of events in use by a device queue is less than minimum 1024: %u\n", size);
        return -1;
    }

    return 0;
}

#endif
|
||||
|
||||
188
test_conformance/device_execution/device_queue.cpp
Normal file
188
test_conformance/device_execution/device_queue.cpp
Normal file
@@ -0,0 +1,188 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "procs.h"
|
||||
#include "utils.h"
|
||||
|
||||
// Queries context, device, properties and size of a device queue through
// clGetCommandQueueInfo() and checks each answer.
// Returns 0 when everything matches, -1 on a mismatch, or the OpenCL error
// code when a query fails (via test_error).
static int validate_device_queue_info(cl_device_id device, cl_context context, cl_command_queue queue)
{
    cl_int err_ret;
    cl_context q_context;
    cl_device_id q_device;
    cl_command_queue_properties q_properties;
    cl_uint q_size;
    size_t size_ret;

    err_ret = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(q_context), &q_context, &size_ret);
    test_error(err_ret, "clGetCommandQueueInfo(CL_QUEUE_CONTEXT) failed");
    if(size_ret != sizeof(q_context) || q_context != context)
    {
        log_error("clGetCommandQueueInfo(CL_QUEUE_CONTEXT) returned invalid context\n");
        return -1;
    }

    err_ret = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE, sizeof(q_device), &q_device, &size_ret);
    test_error(err_ret, "clGetCommandQueueInfo(CL_QUEUE_DEVICE) failed");
    if(size_ret != sizeof(q_device) || q_device != device)
    {
        log_error("clGetCommandQueueInfo(CL_QUEUE_DEVICE) returned invalid device\n");
        return -1;
    }

    // Both bits must be reported; testing "any bit set" would accept a queue
    // that lost one of the two properties it was created with.
    const cl_command_queue_properties required_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE;
    err_ret = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES, sizeof(q_properties), &q_properties, &size_ret);
    test_error(err_ret, "clGetCommandQueueInfo(CL_QUEUE_PROPERTIES) failed");
    if(size_ret != sizeof(q_properties) || (q_properties & required_properties) != required_properties)
    {
        log_error("clGetCommandQueueInfo(CL_QUEUE_PROPERTIES) returned invalid properties\n");
        return -1;
    }

    err_ret = clGetCommandQueueInfo(queue, CL_QUEUE_SIZE, sizeof(q_size), &q_size, &size_ret);
    test_error(err_ret, "clGetCommandQueueInfo(CL_QUEUE_SIZE) failed");
    if(size_ret != sizeof(q_size) || q_size < 1)
    {
        log_error("clGetCommandQueueInfo(CL_QUEUE_SIZE) returned invalid queue size\n");
        return -1;
    }

    return 0;
}

// Validates one device queue: takes an extra reference, checks the values
// reported by clGetCommandQueueInfo() and drops the reference again.
//   device, context - objects the queue is expected to belong to
//   queue           - device queue under test
//   size            - requested queue size; accepted for the callers but not
//                     compared against the reported size
// Returns 0 on success, -1 on a validation mismatch, or an OpenCL error code
// when an API call fails.
static int check_device_queue(cl_device_id device, cl_context context, cl_command_queue queue, cl_uint size)
{
    (void)size;

    cl_int err_ret = clRetainCommandQueue(queue);
    test_error(err_ret, "clRetainCommandQueue() failed");

    // Validation runs in a helper so that the reference taken above is
    // released on every path, including validation failures.
    const int res = validate_device_queue_info(device, context, queue);

    err_ret = clReleaseCommandQueue(queue);
    test_error(err_ret, "clReleaseCommandQueue() failed");

    return res;
}
|
||||
|
||||
// Creates num_queues device queues with the given property list and runs
// check_device_queue() on each of them.
// Returns 0 when all queues validate, -1 when at least one validation fails,
// or the OpenCL error code when a queue cannot be created (via test_error).
// The wrappers in the vector own the queues for the duration of the call.
static int check_device_queues(cl_device_id device, cl_context context, cl_uint num_queues, cl_queue_properties *properties, cl_uint size)
{
    std::vector<clCommandQueueWrapper> queues(num_queues);
    cl_int status;

    // First pass: all queues must exist at the same time.
    for(cl_uint n = 0; n != num_queues; ++n)
    {
        queues[n] = clCreateCommandQueueWithProperties(context, device, properties, &status);
        test_error(status, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE) failed");
    }

    // Second pass: validate every queue and remember any failure.
    cl_int result = 0;
    for(cl_uint n = 0; n != num_queues; ++n)
    {
        status = check_device_queue(device, context, queues[n], size);
        if(check_error(status, "Device queue[%d] validation failed", n))
        {
            result = -1;
        }
    }

    return result;
}
|
||||
|
||||
// Creates and validates device queues in several configurations:
//   1. the default device queue (CL_QUEUE_ON_DEVICE_DEFAULT),
//   2. when more than one device queue is supported: a single extra queue and
//      up to MAX_QUEUES queues, each with default size, an explicit size
//      (preferred size - 1024) and the maximum size.
// queue and num_elements are not used by this test.
// Returns 0 when every configuration validates, -1 when any validation
// fails, or the OpenCL error code when a query or the default queue
// creation fails (via test_error).
int test_device_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int err_ret, res = 0;
    size_t ret_len;
    clCommandQueueWrapper dev_queue;
    cl_uint preffered_size, max_size, max_queues;

    // Properties of the default device queue.
    cl_queue_properties queue_prop_def[] =
    {
        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT,
        0
    };

    // Properties of a non-default device queue without an explicit size.
    cl_queue_properties queue_prop[] =
    {
        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE,
        0
    };

    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, sizeof(preffered_size), &preffered_size, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE) failed");

    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(max_size), &max_size, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");

    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");

    // Cap the number of queues created by the test.
    if(max_queues > MAX_QUEUES) max_queues = MAX_QUEUES;

    dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
    test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");

    // Report success only when the validation actually passed.
    err_ret = check_device_queue(device, context, dev_queue, preffered_size);
    if(check_error(err_ret, "Default device queue validation failed")) res = -1;
    else log_info("Default device queue is OK.\n");

    if(max_queues > 1) // Check more queues if supported.
    {
        // NOTE(review): wraps around if the preferred size is below 1024.
        cl_uint q_size = preffered_size-1024;

        // Non-default device queue with an explicit, smaller-than-preferred size.
        cl_queue_properties queue_prop_size[] =
        {
            CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE,
            CL_QUEUE_SIZE, q_size,
            0
        };

        // Non-default device queue with the maximum supported size.
        cl_queue_properties queue_prop_max[] =
        {
            CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE,
            CL_QUEUE_SIZE, max_size,
            0
        };

        // One additional queue in each size configuration.
        err_ret = check_device_queues(device, context, 1, queue_prop, preffered_size);
        if(check_error(err_ret, "Second device queue validation failed")) res = -1;
        else log_info("Second device queue is OK.\n");

        err_ret = check_device_queues(device, context, 1, queue_prop_size, q_size);
        if(check_error(err_ret, "Device queue with size validation failed")) res = -1;
        else log_info("Device queue with size is OK.\n");

        err_ret = check_device_queues(device, context, 1, queue_prop_max, max_size);
        if(check_error(err_ret, "Device queue max size validation failed")) res = -1;
        else log_info("Device queue max size is OK.\n");

        // max_queues queues at once in each size configuration.
        // NOTE(review): dev_queue is still alive here, so these calls create
        // max_queues queues in addition to the default one - confirm this is
        // intended with respect to CL_DEVICE_MAX_ON_DEVICE_QUEUES.
        err_ret = check_device_queues(device, context, max_queues, queue_prop, preffered_size);
        if(check_error(err_ret, "Max number device queue validation failed")) res = -1;
        else log_info("Max number device queue is OK.\n");

        err_ret = check_device_queues(device, context, max_queues, queue_prop_size, q_size);
        if(check_error(err_ret, "Max number device queue with size validation failed")) res = -1;
        else log_info("Max number device queue with size is OK.\n");

        err_ret = check_device_queues(device, context, max_queues, queue_prop_max, max_size);
        if(check_error(err_ret, "Max number device queue with max size validation failed")) res = -1;
        else log_info("Max number device queue with max size is OK.\n");
    }

    return res;
}
|
||||
|
||||
684
test_conformance/device_execution/enqueue_block.cpp
Normal file
684
test_conformance/device_execution/enqueue_block.cpp
Normal file
@@ -0,0 +1,684 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "procs.h"
|
||||
#include "utils.h"
|
||||
#include <time.h>
|
||||
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
extern int gWimpyMode;
|
||||
static const char* enqueue_simple_block[] =
|
||||
{
|
||||
NL, "void block_fn(size_t tid, int mul, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " res[tid] = mul * 7 - 21;"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_simple_block(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " int multiplier = 3;"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
|
||||
NL, ""
|
||||
NL, " res[tid] = -1;"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_block_with_local_arg1[] =
|
||||
{
|
||||
NL, "#define LOCAL_MEM_SIZE 10"
|
||||
NL, ""
|
||||
NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp)"
|
||||
NL, "{"
|
||||
NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)"
|
||||
NL, " {"
|
||||
NL, " tmp[i] = mul * 7 - 21;"
|
||||
NL, " res[tid] += tmp[i];"
|
||||
NL, " }"
|
||||
NL, " res[tid] += 2;"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_block_with_local_arg1(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " int multiplier = 3;"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, multiplier, res, (local int*)buf); };"
|
||||
NL, ""
|
||||
NL, " res[tid] = -2;"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)));"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_block_with_local_arg2[] =
|
||||
{
|
||||
NL, "#define LOCAL_MEM_SIZE 10"
|
||||
NL, ""
|
||||
NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp1, __local float4* tmp2)"
|
||||
NL, "{"
|
||||
NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)"
|
||||
NL, " {"
|
||||
NL, " tmp1[i] = mul * 7 - 21;"
|
||||
NL, " tmp2[i].x = (float)(mul * 7 - 21);"
|
||||
NL, " tmp2[i].y = (float)(mul * 7 - 21);"
|
||||
NL, " tmp2[i].z = (float)(mul * 7 - 21);"
|
||||
NL, " tmp2[i].w = (float)(mul * 7 - 21);"
|
||||
NL, ""
|
||||
NL, " res[tid] += tmp1[i];"
|
||||
NL, " res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w);"
|
||||
NL, " }"
|
||||
NL, " res[tid] += 2;"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_block_with_local_arg2(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " int multiplier = 3;"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(__local void*, __local void*) = ^(__local void* buf1, __local void* buf2)"
|
||||
NL, " { block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); };"
|
||||
NL, ""
|
||||
NL, " res[tid] = -2;"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4)));"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_block_with_wait_list[] =
|
||||
{
|
||||
NL, "#define BLOCK_SUBMITTED 1"
|
||||
NL, "#define BLOCK_COMPLETED 2"
|
||||
NL, "#define CHECK_SUCCESS 0"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_block_with_wait_list(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " clk_event_t user_evt = create_user_event();"
|
||||
NL, ""
|
||||
NL, " res[tid] = BLOCK_SUBMITTED;"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, " clk_event_t block_evt;"
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt,"
|
||||
NL, " ^{"
|
||||
NL, " res[tid] = BLOCK_COMPLETED;"
|
||||
NL, " });"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, ""
|
||||
NL, " retain_event(block_evt);"
|
||||
NL, " release_event(block_evt);"
|
||||
NL, ""
|
||||
NL, " //check block is not started"
|
||||
NL, " if(res[tid] == BLOCK_SUBMITTED)"
|
||||
NL, " {"
|
||||
NL, " clk_event_t my_evt;"
|
||||
NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, "
|
||||
NL, " ^{"
|
||||
NL, " //check block is completed"
|
||||
NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
|
||||
NL, " });"
|
||||
NL, " release_event(my_evt);"
|
||||
NL, " }"
|
||||
NL, ""
|
||||
NL, " set_user_event_status(user_evt, CL_COMPLETE);"
|
||||
NL, ""
|
||||
NL, " release_event(user_evt);"
|
||||
NL, " release_event(block_evt);"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_block_with_wait_list_and_local_arg[] =
|
||||
{
|
||||
NL, "#define LOCAL_MEM_SIZE 10"
|
||||
NL, "#define BLOCK_COMPLETED 1"
|
||||
NL, "#define BLOCK_SUBMITTED 2"
|
||||
NL, "#define BLOCK_STARTED 3"
|
||||
NL, "#define CHECK_SUCCESS 0"
|
||||
NL, ""
|
||||
NL, "void block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp)"
|
||||
NL, "{"
|
||||
NL, " res[tid] = BLOCK_STARTED;"
|
||||
NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)"
|
||||
NL, " {"
|
||||
NL, " tmp[i] = mul * 7 - 21;"
|
||||
NL, " res[tid] += tmp[i];"
|
||||
NL, " }"
|
||||
NL, " if(res[tid] == BLOCK_STARTED) res[tid] = BLOCK_COMPLETED;"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " int multiplier = 3;"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " clk_event_t user_evt = create_user_event();"
|
||||
NL, ""
|
||||
NL, " res[tid] = BLOCK_SUBMITTED;"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, " clk_event_t block_evt;"
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, "
|
||||
NL, " ^(__local void* buf) {"
|
||||
NL, " block_fn_local_arg(tid, multiplier, res, (__local int*)buf);"
|
||||
NL, " }, LOCAL_MEM_SIZE*sizeof(int));"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, ""
|
||||
NL, " retain_event(block_evt);"
|
||||
NL, " release_event(block_evt);"
|
||||
NL, ""
|
||||
NL, " //check block is not started"
|
||||
NL, " if(res[tid] == BLOCK_SUBMITTED)"
|
||||
NL, " {"
|
||||
NL, " clk_event_t my_evt;"
|
||||
NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, "
|
||||
NL, " ^{"
|
||||
NL, " //check block is completed"
|
||||
NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
|
||||
NL, " });"
|
||||
NL, " release_event(my_evt);"
|
||||
NL, " }"
|
||||
NL, ""
|
||||
NL, " set_user_event_status(user_evt, CL_COMPLETE);"
|
||||
NL, ""
|
||||
NL, " release_event(user_evt);"
|
||||
NL, " release_event(block_evt);"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_block_get_kernel_work_group_size[] =
|
||||
{
|
||||
NL, "void block_fn(size_t tid, int mul, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " res[tid] = mul * 7 - 21;"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_block_get_kernel_work_group_size(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " int multiplier = 3;"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
|
||||
NL, ""
|
||||
NL, " size_t local_work_size = get_kernel_work_group_size(kernelBlock);"
|
||||
NL, " if (local_work_size <= 0){ res[tid] = -1; return; }"
|
||||
NL, " size_t global_work_size = local_work_size * 4;"
|
||||
NL, ""
|
||||
NL, " res[tid] = -1;"
|
||||
NL, " queue_t q1 = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);"
|
||||
NL, ""
|
||||
NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, "}"
|
||||
};
|
||||
|
||||
static const char* enqueue_block_get_kernel_preferred_work_group_size_multiple[] =
|
||||
{
|
||||
NL, "void block_fn(size_t tid, int mul, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " res[tid] = mul * 7 - 21;"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " int multiplier = 3;"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
|
||||
NL, ""
|
||||
NL, " size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock);"
|
||||
NL, " if (local_work_size <= 0){ res[tid] = -1; return; }"
|
||||
NL, " size_t global_work_size = local_work_size * 4;"
|
||||
NL, ""
|
||||
NL, " res[tid] = -1;"
|
||||
NL, " queue_t q1 = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);"
|
||||
NL, ""
|
||||
NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, "}"
|
||||
};
|
||||
|
||||
static const char* enqueue_block_capture_event_profiling_info_after_execution[] =
|
||||
{
|
||||
NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS)
|
||||
NL, ""
|
||||
NL, "__global ulong value[MAX_GWS*2] = {0};"
|
||||
NL, ""
|
||||
NL, "void block_fn(size_t tid, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " res[tid] = -2;"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "void check_res(size_t tid, const clk_event_t evt, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);"
|
||||
NL, ""
|
||||
NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;"
|
||||
NL, " else res[tid] = -4;"
|
||||
NL, " release_event(evt);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " res[tid] = -1;"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, " clk_event_t block_evt1;"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };"
|
||||
NL, ""
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, ""
|
||||
NL, " void (^checkBlock) (void) = ^{ check_res(tid, block_evt1, res); };"
|
||||
NL, ""
|
||||
NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_block_capture_event_profiling_info_before_execution[] =
|
||||
{
|
||||
NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS)
|
||||
NL, ""
|
||||
NL, "__global ulong value[MAX_GWS*2] = {0};"
|
||||
NL, ""
|
||||
NL, "void block_fn(size_t tid, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " res[tid] = -2;"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "void check_res(size_t tid, const ulong *value, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;"
|
||||
NL, " else res[tid] = -4;"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_block_capture_event_profiling_info_before_execution(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " int multiplier = 3;"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " clk_event_t user_evt = create_user_event();"
|
||||
NL, ""
|
||||
NL, " res[tid] = -1;"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, " clk_event_t block_evt1;"
|
||||
NL, " clk_event_t block_evt2;"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };"
|
||||
NL, ""
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, ""
|
||||
NL, " capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);"
|
||||
NL, ""
|
||||
NL, " set_user_event_status(user_evt, CL_COMPLETE);"
|
||||
NL, ""
|
||||
NL, " void (^checkBlock) (void) = ^{ check_res(tid, &value, res); };"
|
||||
NL, ""
|
||||
NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
|
||||
NL, ""
|
||||
NL, " release_event(user_evt);"
|
||||
NL, " release_event(block_evt1);"
|
||||
NL, " release_event(block_evt2);"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_block_with_barrier[] =
|
||||
{
|
||||
NL, "void block_fn(size_t tid, int mul, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " if(mul > 0) barrier(CLK_GLOBAL_MEM_FENCE);"
|
||||
NL, " res[tid] = mul * 7 -21;"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "void loop_fn(size_t tid, int n, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " while(n > 0)"
|
||||
NL, " {"
|
||||
NL, " barrier(CLK_GLOBAL_MEM_FENCE);"
|
||||
NL, " res[tid] = 0;"
|
||||
NL, " --n;"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_block_with_barrier(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " int multiplier = 3;"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " res[tid] = -1;"
|
||||
NL, " size_t n = 256;"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
|
||||
NL, ""
|
||||
NL, " ndrange_t ndrange = ndrange_1D(n);"
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, ""
|
||||
NL, " void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); };"
|
||||
NL, ""
|
||||
NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_marker_with_block_event[] =
|
||||
{
|
||||
NL, "#define BLOCK_COMPLETED 1"
|
||||
NL, "#define BLOCK_SUBMITTED 2"
|
||||
NL, "#define CHECK_SUCCESS 0"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_marker_with_block_event(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " clk_event_t user_evt = create_user_event();"
|
||||
NL, ""
|
||||
NL, " res[tid] = BLOCK_SUBMITTED;"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, ""
|
||||
NL, " clk_event_t block_evt1;"
|
||||
NL, " clk_event_t marker_evt;"
|
||||
NL, ""
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1,"
|
||||
NL, " ^{"
|
||||
NL, " res[tid] = BLOCK_COMPLETED;"
|
||||
NL, " });"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }"
|
||||
NL, ""
|
||||
NL, " enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
|
||||
NL, ""
|
||||
NL, " retain_event(marker_evt);"
|
||||
NL, " release_event(marker_evt);"
|
||||
NL, ""
|
||||
NL, " //check block is not started"
|
||||
NL, " if(res[tid] == BLOCK_SUBMITTED)"
|
||||
NL, " {"
|
||||
NL, " clk_event_t my_evt;"
|
||||
NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, "
|
||||
NL, " ^{"
|
||||
NL, " //check block is completed"
|
||||
NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
|
||||
NL, " });"
|
||||
NL, " release_event(my_evt);"
|
||||
NL, " }"
|
||||
NL, ""
|
||||
NL, " set_user_event_status(user_evt, CL_COMPLETE);"
|
||||
NL, ""
|
||||
NL, " release_event(block_evt1);"
|
||||
NL, " release_event(marker_evt);"
|
||||
NL, " release_event(user_evt);"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_marker_with_user_event[] =
|
||||
{
|
||||
NL, "#define BLOCK_COMPLETED 1"
|
||||
NL, "#define BLOCK_SUBMITTED 2"
|
||||
NL, "#define CHECK_SUCCESS 0"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_marker_with_user_event(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " uint multiplier = 7;"
|
||||
NL, ""
|
||||
NL, " clk_event_t user_evt = create_user_event();"
|
||||
NL, ""
|
||||
NL, " res[tid] = BLOCK_SUBMITTED;"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, ""
|
||||
NL, " clk_event_t marker_evt;"
|
||||
NL, " clk_event_t block_evt;"
|
||||
NL, ""
|
||||
NL, " int enq_res = enqueue_marker(def_q, 1, &user_evt, &marker_evt);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, ""
|
||||
NL, " retain_event(marker_evt);"
|
||||
NL, " release_event(marker_evt);"
|
||||
NL, ""
|
||||
NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &block_evt, "
|
||||
NL, " ^{"
|
||||
NL, " if(res[tid] == BLOCK_SUBMITTED) res[tid] = CHECK_SUCCESS;"
|
||||
NL, " });"
|
||||
NL, ""
|
||||
NL, " //check block is not started"
|
||||
NL, " if(res[tid] != BLOCK_SUBMITTED) { res[tid] = -2; return; }"
|
||||
NL, ""
|
||||
NL, " set_user_event_status(user_evt, CL_COMPLETE);"
|
||||
NL, ""
|
||||
NL, " release_event(block_evt);"
|
||||
NL, " release_event(marker_evt);"
|
||||
NL, " release_event(user_evt);"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_marker_with_mixed_events[] =
|
||||
{
|
||||
NL, "#define BLOCK_COMPLETED 1"
|
||||
NL, "#define BLOCK_SUBMITTED 2"
|
||||
NL, "#define CHECK_SUCCESS 0"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_marker_with_mixed_events(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " clk_event_t mix_ev[2];"
|
||||
NL, " mix_ev[0] = create_user_event();"
|
||||
NL, ""
|
||||
NL, " res[tid] = BLOCK_SUBMITTED;"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, ""
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1],"
|
||||
NL, " ^{"
|
||||
NL, " res[tid] = BLOCK_COMPLETED;"
|
||||
NL, " });"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }"
|
||||
NL, ""
|
||||
NL, " clk_event_t marker_evt;"
|
||||
NL, ""
|
||||
NL, " enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
|
||||
NL, ""
|
||||
NL, " retain_event(marker_evt);"
|
||||
NL, " release_event(marker_evt);"
|
||||
NL, ""
|
||||
NL, " //check block is not started"
|
||||
NL, " if(res[tid] == BLOCK_SUBMITTED)"
|
||||
NL, " {"
|
||||
NL, " clk_event_t my_evt;"
|
||||
NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, "
|
||||
NL, " ^{"
|
||||
NL, " //check block is completed"
|
||||
NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
|
||||
NL, " });"
|
||||
NL, " release_event(my_evt);"
|
||||
NL, " }"
|
||||
NL, ""
|
||||
NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);"
|
||||
NL, ""
|
||||
NL, " release_event(mix_ev[1]);"
|
||||
NL, " release_event(marker_evt);"
|
||||
NL, " release_event(mix_ev[0]);"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
// OpenCL C source of the "enqueue_block_with_mixed_events" kernel.
//
// The kernel starts with res[tid] = -2 and relies on two enqueued blocks that
// each increment res[tid], so a fully successful run leaves 0:
//   mix_ev[0] - user event created by the kernel;
//   mix_ev[1] - event of the first block, which waits on mix_ev[0];
//   mix_ev[2] - event of a marker that waits on mix_ev[1].
// The second block waits on all three events at once (user, block and marker
// events mixed in one wait list). Failure codes written to res[tid]:
// -1 first block enqueue, -3 marker enqueue, -4 second block enqueue.
// The user event is completed only after everything has been enqueued.
static const char* enqueue_block_with_mixed_events[] =
|
||||
{
|
||||
NL, "kernel void enqueue_block_with_mixed_events(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " int enq_res;"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " clk_event_t mix_ev[3];"
|
||||
NL, " mix_ev[0] = create_user_event();"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, " res[tid] = -2;"
|
||||
NL, ""
|
||||
NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], ^{ res[tid]++; });"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, ""
|
||||
NL, " enq_res = enqueue_marker(def_q, 1, &mix_ev[1], &mix_ev[2]);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
|
||||
NL, ""
|
||||
NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, sizeof(mix_ev)/sizeof(mix_ev[0]), mix_ev, NULL, ^{ res[tid]++; });"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -4; return; }"
|
||||
NL, ""
|
||||
NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);"
|
||||
NL, ""
|
||||
NL, " release_event(mix_ev[0]);"
|
||||
NL, " release_event(mix_ev[1]);"
|
||||
NL, " release_event(mix_ev[2]);"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
// Table of the kernels executed by test_enqueue_block, in execution order.
// Each entry is built with the KERNEL() macro from the string array of the
// same name; test_enqueue_block reads the lines, num_lines and kernel_name
// members of every entry.
// NOTE(review): KERNEL() and kernel_src are defined outside this chunk -
// confirm the exact field layout there.
static const kernel_src sources_enqueue_block[] =
|
||||
{
|
||||
KERNEL(enqueue_simple_block),
|
||||
// Block with local mem
|
||||
KERNEL(enqueue_block_with_local_arg1),
|
||||
KERNEL(enqueue_block_with_local_arg2),
|
||||
KERNEL(enqueue_block_with_wait_list),
|
||||
KERNEL(enqueue_block_with_wait_list_and_local_arg),
|
||||
// WG size built-ins
|
||||
KERNEL(enqueue_block_get_kernel_work_group_size),
|
||||
KERNEL(enqueue_block_get_kernel_preferred_work_group_size_multiple),
|
||||
// Event profiling info
|
||||
KERNEL(enqueue_block_capture_event_profiling_info_after_execution),
|
||||
KERNEL(enqueue_block_capture_event_profiling_info_before_execution),
|
||||
// Marker
|
||||
KERNEL(enqueue_marker_with_block_event),
|
||||
KERNEL(enqueue_marker_with_user_event),
|
||||
// Mixed events
|
||||
KERNEL(enqueue_marker_with_mixed_events),
|
||||
KERNEL(enqueue_block_with_mixed_events),
|
||||
// Barrier
|
||||
KERNEL(enqueue_block_with_barrier),
|
||||
|
||||
};
|
||||
// Number of entries in sources_enqueue_block.
static const size_t num_kernels_enqueue_block = arr_size(sources_enqueue_block);
|
||||
|
||||
// Scans the first `len` entries of `results` for a value other than zero.
// Returns the index of the first non-zero entry, or -1 when every entry is 0
// (which includes the case len <= 0).
static int check_kernel_results(cl_int* results, cl_int len)
{
    cl_int idx = 0;
    while(idx < len)
    {
        if(results[idx] != 0)
        {
            return idx;
        }
        ++idx;
    }
    return -1;
}
|
||||
|
||||
// Runs every kernel listed in sources_enqueue_block (or only the one selected
// through gKernelName) and checks that each leaves an all-zero result buffer.
//
// device       - device queried for queue and work-group limits and used to
//                create the on-device default queue
// context      - context the device queue and the kernels are created in
// queue        - host queue passed to run_n_kernel_args
// num_elements - not used by this test
//
// Returns 0 when every executed kernel passed and -1 when at least one kernel
// failed to execute or produced a non-zero result. Failures of the setup
// calls are reported through test_error.
// NOTE(review): test_error/check_error are harness macros defined outside
// this chunk; test_error is presumed to return from the function on error.
int test_enqueue_block(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_uint i;
    cl_int n, err_ret, res = 0;
    clCommandQueueWrapper dev_queue;
    cl_int kernel_results[MAX_GWS] = {0};

    size_t ret_len;
    cl_uint max_queues = 1;
    cl_uint maxQueueSize = 0;
    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");

    // The value is not used afterwards; the query itself must succeed.
    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");

    size_t max_local_size = 1;
    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");

    // Default on-device queue; the kernels obtain it via get_default_queue().
    // Profiling is enabled for the capture_event_profiling_info kernels.
    cl_queue_properties queue_prop_def[] =
    {
        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT|CL_QUEUE_PROFILING_ENABLE,
        CL_QUEUE_SIZE, maxQueueSize,
        0
    };

    dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
    test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");

    size_t global_size = MAX_GWS;
    size_t local_size = (max_local_size > global_size/16) ? global_size/16 : max_local_size;
    if(gWimpyMode)
    {
        global_size = 4;
        local_size = 2;
    }

    // size_t must not be passed to a %d/%u conversion; narrow the counts once
    // so the log calls below receive arguments that match their format.
    const unsigned kernel_count = (unsigned)num_kernels_enqueue_block;
    unsigned failCnt = 0;

    for(i = 0; i < num_kernels_enqueue_block; ++i)
    {
        const kernel_src& src = sources_enqueue_block[i];

        if (!gKernelName.empty() && gKernelName != src.kernel_name)
            continue;

        log_info("Running '%s' kernel (%u of %u) ...\n", src.kernel_name, i + 1, kernel_count);
        err_ret = run_n_kernel_args(context, queue, src.lines, src.num_lines, src.kernel_name, local_size, global_size, kernel_results, sizeof(kernel_results), 0, NULL);
        if(check_error(err_ret, "'%s' kernel execution failed", src.kernel_name)) { ++failCnt; res = -1; }
        // A kernel that ran but left a non-zero result is a failure as well
        // and is counted in the summary.
        else if((n = check_kernel_results(kernel_results, arr_size(kernel_results))) >= 0 && check_error(-1, "'%s' kernel results validation failed: [%d] returned %d expected 0", src.kernel_name, n, kernel_results[n])) { ++failCnt; res = -1; }
        else log_info("'%s' kernel is OK.\n", src.kernel_name);
    }

    if (failCnt > 0)
    {
        log_error("ERROR: %u of %u kernels failed.\n", failCnt, kernel_count);
    }

    return res;
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
756
test_conformance/device_execution/enqueue_flags.cpp
Normal file
756
test_conformance/device_execution/enqueue_flags.cpp
Normal file
@@ -0,0 +1,756 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "procs.h"
|
||||
#include "utils.h"
|
||||
#include <time.h>
|
||||
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
extern int gWimpyMode;
|
||||
#define BITS_DEPTH 28
|
||||
|
||||
static const char* enqueue_flags_wait_kernel_simple[] =
|
||||
{
|
||||
NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
|
||||
NL, ""
|
||||
NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " int val = 0;"
|
||||
NL, " size_t lid = get_local_id(0);"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;"
|
||||
NL, ""
|
||||
NL, " if(tid == 0)"
|
||||
NL, " {"
|
||||
NL, " if((index + 1) < BITS_DEPTH)"
|
||||
NL, " {"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), "
|
||||
NL, " ^{"
|
||||
NL, " block_fn(array, index + 1, ls, gs, res);"
|
||||
NL, " });"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, ""
|
||||
NL, " if((index + 1) == BITS_DEPTH)"
|
||||
NL, " {"
|
||||
NL, " barrier(CLK_LOCAL_MEM_FENCE);"
|
||||
NL, ""
|
||||
NL, " if(lid == 0)"
|
||||
NL, " {"
|
||||
NL, " size_t gid = get_group_id(0);"
|
||||
NL, " res[gid] = 1;"
|
||||
NL, ""
|
||||
NL, " for(int j = 0; j < BITS_DEPTH; j++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < ls; i++)"
|
||||
NL, " {"
|
||||
NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
|
||||
NL, " {"
|
||||
NL, " res[gid] = 2;"
|
||||
NL, " break;"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_flags_wait_kernel_simple(__global int* res, __global int* array)"
|
||||
NL, "{"
|
||||
NL, " size_t ls = get_local_size(0);"
|
||||
NL, " size_t gs = get_global_size(0);"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " res[tid] = 0;"
|
||||
NL, " array[tid] = tid;"
|
||||
NL, ""
|
||||
NL, " if(tid == 0)"
|
||||
NL, " {"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), "
|
||||
NL, " ^{"
|
||||
NL, " block_fn(array, 1, ls, gs, res);"
|
||||
NL, " });"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_flags_wait_kernel_event[] =
|
||||
{
|
||||
NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
|
||||
NL, ""
|
||||
NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " int val = 0;"
|
||||
NL, " size_t lid = get_local_id(0);"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;"
|
||||
NL, ""
|
||||
NL, " if(tid == 0)"
|
||||
NL, " {"
|
||||
NL, " if((index + 1) < BITS_DEPTH)"
|
||||
NL, " {"
|
||||
NL, " clk_event_t block_evt;"
|
||||
NL, " clk_event_t user_evt = create_user_event();"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
|
||||
NL, " ^{"
|
||||
NL, " block_fn(array, index + 1, ls, gs, res);"
|
||||
NL, " });"
|
||||
NL, " set_user_event_status(user_evt, CL_COMPLETE);"
|
||||
NL, " release_event(user_evt);"
|
||||
NL, " release_event(block_evt);"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, ""
|
||||
NL, " if((index + 1) == BITS_DEPTH)"
|
||||
NL, " {"
|
||||
NL, " barrier(CLK_LOCAL_MEM_FENCE);"
|
||||
NL, ""
|
||||
NL, " if(lid == 0)"
|
||||
NL, " {"
|
||||
NL, " size_t gid = get_group_id(0);"
|
||||
NL, " res[gid] = 1;"
|
||||
NL, ""
|
||||
NL, " for(int j = 0; j < BITS_DEPTH; j++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < ls; i++)"
|
||||
NL, " {"
|
||||
NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
|
||||
NL, " {"
|
||||
NL, " res[gid] = 2;"
|
||||
NL, " break;"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_flags_wait_kernel_event(__global int* res, __global int* array)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " size_t gs = get_global_size(0);"
|
||||
NL, " size_t ls = get_local_size(0);"
|
||||
NL, ""
|
||||
NL, " res[tid] = 0;"
|
||||
NL, " array[tid] = tid;"
|
||||
NL, ""
|
||||
NL, " if(tid == 0)"
|
||||
NL, " {"
|
||||
NL, " clk_event_t block_evt;"
|
||||
NL, " clk_event_t user_evt = create_user_event();"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
|
||||
NL, " ^{"
|
||||
NL, " block_fn(array, 1, ls, gs, res);"
|
||||
NL, " });"
|
||||
NL, " set_user_event_status(user_evt, CL_COMPLETE);"
|
||||
NL, " release_event(user_evt);"
|
||||
NL, " release_event(block_evt);"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_flags_wait_kernel_local[] =
|
||||
{
|
||||
NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
|
||||
NL, ""
|
||||
NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res, __local int* sub_array)"
|
||||
NL, "{"
|
||||
NL, " int val = 0;"
|
||||
NL, " size_t gid = get_group_id(0);"
|
||||
NL, " size_t lid = get_local_id(0);"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " sub_array[lid] = array[(index - 1) * gs + tid];"
|
||||
NL, " barrier(CLK_LOCAL_MEM_FENCE);"
|
||||
NL, ""
|
||||
NL, " for(int i = 0; i < ls; i++)"
|
||||
NL, " {"
|
||||
NL, " int id = gid * ls + i;"
|
||||
NL, " val += sub_array[i];"
|
||||
NL, " val -= (tid == id)? 0: (id + index - 1);"
|
||||
NL, " }"
|
||||
NL, " array[index * gs + tid] = val + 1;"
|
||||
NL, ""
|
||||
NL, " if(tid == 0)"
|
||||
NL, " {"
|
||||
NL, " if((index + 1) < BITS_DEPTH)"
|
||||
NL, " {"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), "
|
||||
NL, " ^(__local void* sub_array){"
|
||||
NL, " block_fn(array, index + 1, ls, gs, res, sub_array);"
|
||||
NL, " }, ls * sizeof(int));"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, ""
|
||||
NL, " if((index + 1) == BITS_DEPTH)"
|
||||
NL, " {"
|
||||
NL, " barrier(CLK_LOCAL_MEM_FENCE);"
|
||||
NL, ""
|
||||
NL, " if(lid == 0)"
|
||||
NL, " {"
|
||||
NL, " res[gid] = 1;"
|
||||
NL, ""
|
||||
NL, " for(int j = 0; j < BITS_DEPTH; j++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < ls; i++)"
|
||||
NL, " {"
|
||||
NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
|
||||
NL, " {"
|
||||
NL, " res[gid] = 2;"
|
||||
NL, " break;"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_flags_wait_kernel_local(__global int* res, __global int* array)"
|
||||
NL, "{"
|
||||
NL, " size_t ls = get_local_size(0);"
|
||||
NL, " size_t gs = get_global_size(0);"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " res[tid] = 0;"
|
||||
NL, " array[tid] = tid;"
|
||||
NL, ""
|
||||
NL, " if(tid == 0)"
|
||||
NL, " {"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), "
|
||||
NL, " ^(__local void* sub_array){"
|
||||
NL, " block_fn(array, 1, ls, gs, res, sub_array);"
|
||||
NL, " }, ls * sizeof(int));"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_flags_wait_kernel_event_local[] =
|
||||
{
|
||||
NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
|
||||
NL, ""
|
||||
NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res, __local int* sub_array)"
|
||||
NL, "{"
|
||||
NL, " int val = 0;"
|
||||
NL, " size_t gid = get_group_id(0);"
|
||||
NL, " size_t lid = get_local_id(0);"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " sub_array[lid] = array[(index - 1) * gs + tid];"
|
||||
NL, " barrier(CLK_LOCAL_MEM_FENCE);"
|
||||
NL, ""
|
||||
NL, " for(int i = 0; i < ls; i++)"
|
||||
NL, " {"
|
||||
NL, " int id = gid * ls + i;"
|
||||
NL, " val += sub_array[i];"
|
||||
NL, " val -= (tid == id)? 0: (id + index - 1);"
|
||||
NL, " }"
|
||||
NL, " array[index * gs + tid] = val + 1;"
|
||||
NL, ""
|
||||
NL, " if(tid == 0)"
|
||||
NL, " {"
|
||||
NL, " if((index + 1) < BITS_DEPTH)"
|
||||
NL, " {"
|
||||
NL, " clk_event_t block_evt;"
|
||||
NL, " clk_event_t user_evt = create_user_event();"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
|
||||
NL, " ^(__local void* sub_array){"
|
||||
NL, " block_fn(array, index + 1, ls, gs, res, sub_array);"
|
||||
NL, " }, ls * sizeof(int));"
|
||||
NL, " set_user_event_status(user_evt, CL_COMPLETE);"
|
||||
NL, " release_event(user_evt);"
|
||||
NL, " release_event(block_evt);"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, ""
|
||||
NL, " if((index + 1) == BITS_DEPTH)"
|
||||
NL, " {"
|
||||
NL, " barrier(CLK_LOCAL_MEM_FENCE);"
|
||||
NL, ""
|
||||
NL, " if(lid == 0)"
|
||||
NL, " {"
|
||||
NL, " res[gid] = 1;"
|
||||
NL, ""
|
||||
NL, " for(int j = 0; j < BITS_DEPTH; j++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < ls; i++)"
|
||||
NL, " {"
|
||||
NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
|
||||
NL, " {"
|
||||
NL, " res[gid] = 2;"
|
||||
NL, " break;"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_flags_wait_kernel_event_local(__global int* res, __global int* array)"
|
||||
NL, "{"
|
||||
NL, " size_t ls = get_local_size(0);"
|
||||
NL, " size_t gs = get_global_size(0);"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " res[tid] = 0;"
|
||||
NL, " array[tid] = tid;"
|
||||
NL, ""
|
||||
NL, " if(tid == 0)"
|
||||
NL, " {"
|
||||
NL, " clk_event_t block_evt;"
|
||||
NL, " clk_event_t user_evt = create_user_event();"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
|
||||
NL, " ^(__local void* sub_array){"
|
||||
NL, " block_fn(array, 1, ls, gs, res, sub_array);"
|
||||
NL, " }, ls * sizeof(int));"
|
||||
NL, " set_user_event_status(user_evt, CL_COMPLETE);"
|
||||
NL, " release_event(user_evt);"
|
||||
NL, " release_event(block_evt);"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_flags_wait_work_group_simple[] =
|
||||
{
|
||||
NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
|
||||
NL, ""
|
||||
NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, int group_id)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " size_t lid = get_local_id(0);"
|
||||
NL, " size_t gs = get_global_size(0);"
|
||||
NL, " size_t gid = get_group_id(0);"
|
||||
NL, " "
|
||||
NL, " if(gid == group_id)"
|
||||
NL, " {"
|
||||
NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;"
|
||||
NL, " "
|
||||
NL, " if((index + 1) < BITS_DEPTH && lid == 0)"
|
||||
NL, " {"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), "
|
||||
NL, " ^{"
|
||||
NL, " block_fn(array, index + 1, ls, res, gid);"
|
||||
NL, " });"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, ""
|
||||
NL, " if((index + 1) == BITS_DEPTH)"
|
||||
NL, " {"
|
||||
NL, " barrier(CLK_LOCAL_MEM_FENCE);"
|
||||
NL, ""
|
||||
NL, " if(lid == 0)"
|
||||
NL, " {"
|
||||
NL, " res[gid] = 1;"
|
||||
NL, ""
|
||||
NL, " for(int j = 0; j < BITS_DEPTH; j++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < ls; i++)"
|
||||
NL, " {"
|
||||
NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
|
||||
NL, " {"
|
||||
NL, " res[gid] = 2;"
|
||||
NL, " break;"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_flags_wait_work_group_simple(__global int* res, __global int* array)"
|
||||
NL, "{"
|
||||
NL, " size_t ls = get_local_size(0);"
|
||||
NL, " size_t gs = get_global_size(0);"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " size_t gid = get_group_id(0);"
|
||||
NL, " size_t lid = get_local_id(0);"
|
||||
NL, ""
|
||||
NL, " res[tid] = 0;"
|
||||
NL, " array[tid] = tid;"
|
||||
NL, ""
|
||||
NL, " if(lid == 0)"
|
||||
NL, " {"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), "
|
||||
NL, " ^{"
|
||||
NL, " block_fn(array, 1, ls, res, gid);"
|
||||
NL, " });"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_flags_wait_work_group_event[] =
|
||||
{
|
||||
NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
|
||||
NL, ""
|
||||
NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, int group_id)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " size_t lid = get_local_id(0);"
|
||||
NL, " size_t gs = get_global_size(0);"
|
||||
NL, " size_t gid = get_group_id(0);"
|
||||
NL, " "
|
||||
NL, " if(gid == group_id)"
|
||||
NL, " {"
|
||||
NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;"
|
||||
NL, " "
|
||||
NL, " if((index + 1) < BITS_DEPTH && lid == 0)"
|
||||
NL, " {"
|
||||
NL, " clk_event_t block_evt;"
|
||||
NL, " clk_event_t user_evt = create_user_event();"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
|
||||
NL, " ^{"
|
||||
NL, " block_fn(array, index + 1, ls, res, gid);"
|
||||
NL, " });"
|
||||
NL, " set_user_event_status(user_evt, CL_COMPLETE);"
|
||||
NL, " release_event(user_evt);"
|
||||
NL, " release_event(block_evt);"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, ""
|
||||
NL, ""
|
||||
NL, " if((index + 1) == BITS_DEPTH)"
|
||||
NL, " {"
|
||||
NL, " barrier(CLK_LOCAL_MEM_FENCE);"
|
||||
NL, ""
|
||||
NL, " if(lid == 0)"
|
||||
NL, " {"
|
||||
NL, " res[gid] = 1;"
|
||||
NL, ""
|
||||
NL, " for(int j = 0; j < BITS_DEPTH; j++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < ls; i++)"
|
||||
NL, " {"
|
||||
NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
|
||||
NL, " {"
|
||||
NL, " res[gid] = 2;"
|
||||
NL, " break;"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_flags_wait_work_group_event(__global int* res, __global int* array)"
|
||||
NL, "{"
|
||||
NL, " size_t ls = get_local_size(0);"
|
||||
NL, " size_t gs = get_global_size(0);"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " size_t gid = get_group_id(0);"
|
||||
NL, " size_t lid = get_local_id(0);"
|
||||
NL, ""
|
||||
NL, " res[tid] = 0;"
|
||||
NL, " array[tid] = tid;"
|
||||
NL, ""
|
||||
NL, " if(lid == 0)"
|
||||
NL, " {"
|
||||
NL, " clk_event_t block_evt;"
|
||||
NL, " clk_event_t user_evt = create_user_event();"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
|
||||
NL, " ^{"
|
||||
NL, " block_fn(array, 1, ls, res, gid);"
|
||||
NL, " });"
|
||||
NL, " set_user_event_status(user_evt, CL_COMPLETE);"
|
||||
NL, " release_event(user_evt);"
|
||||
NL, " release_event(block_evt);"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_flags_wait_work_group_local[] =
|
||||
{
|
||||
NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
|
||||
NL, ""
|
||||
NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, __local int* sub_array, int group_id)"
|
||||
NL, "{"
|
||||
NL, " int val = 0;"
|
||||
NL, " size_t gid = get_group_id(0);"
|
||||
NL, " size_t lid = get_local_id(0);"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " size_t gs = get_global_size(0);"
|
||||
NL, ""
|
||||
NL, " sub_array[lid] = array[(index - 1) * gs + tid];"
|
||||
NL, " barrier(CLK_LOCAL_MEM_FENCE);"
|
||||
NL, ""
|
||||
NL, " for(int i = 0; i < ls; i++)"
|
||||
NL, " {"
|
||||
NL, " int id = gid * ls + i;"
|
||||
NL, " val += sub_array[i];"
|
||||
NL, " val -= (tid == id)? 0: (id + index - 1);"
|
||||
NL, " }"
|
||||
NL, " "
|
||||
NL, " if(gid == group_id)"
|
||||
NL, " {"
|
||||
NL, " array[index * gs + tid] = val + 1;"
|
||||
NL, " "
|
||||
NL, " if((index + 1) < BITS_DEPTH && lid == 0)"
|
||||
NL, " {"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), "
|
||||
NL, " ^(__local void* sub_array){"
|
||||
NL, " block_fn(array, index + 1, ls, res, sub_array, gid);"
|
||||
NL, " }, ls * sizeof(int));"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, ""
|
||||
NL, ""
|
||||
NL, " if((index + 1) == BITS_DEPTH)"
|
||||
NL, " {"
|
||||
NL, " barrier(CLK_LOCAL_MEM_FENCE);"
|
||||
NL, ""
|
||||
NL, " if(lid == 0)"
|
||||
NL, " {"
|
||||
NL, " res[gid] = 1;"
|
||||
NL, ""
|
||||
NL, " for(int j = 0; j < BITS_DEPTH; j++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < ls; i++)"
|
||||
NL, " {"
|
||||
NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
|
||||
NL, " {"
|
||||
NL, " res[gid] = 2;"
|
||||
NL, " break;"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_flags_wait_work_group_local(__global int* res, __global int* array)"
|
||||
NL, "{"
|
||||
NL, " size_t ls = get_local_size(0);"
|
||||
NL, " size_t gs = get_global_size(0);"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " size_t gid = get_group_id(0);"
|
||||
NL, " size_t lid = get_local_id(0);"
|
||||
NL, ""
|
||||
NL, " res[tid] = 0;"
|
||||
NL, " array[tid] = tid;"
|
||||
NL, ""
|
||||
NL, " if(lid == 0)"
|
||||
NL, " {"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), "
|
||||
NL, " ^(__local void* sub_array){"
|
||||
NL, " block_fn(array, 1, ls, res, sub_array, gid);"
|
||||
NL, " }, ls * sizeof(int));"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_flags_wait_work_group_event_local[] =
|
||||
{
|
||||
NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
|
||||
NL, ""
|
||||
NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, __local int* sub_array, int group_id)"
|
||||
NL, "{"
|
||||
NL, " int val = 0;"
|
||||
NL, " size_t gid = get_group_id(0);"
|
||||
NL, " size_t lid = get_local_id(0);"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " size_t gs = get_global_size(0);"
|
||||
NL, ""
|
||||
NL, " sub_array[lid] = array[(index - 1) * gs + tid];"
|
||||
NL, " barrier(CLK_LOCAL_MEM_FENCE);"
|
||||
NL, ""
|
||||
NL, " for(int i = 0; i < ls; i++)"
|
||||
NL, " {"
|
||||
NL, " int id = gid * ls + i;"
|
||||
NL, " val += sub_array[i];"
|
||||
NL, " val -= (tid == id)? 0: (id + index - 1);"
|
||||
NL, " }"
|
||||
NL, ""
|
||||
NL, " if(gid == group_id)"
|
||||
NL, " {"
|
||||
NL, " array[index * gs + tid] = val + 1;"
|
||||
NL, " "
|
||||
NL, " if((index + 1) < BITS_DEPTH && lid == 0)"
|
||||
NL, " {"
|
||||
NL, " clk_event_t block_evt;"
|
||||
NL, " clk_event_t user_evt = create_user_event();"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
|
||||
NL, " ^(__local void* sub_array){"
|
||||
NL, " block_fn(array, index + 1, ls, res, sub_array, gid);"
|
||||
NL, " }, ls * sizeof(int));"
|
||||
NL, " set_user_event_status(user_evt, CL_COMPLETE);"
|
||||
NL, " release_event(user_evt);"
|
||||
NL, " release_event(block_evt);"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, ""
|
||||
NL, " if((index + 1) == BITS_DEPTH)"
|
||||
NL, " {"
|
||||
NL, " barrier(CLK_LOCAL_MEM_FENCE);"
|
||||
NL, ""
|
||||
NL, " if(lid == 0)"
|
||||
NL, " {"
|
||||
NL, " res[gid] = 1;"
|
||||
NL, ""
|
||||
NL, " for(int j = 0; j < BITS_DEPTH; j++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < ls; i++)"
|
||||
NL, " {"
|
||||
NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
|
||||
NL, " {"
|
||||
NL, " res[gid] = 2;"
|
||||
NL, " break;"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_flags_wait_work_group_event_local(__global int* res, __global int* array)"
|
||||
NL, "{"
|
||||
NL, " size_t ls = get_local_size(0);"
|
||||
NL, " size_t gs = get_global_size(0);"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " size_t gid = get_group_id(0);"
|
||||
NL, " size_t lid = get_local_id(0);"
|
||||
NL, ""
|
||||
NL, " res[tid] = 0;"
|
||||
NL, " array[tid] = tid;"
|
||||
NL, ""
|
||||
NL, " if(lid == 0)"
|
||||
NL, " {"
|
||||
NL, " clk_event_t block_evt;"
|
||||
NL, " clk_event_t user_evt = create_user_event();"
|
||||
NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
|
||||
NL, " ^(__local void* sub_array){"
|
||||
NL, " block_fn(array, 1, ls, res, sub_array, gid);"
|
||||
NL, " }, ls * sizeof(int));"
|
||||
NL, " set_user_event_status(user_evt, CL_COMPLETE);"
|
||||
NL, " release_event(user_evt);"
|
||||
NL, " release_event(block_evt);"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
// Table of the enqueue-flags kernels that test_enqueue_flags() iterates over.
// Each entry is built with the project's KERNEL() macro (defined elsewhere);
// the test reads the .lines, .num_lines and .kernel_name fields of each entry.
static const kernel_src sources_enqueue_block_flags[] =
|
||||
{
|
||||
KERNEL(enqueue_flags_wait_kernel_simple),
|
||||
KERNEL(enqueue_flags_wait_kernel_event),
|
||||
KERNEL(enqueue_flags_wait_kernel_local),
|
||||
KERNEL(enqueue_flags_wait_kernel_event_local),
|
||||
KERNEL(enqueue_flags_wait_work_group_simple),
|
||||
KERNEL(enqueue_flags_wait_work_group_event),
|
||||
KERNEL(enqueue_flags_wait_work_group_local),
|
||||
KERNEL(enqueue_flags_wait_work_group_event_local)
|
||||
};
|
||||
// Number of entries in the table above.
static const size_t num_enqueue_block_flags = arr_size(sources_enqueue_block_flags);
|
||||
|
||||
|
||||
/*
 * Runs every kernel listed in sources_enqueue_block_flags and validates the
 * result array each one produces.
 *
 * A default on-device queue is created first, sized from
 * CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, so that the kernels can call
 * get_default_queue(). For each kernel the first (global_size / local_size)
 * result slots must hold 1 and every remaining slot must hold 0.
 *
 * device, context, queue: objects the kernels are built for and launched on.
 * num_elements: not used by this test.
 *
 * Returns 0 when every selected kernel executed and validated, -1 otherwise.
 * NOTE(review): test_error() presumably returns from this function on a
 * failing error code -- confirm against the harness.
 */
int test_enqueue_flags(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_uint i;
    cl_int err_ret, res = 0;
    clCommandQueueWrapper dev_queue;
    // Only element 0 starts at -1; the remaining elements are zero-initialized.
    cl_int kernel_results[MAX_GWS] = { -1 };
    int buff[MAX_GWS * BITS_DEPTH] = { 0 };

    size_t ret_len;
    size_t max_local_size = 1;
    cl_uint maxQueueSize = 0;
    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");

    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");

    cl_queue_properties queue_prop_def[] =
    {
        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT,
        CL_QUEUE_SIZE, maxQueueSize,
        0
    };

    dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
    test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");

    size_t global_size = MAX_GWS;
    size_t local_size = (max_local_size > global_size/16) ? global_size/16 : max_local_size;
    if(gWimpyMode)
    {
        global_size = 4;
        local_size = 2;
    }

    // Counts kernels that failed for any reason (execution or validation).
    size_t failCnt = 0;
    for(i = 0; i < num_enqueue_block_flags; ++i)
    {
        const char *kernel_name = sources_enqueue_block_flags[i].kernel_name;

        // Honour the optional single-kernel filter.
        if (!gKernelName.empty() && gKernelName != kernel_name)
            continue;

        // The casts keep the arguments in step with the %d conversions.
        log_info("Running '%s' kernel (%d of %d) ...\n", kernel_name, (int)(i + 1), (int)num_enqueue_block_flags);

        clMemWrapper mem = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, global_size * BITS_DEPTH * sizeof(cl_int), buff, &err_ret);
        test_error(err_ret, "clCreateBuffer() failed");

        kernel_arg args[] =
        {
            { sizeof(cl_mem), &mem }
        };

        err_ret = run_n_kernel_args(context, queue, sources_enqueue_block_flags[i].lines, sources_enqueue_block_flags[i].num_lines, kernel_name, local_size, global_size, kernel_results, sizeof(kernel_results), arr_size(args), args);
        if(check_error(err_ret, "'%s' kernel execution failed", kernel_name))
        {
            ++failCnt;
            res = -1;
            continue;
        }

        // One result slot per work-group must be 1; the rest must stay 0.
        const size_t num_groups = global_size / local_size;
        int r = 0;
        for (size_t j = 0; j < global_size; j++)
        {
            if (kernel_results[j] != 1 && j < num_groups && check_error(-1, "'%s' kernel result[idx: %d] validation failed (test) %d != (expected) 1", kernel_name, (int)j, kernel_results[j]))
            {
                r = -1;
                break;
            }
            else if (kernel_results[j] != 0 && j >= num_groups && check_error(-1, "'%s' kernel result[idx: %d] validation failed (test) %d != (expected) 0", kernel_name, (int)j, kernel_results[j]))
            {
                r = -1;
                break;
            }
        }

        if(r == 0)
        {
            log_info("'%s' kernel is OK.\n", kernel_name);
        }
        else
        {
            // A validation failure is a failed kernel too, so count it.
            ++failCnt;
            res = -1;
        }
    }

    if (failCnt > 0)
    {
        log_error("ERROR: %d of %d kernels failed.\n", (int)failCnt, (int)num_enqueue_block_flags);
    }

    return res;
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
198
test_conformance/device_execution/enqueue_multi_queue.cpp
Normal file
198
test_conformance/device_execution/enqueue_multi_queue.cpp
Normal file
@@ -0,0 +1,198 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "procs.h"
|
||||
#include "utils.h"
|
||||
#include <time.h>
|
||||
|
||||
|
||||
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
extern int gWimpyMode;
|
||||
// printf-style template for the multi-queue kernel source. The host fills in,
// in order: the extra "queue_t qN" kernel parameters (%s), the queue count
// (%d, twice) and the comma-separated queue list (%s).
//
// Each work-item enqueues one block per queue, all gated on a user event,
// checks that none has run yet, enqueues a final block on the default queue
// that waits on all of them, and only then signals the user event.
//
// Result codes written to res[tid]: 0 (BLOCK_COMPLETED) on success, -2/-4 when
// an enqueue fails, -5 when a gated block ran early, -3 when the final block
// sees an unfinished block.
//
// Every failure path signals and releases the user event (and releases the
// block events already created) before leaving. Otherwise the gated child
// blocks could never start and the parent kernel could never complete.
static const char enqueue_block_multi_queue[] =
    NL "#define BLOCK_COMPLETED 0"
    NL "#define BLOCK_SUBMITTED 1"
    NL ""
    NL "kernel void enqueue_block_multi_queue(__global int* res, __global int* buff %s)"
    NL "{"
    NL " uint i, n = %d;"
    NL " clk_event_t block_evt[%d];"
    NL " queue_t q[] = { %s };"
    NL " queue_t *queue = q;"
    NL ""
    NL " clk_event_t user_evt = create_user_event();"
    NL " queue_t def_q = get_default_queue();"
    NL " size_t tid = get_global_id(0);"
    NL " res[tid] = -1;"
    NL " __global int* b = buff + tid*n;"
    NL " for(i=0; i<n; ++i) b[i] = -1;"
    NL ""
    NL " ndrange_t ndrange = ndrange_1D(1);"
    NL " for(i = 0; i < n; ++i)"
    NL " {"
    NL " b[i] = BLOCK_SUBMITTED;"
    NL " int enq_res = enqueue_kernel(queue[i], CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt[i], "
    NL " ^{"
    NL " b[i] = BLOCK_COMPLETED;"
    NL " });"
    NL " if(enq_res != CLK_SUCCESS) { res[tid] = -2; set_user_event_status(user_evt, CL_COMPLETE); release_event(user_evt); while(i > 0) release_event(block_evt[--i]); return; }"
    NL " }"
    NL ""
    NL " // check blocks are not started"
    NL " for(i = 0; i < n; ++i)"
    NL " {"
    NL " if(b[i] != BLOCK_SUBMITTED) { res[tid] = -5; set_user_event_status(user_evt, CL_COMPLETE); release_event(user_evt); for(i = 0; i < n; ++i) release_event(block_evt[i]); return; }"
    NL " }"
    NL ""
    NL " res[tid] = BLOCK_SUBMITTED;"
    NL " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, n, block_evt, NULL, "
    NL " ^{"
    NL " uint k;"
    NL " // check blocks are finished"
    NL " for(k = 0; k < n; ++k)"
    NL " {"
    NL " if(b[k] != BLOCK_COMPLETED) { res[tid] = -3; return; }"
    NL " }"
    NL " res[tid] = BLOCK_COMPLETED;"
    NL " });"
    NL " for(i = 0; i < n; ++i)"
    NL " {"
    NL " release_event(block_evt[i]);"
    NL " }"
    NL " if(enq_res != CLK_SUCCESS) { res[tid] = -4; }"
    NL ""
    NL " // always signal the user event so the gated blocks can run"
    NL " set_user_event_status(user_evt, CL_COMPLETE);"
    NL " release_event(user_evt);"
    NL "}";
|
||||
|
||||
|
||||
// Scans the first len entries of results and returns the index of the first
// non-zero entry, or -1 when every entry is zero.
static int check_kernel_results(cl_int* results, cl_int len)
{
    cl_int pos = 0;
    while (pos < len && results[pos] == 0)
    {
        ++pos;
    }
    return (pos < len) ? pos : -1;
}
|
||||
|
||||
/*
 * Builds and runs the enqueue_block_multi_queue kernel with one kernel
 * argument per additional on-device queue, then checks that every result
 * entry is zero.
 *
 * A default on-device queue is always created. The kernel is only run when
 * the device reports more than one on-device queue; the number of extra
 * queues is capped at MAX_QUEUES.
 *
 * device, context, queue: objects the kernel is built for and launched on.
 * num_elements: not used by this test.
 *
 * Returns 0 on success (or when the device has a single on-device queue),
 * -1 when the kernel fails, validation fails, or the generated kernel source
 * does not fit its buffer.
 */
int test_enqueue_multi_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_uint i;
    cl_int k, err_ret, res = 0;
    clCommandQueueWrapper dev_queue;
    cl_int kernel_results[MAX_GWS] = {0};

    size_t ret_len;
    cl_uint n, max_queues = 1;
    cl_uint maxQueueSize = 0;
    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");

    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");

    size_t max_local_size = 1;
    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");

    cl_queue_properties queue_prop_def[] =
    {
        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT,
        CL_QUEUE_SIZE, maxQueueSize,
        0
    };

    dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
    test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");

    if(max_queues > 1)
    {
        // One queue slot is already taken by the default queue created above.
        n = (max_queues > MAX_QUEUES) ? MAX_QUEUES : max_queues-1;
        clMemWrapper mem, buff, evt;
        std::vector<clCommandQueueWrapper> queues(n);
        std::vector<cl_command_queue> q(n);
        cl_queue_properties queue_prop[] =
        {
            CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE,
            CL_QUEUE_SIZE, maxQueueSize,
            0
        };

        for(i = 0; i < n; ++i)
        {
            queues[i] = clCreateCommandQueueWithProperties(context, device, queue_prop, &err_ret);
            test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE) failed");
            q[i] = queues[i];
        }

        size_t global_size = MAX_GWS;
        size_t local_size = (max_local_size > global_size/16) ? global_size/16 : max_local_size;
        if(gWimpyMode)
        {
            global_size = 4;
            local_size = 2;
        }

        // NOTE(review): evt and mem are created but never passed to the
        // kernel in this function; kept to preserve the original API calls.
        evt = clCreateBuffer(context, CL_MEM_READ_WRITE, n * sizeof(cl_event), NULL, &err_ret);
        test_error(err_ret, "clCreateBuffer() failed");

        mem = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, n * sizeof(cl_command_queue), &q[0], &err_ret);
        test_error(err_ret, "clCreateBuffer() failed");

        buff = clCreateBuffer(context, CL_MEM_READ_WRITE, global_size * n * sizeof(cl_int), NULL, &err_ret);
        test_error(err_ret, "clCreateBuffer() failed");

        // Prepare CL source
        char cl[65536] = { 0 };
        char q_args[16384] = { 0 };
        char q_list[8192] = { 0 };
        // Running write offsets avoid rescanning the buffers with strlen().
        size_t q_args_len = 0;
        size_t q_list_len = 0;

        kernel_arg arg_res = { sizeof(cl_mem), &buff };

        std::vector<kernel_arg> args(n+1);
        args[0] = arg_res;

        for(i = 0; i < n; ++i)
        {
            // snprintf returns the length it wanted to write; a value that
            // does not fit the remaining space means the text was truncated.
            int written = snprintf(q_args + q_args_len, sizeof(q_args) - q_args_len, ", queue_t q%d", (int)i);
            if(written < 0 || (size_t)written >= sizeof(q_args) - q_args_len)
            {
                log_error("ERROR: kernel parameter list for %d queues does not fit its buffer\n", (int)n);
                return -1;
            }
            q_args_len += (size_t)written;

            written = snprintf(q_list + q_list_len, sizeof(q_list) - q_list_len, "q%d, ", (int)i);
            if(written < 0 || (size_t)written >= sizeof(q_list) - q_list_len)
            {
                log_error("ERROR: queue list for %d queues does not fit its buffer\n", (int)n);
                return -1;
            }
            q_list_len += (size_t)written;

            kernel_arg arg_q = { sizeof(cl_command_queue), &q[i] };
            args[i+1] = arg_q;
        }

        // A truncated source would only surface as a confusing build error.
        int src_len = snprintf(cl, sizeof(cl), enqueue_block_multi_queue, q_args, (int)n, (int)n, q_list);
        if(src_len < 0 || (size_t)src_len >= sizeof(cl))
        {
            log_error("ERROR: generated '%s' kernel source does not fit its buffer\n", "enqueue_block_multi_queue");
            return -1;
        }
        const char *source = cl;

        err_ret = run_n_kernel_args(context, queue, &source, 1, "enqueue_block_multi_queue", local_size, global_size, kernel_results, sizeof(kernel_results), args.size(), &args[0]);
        if(check_error(err_ret, "'%s' kernel execution failed", "enqueue_block_multi_queue")) res = -1;
        else if((k = check_kernel_results(kernel_results, arr_size(kernel_results))) >= 0 && check_error(-1, "'%s' kernel results validation failed: [%d] returned %d expected 0", "enqueue_block_multi_queue", k, kernel_results[k])) res = -1;
        else log_info("'%s' kernel is OK.\n", "enqueue_block_multi_queue");
    }
    else
    {
        log_info("Device reports a single on-device queue; '%s' kernel was not run.\n", "enqueue_block_multi_queue");
    }
    return res;
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
681
test_conformance/device_execution/enqueue_ndrange.cpp
Normal file
681
test_conformance/device_execution/enqueue_ndrange.cpp
Normal file
@@ -0,0 +1,681 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "procs.h"
|
||||
#include "utils.h"
|
||||
#include <time.h>
|
||||
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
extern int gWimpyMode;
|
||||
// OpenCL C source for the 1-D "global size only" case. For each of the n
// entries in glob_size_arr the kernel enqueues a child NDRange of that size on
// the default queue; every child work-item atomically increments
// val[get_global_linear_id() % len]. An enqueue failure writes -1 to res[tid].
static const char* helper_ndrange_1d_glo[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_1d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(glob_size_arr[i]);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
// OpenCL C source for the 1-D "global + local size" case. For every
// (local size k, global size i) pair where the global size is at least the
// local size, the kernel enqueues a child NDRange on the default queue; every
// child work-item atomically increments val[get_global_linear_id() % len].
// An enqueue failure writes -1 to res[tid].
static const char* helper_ndrange_1d_loc[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_1d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int k = 0; k < n; k++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " if (glob_size_arr[i] >= loc_size_arr[k])"
|
||||
NL, " {"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(glob_size_arr[i], loc_size_arr[k]);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
// OpenCL C source for the 1-D "offset + global + local size" case. For every
// (offset l, local size k, global size i) combination where the global size is
// at least the local size, the kernel enqueues a child NDRange on the default
// queue; every child work-item atomically increments
// val[(get_global_offset(0) + get_global_linear_id()) % len].
// An enqueue failure writes -1 to res[tid].
static const char* helper_ndrange_1d_ofs[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[(get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_1d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int l = 0; l < n; l++)"
|
||||
NL, " {"
|
||||
NL, " for(int k = 0; k < n; k++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " if (glob_size_arr[i] >= loc_size_arr[k])"
|
||||
NL, " {"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(ofs_arr[l], glob_size_arr[i], loc_size_arr[k]);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* helper_ndrange_2d_glo[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " size_t glob_size[2] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] };"
|
||||
NL, " ndrange_t ndrange = ndrange_2D(glob_size);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* helper_ndrange_2d_loc[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int k = 0; k < n; k++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])"
|
||||
NL, " {"
|
||||
NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] };"
|
||||
NL, " size_t loc_size[] = { 1, loc_size_arr[k] };"
|
||||
NL, ""
|
||||
NL, " ndrange_t ndrange = ndrange_2D(glob_size, loc_size);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
|
||||
static const char* helper_ndrange_2d_ofs[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[(get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int l = 0; l < n; l++)"
|
||||
NL, " {"
|
||||
NL, " for(int k = 0; k < n; k++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])"
|
||||
NL, " {"
|
||||
NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n]};"
|
||||
NL, " size_t loc_size[] = { 1, loc_size_arr[k] };"
|
||||
NL, " size_t ofs[] = { ofs_arr[l], ofs_arr[(l + 1) % n] };"
|
||||
NL, ""
|
||||
NL, " ndrange_t ndrange = ndrange_2D(ofs,glob_size,loc_size);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
|
||||
static const char* helper_ndrange_3d_glo[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
|
||||
NL, " if (global_work_size <= (len * len))"
|
||||
NL, " {"
|
||||
NL, " size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };"
|
||||
NL, " ndrange_t ndrange = ndrange_3D(glob_size);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
|
||||
static const char* helper_ndrange_3d_loc[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int k = 0; k < n; k++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
|
||||
NL, " if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))"
|
||||
NL, " {"
|
||||
NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };"
|
||||
NL, " size_t loc_size[] = { 1, 1, loc_size_arr[k] };"
|
||||
NL, " ndrange_t ndrange = ndrange_3D(glob_size,loc_size);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " "
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* helper_ndrange_3d_ofs[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[(get_global_offset(2) * get_global_size(0) * get_global_size(1) + get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int l = 0; l < n; l++)"
|
||||
NL, " {"
|
||||
NL, " for(int k = 0; k < n; k++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
|
||||
NL, " if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))"
|
||||
NL, " {"
|
||||
NL, " size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n]};"
|
||||
NL, " size_t loc_size[3] = { 1, 1, loc_size_arr[k] };"
|
||||
NL, " size_t ofs[3] = { ofs_arr[l], ofs_arr[(l + 1) % n], ofs_arr[(l + 2) % n] };"
|
||||
NL, " ndrange_t ndrange = ndrange_3D(ofs,glob_size,loc_size);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
// Table of the ndrange helper kernels, one row per dimension/variant.
// NOTE(review): the three values after KERNEL() look like the dimension count
// and the use-local / use-offset flags consumed by check_kernel_results();
// confirm against the kernel_src_dim_check declaration.
static const kernel_src_dim_check sources_ndrange_Xd[] =
|
||||
{
|
||||
{ KERNEL(helper_ndrange_1d_glo), 1, CL_FALSE, CL_FALSE},
|
||||
{ KERNEL(helper_ndrange_1d_loc), 1, CL_TRUE, CL_FALSE},
|
||||
{ KERNEL(helper_ndrange_1d_ofs), 1, CL_TRUE, CL_TRUE},
|
||||
{ KERNEL(helper_ndrange_2d_glo), 2, CL_FALSE, CL_FALSE},
|
||||
{ KERNEL(helper_ndrange_2d_loc), 2, CL_TRUE, CL_FALSE},
|
||||
{ KERNEL(helper_ndrange_2d_ofs), 2, CL_TRUE, CL_TRUE},
|
||||
{ KERNEL(helper_ndrange_3d_glo), 3, CL_FALSE, CL_FALSE},
|
||||
{ KERNEL(helper_ndrange_3d_loc), 3, CL_TRUE, CL_FALSE},
|
||||
{ KERNEL(helper_ndrange_3d_ofs), 3, CL_TRUE, CL_TRUE},
|
||||
};
|
||||
// Number of rows in the table above.
static const size_t num_kernels_ndrange_Xd = arr_size(sources_ndrange_Xd);
|
||||
|
||||
// Returns the index of the first non-zero entry among the first len entries
// of results, or -1 when all of them are zero.
static int check_kernel_results(cl_int* results, cl_int len)
{
    cl_int first_bad = -1;
    for (cl_int pos = 0; pos < len && first_bad < 0; ++pos)
    {
        if (results[pos] != 0)
        {
            first_bad = pos;
        }
    }
    return first_bad;
}
|
||||
|
||||
void generate_reference_1D(std::vector<cl_int> &reference_results, std::vector<cl_uint> &glob_size_arr)
|
||||
{
|
||||
for (size_t g = 0; g < glob_size_arr.size(); ++g)
|
||||
{
|
||||
for (size_t w = 0; w < glob_size_arr[g]; ++w)
|
||||
{
|
||||
++reference_results[w];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void generate_reference_1D_local(std::vector<cl_int> &reference_results, std::vector<cl_uint> &glob_size_arr, std::vector<cl_uint> &loc_size_arr)
|
||||
{
|
||||
for (size_t g = 0; g < glob_size_arr.size(); ++g)
|
||||
{
|
||||
for (size_t l = 0; l < loc_size_arr.size(); ++l)
|
||||
{
|
||||
if (glob_size_arr[g] >= loc_size_arr[l])
|
||||
{
|
||||
for (size_t w = 0; w < glob_size_arr[g]; ++w)
|
||||
{
|
||||
++reference_results[w];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void generate_reference_1D_offset(std::vector<cl_int> &reference_results, std::vector<cl_uint> &glob_size_arr, std::vector<cl_uint> &loc_size_arr, std::vector<cl_uint> &offset, cl_uint len)
|
||||
{
|
||||
for (size_t g = 0; g < glob_size_arr.size(); ++g)
|
||||
{
|
||||
for (size_t l = 0; l < loc_size_arr.size(); ++l)
|
||||
{
|
||||
if (glob_size_arr[g] >= loc_size_arr[l])
|
||||
{
|
||||
for (size_t o = 0; o < offset.size(); ++o)
|
||||
{
|
||||
for (size_t w = 0; w < glob_size_arr[g]; ++w)
|
||||
{
|
||||
++reference_results[(offset[o] + w) % len];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void generate_reference_2D(std::vector<cl_int> &reference_results, std::vector<cl_uint> &glob_size_arr, cl_uint len)
|
||||
{
|
||||
for (size_t g = 0; g < glob_size_arr.size(); ++g)
|
||||
{
|
||||
for (size_t h = 0; h < glob_size_arr[(g + 1) % glob_size_arr.size()]; ++h)
|
||||
{
|
||||
for (size_t w = 0; w < glob_size_arr[g]; ++w)
|
||||
{
|
||||
++reference_results[(h * glob_size_arr[g] + w) % len];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void generate_reference_2D_local(std::vector<cl_int> &reference_results, std::vector<cl_uint> &glob_size_arr, std::vector<cl_uint> &loc_size_arr, cl_uint len)
|
||||
{
|
||||
size_t n = glob_size_arr.size();
|
||||
for (size_t g = 0; g < glob_size_arr.size(); ++g)
|
||||
{
|
||||
for (size_t l = 0; l < loc_size_arr.size(); ++l)
|
||||
{
|
||||
if (glob_size_arr[(g + 1) % n] >= loc_size_arr[l])
|
||||
{
|
||||
for (size_t h = 0; h < glob_size_arr[(g + 1) % n]; ++h)
|
||||
{
|
||||
for (size_t w = 0; w < glob_size_arr[g]; ++w)
|
||||
{
|
||||
++reference_results[(h * glob_size_arr[g] + w) % len];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void generate_reference_2D_offset(std::vector<cl_int> &reference_results, std::vector<cl_uint> &glob_size_arr, std::vector<cl_uint> &loc_size_arr, std::vector<cl_uint> &offset, cl_uint len)
|
||||
{
|
||||
size_t n = glob_size_arr.size();
|
||||
for (size_t g = 0; g < glob_size_arr.size(); ++g)
|
||||
{
|
||||
for (size_t l = 0; l < loc_size_arr.size(); ++l)
|
||||
{
|
||||
if (glob_size_arr[(g + 1) % n] >= loc_size_arr[l])
|
||||
{
|
||||
for (size_t o = 0; o < offset.size(); ++o)
|
||||
{
|
||||
for (size_t h = 0; h < glob_size_arr[(g + 1) % n]; ++h)
|
||||
{
|
||||
for (size_t w = 0; w < glob_size_arr[g]; ++w)
|
||||
{
|
||||
++reference_results[(glob_size_arr[g] * offset[(o + 1) % n] + offset[o] + h * glob_size_arr[g] + w) % len];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void generate_reference_3D(std::vector<cl_int> &reference_results, std::vector<cl_uint> &glob_size_arr, cl_uint len)
|
||||
{
|
||||
size_t n = glob_size_arr.size();
|
||||
for (size_t g = 0; g < glob_size_arr.size(); ++g)
|
||||
{
|
||||
size_t global_work_size = glob_size_arr[(g + 2) % n] * glob_size_arr[(g + 1) % n] * glob_size_arr[g];
|
||||
if(global_work_size <= (len * len))
|
||||
{
|
||||
for (size_t d = 0; d < glob_size_arr[(g + 2) % n]; ++d)
|
||||
{
|
||||
for (size_t h = 0; h < glob_size_arr[(g + 1) % n]; ++h)
|
||||
{
|
||||
for (size_t w = 0; w < glob_size_arr[g]; ++w)
|
||||
{
|
||||
++reference_results[(d * glob_size_arr[(g + 1) % n] * glob_size_arr[g] + h * glob_size_arr[g] + w) % len];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void generate_reference_3D_local(std::vector<cl_int> &reference_results, std::vector<cl_uint> &glob_size_arr, std::vector<cl_uint> &loc_size_arr, cl_uint len)
|
||||
{
|
||||
size_t n = glob_size_arr.size();
|
||||
for (size_t g = 0; g < glob_size_arr.size(); ++g)
|
||||
{
|
||||
for (size_t l = 0; l < loc_size_arr.size(); ++l)
|
||||
{
|
||||
size_t global_work_size = glob_size_arr[(g + 2) % n] * glob_size_arr[(g + 1) % n] * glob_size_arr[g];
|
||||
if (glob_size_arr[(g + 2) % n] >= loc_size_arr[l] && global_work_size <= (len * len))
|
||||
{
|
||||
for (size_t d = 0; d < glob_size_arr[(g + 2) % n]; ++d)
|
||||
{
|
||||
for (size_t h = 0; h < glob_size_arr[(g + 1) % n]; ++h)
|
||||
{
|
||||
for (size_t w = 0; w < glob_size_arr[g]; ++w)
|
||||
{
|
||||
++reference_results[(d * glob_size_arr[(g + 1) % n] * glob_size_arr[g] + h * glob_size_arr[g] + w) % len];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void generate_reference_3D_offset(std::vector<cl_int> &reference_results, std::vector<cl_uint> &glob_size_arr, std::vector<cl_uint> &loc_size_arr, std::vector<cl_uint> &offset, cl_uint len)
|
||||
{
|
||||
size_t n = glob_size_arr.size();
|
||||
for (size_t g = 0; g < glob_size_arr.size(); ++g)
|
||||
{
|
||||
for (size_t l = 0; l < loc_size_arr.size(); ++l)
|
||||
{
|
||||
size_t global_work_size = glob_size_arr[(g + 2) % n] * glob_size_arr[(g + 1) % n] * glob_size_arr[g];
|
||||
if (glob_size_arr[(g + 2) % n] >= loc_size_arr[l] && global_work_size <= (len * len))
|
||||
{
|
||||
for (size_t o = 0; o < offset.size(); ++o)
|
||||
{
|
||||
for (size_t d = 0; d < glob_size_arr[(g + 2) % n]; ++d)
|
||||
{
|
||||
for (size_t h = 0; h < glob_size_arr[(g + 1) % n]; ++h)
|
||||
{
|
||||
for (size_t w = 0; w < glob_size_arr[g]; ++w)
|
||||
{
|
||||
++reference_results[(glob_size_arr[g] * glob_size_arr[(g + 1) % n] * offset[(o + 2) % n] + glob_size_arr[g] * offset[(o + 1) % n] + offset[o] + d * glob_size_arr[(g + 1) % n] * glob_size_arr[g] + h * glob_size_arr[g] + w) % len];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compares device results against a host-generated reference.
//
// The reference generator is chosen by dimensionality (1, 2 or 3) and by the
// use_local / use_offset flags. Returns -1 when all len entries match,
// otherwise the index of the first mismatch (which is also logged). An
// unsupported dim returns 0.
static int check_kernel_results(cl_int* results, cl_int len, std::vector<cl_uint> &glob_size_arr, std::vector<cl_uint> &loc_size_arr, std::vector<cl_uint> &offset, cl_int dim, cl_bool use_local, cl_bool use_offset)
{
    std::vector<cl_int> expected(len, 0);

    // Variant selection: plain, local size only, or (anything else) local size with offset.
    const bool plain = (use_local == CL_FALSE);
    const bool local_only = (use_local == CL_TRUE && use_offset == CL_FALSE);

    if (dim == 1)
    {
        if (plain)
            generate_reference_1D(expected, glob_size_arr);
        else if (local_only)
            generate_reference_1D_local(expected, glob_size_arr, loc_size_arr);
        else
            generate_reference_1D_offset(expected, glob_size_arr, loc_size_arr, offset, len);
    }
    else if (dim == 2)
    {
        if (plain)
            generate_reference_2D(expected, glob_size_arr, len);
        else if (local_only)
            generate_reference_2D_local(expected, glob_size_arr, loc_size_arr, len);
        else
            generate_reference_2D_offset(expected, glob_size_arr, loc_size_arr, offset, len);
    }
    else if (dim == 3)
    {
        if (plain)
            generate_reference_3D(expected, glob_size_arr, len);
        else if (local_only)
            generate_reference_3D_local(expected, glob_size_arr, loc_size_arr, len);
        else
            generate_reference_3D_offset(expected, glob_size_arr, loc_size_arr, offset, len);
    }
    else
    {
        return 0;
    }

    for (cl_int idx = 0; idx < len; ++idx)
    {
        if (results[idx] == expected[idx])
        {
            continue;
        }
        log_error("ERROR: Kernel returned %d vs. expected %d\n", results[idx], expected[idx]);
        return idx;
    }

    return -1;
}
|
||||
|
||||
// Runs every ndrange_Xd kernel (optionally filtered by gKernelName) and validates
// both the per-work-item status array and the global hit counters against a
// host-computed reference.
//
// Returns 0 when all kernels pass, -1 when any kernel fails; OpenCL API failures
// return early through test_error.
int test_enqueue_ndrange(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_uint i;
    cl_int err_ret, res = 0;
    clCommandQueueWrapper dev_queue;
    cl_int k, kernel_results[MAX_GWS] = { 0 };

    size_t ret_len;
    cl_uint max_queues = 1;
    cl_uint maxQueueSize = 0;

    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");

    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");

    size_t max_local_size = 1;
    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");

    // Default on-device queue; the kernels enqueue their child blocks onto it.
    cl_queue_properties queue_prop_def[] =
    {
        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT,
        CL_QUEUE_SIZE, maxQueueSize,
        0
    };

    dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
    test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");

    max_local_size = (max_local_size > MAX_GWS)? MAX_GWS: max_local_size;
    if(gWimpyMode)
    {
        max_local_size = MIN(8, max_local_size);
    }

    cl_uint num = 10;
    cl_uint global_work_size = max_local_size * 2;
    std::vector<cl_uint> glob_size_arr(num);
    std::vector<cl_uint> loc_size_arr(num);
    std::vector<cl_uint> ofs_arr(num);
    std::vector<cl_int> glob_results(global_work_size, 0);

    // The first two entries are fixed boundary cases; the rest are random.
    glob_size_arr[0] = 1;
    glob_size_arr[1] = global_work_size;
    loc_size_arr[0] = 1;
    loc_size_arr[1] = max_local_size;
    ofs_arr[0] = 0;
    ofs_arr[1] = 1;

    // The generator is created right before its only use and freed right after,
    // so no early return can leak it.
    MTdata d = init_genrand(gRandomSeed);
    for(i = 2; i < num; ++i)
    {
        glob_size_arr[i] = genrand_int32(d) % global_work_size;
        glob_size_arr[i] = glob_size_arr[i] ? glob_size_arr[i]: 1;
        loc_size_arr[i] = genrand_int32(d) % max_local_size;
        loc_size_arr[i] = loc_size_arr[i] ? loc_size_arr[i]: 1;
        ofs_arr[i] = genrand_int32(d) % global_work_size;
    }
    free_mtdata(d);

    // check ndrange_dX functions
    size_t failCnt = 0;
    for(i = 0; i < num_kernels_ndrange_Xd; ++i)
    {
        if (!gKernelName.empty() && gKernelName != sources_ndrange_Xd[i].src.kernel_name)
            continue;

        clMemWrapper mem1 = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, glob_size_arr.size() * sizeof(cl_uint), &glob_size_arr[0], &err_ret);
        test_error(err_ret, "clCreateBuffer() failed");
        clMemWrapper mem2 = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, loc_size_arr.size() * sizeof(cl_uint), &loc_size_arr[0], &err_ret);
        test_error(err_ret, "clCreateBuffer() failed");
        clMemWrapper mem3 = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, glob_results.size() * sizeof(cl_int), &glob_results[0], &err_ret);
        test_error(err_ret, "clCreateBuffer() failed");
        clMemWrapper mem4 = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, ofs_arr.size() * sizeof(cl_uint), &ofs_arr[0], &err_ret);
        test_error(err_ret, "clCreateBuffer() failed");

        kernel_arg args[] =
        {
            { sizeof(cl_uint), &num },
            { sizeof(cl_uint), &global_work_size },
            { sizeof(cl_mem), &mem1 },
            { sizeof(cl_mem), &mem2 },
            { sizeof(cl_mem), &mem3 },
            { sizeof(cl_mem), &mem4 },
        };

        log_info("Running '%s' kernel (%d of %d) ...\n", sources_ndrange_Xd[i].src.kernel_name, (int)(i + 1), (int)num_kernels_ndrange_Xd);
        err_ret = run_single_kernel_args(context, queue, sources_ndrange_Xd[i].src.lines, sources_ndrange_Xd[i].src.num_lines, sources_ndrange_Xd[i].src.kernel_name, kernel_results, sizeof(kernel_results), arr_size(args), args);

        // Check the execution status before any other call can overwrite it.
        if(check_error(err_ret, "'%s' kernel execution failed", sources_ndrange_Xd[i].src.kernel_name))
        {
            ++failCnt;
            res = -1;
            continue;
        }

        // The mapping has its own status variable so the two failure sources stay distinct.
        cl_int map_err = CL_SUCCESS;
        cl_int *ptr = (cl_int *)clEnqueueMapBuffer(queue, mem3, CL_TRUE, CL_MAP_READ, 0, glob_results.size() * sizeof(cl_int), 0, 0, 0, &map_err);
        test_error(map_err, "clEnqueueMapBuffer() failed");

        if((k = check_kernel_results(kernel_results, arr_size(kernel_results))) >= 0 && check_error(-1, "'%s' kernel results validation failed: [%d] returned %d expected 0", sources_ndrange_Xd[i].src.kernel_name, k, kernel_results[k]))
        {
            ++failCnt;
            res = -1;
        }
        // Report the value read back from the device, not the host's initial zeros.
        else if((k = check_kernel_results(ptr, global_work_size, glob_size_arr, loc_size_arr, ofs_arr, sources_ndrange_Xd[i].dim, sources_ndrange_Xd[i].localSize, sources_ndrange_Xd[i].offset)) >= 0 && check_error(-1, "'%s' global kernel results validation failed: [%d] returned %d", sources_ndrange_Xd[i].src.kernel_name, k, ptr[k]))
        {
            ++failCnt;
            res = -1;
        }
        else
        {
            log_info("'%s' kernel is OK.\n", sources_ndrange_Xd[i].src.kernel_name);
        }

        err_ret = clEnqueueUnmapMemObject(queue, mem3, ptr, 0, 0, 0);
        test_error(err_ret, "clEnqueueUnmapMemObject() failed");
    }

    if (failCnt > 0)
    {
        log_error("ERROR: %d of %d kernels failed.\n", (int)failCnt, (int)num_kernels_ndrange_Xd);
    }

    return res;
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
1724
test_conformance/device_execution/enqueue_wg_size.cpp
Normal file
1724
test_conformance/device_execution/enqueue_wg_size.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1050
test_conformance/device_execution/execute_block.cpp
Normal file
1050
test_conformance/device_execution/execute_block.cpp
Normal file
File diff suppressed because it is too large
Load Diff
228
test_conformance/device_execution/host_multi_queue.cpp
Normal file
228
test_conformance/device_execution/host_multi_queue.cpp
Normal file
@@ -0,0 +1,228 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "procs.h"
|
||||
#include "utils.h"
|
||||
#include <time.h>
|
||||
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
extern int gWimpyMode;
|
||||
static const char* multi_queue_simple_block1[] =
|
||||
{
|
||||
NL, "void block_fn(size_t tid, int mul, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " res[tid] = mul * 7 - 21;"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void multi_queue_simple_block1(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " int multiplier = 3;"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
|
||||
NL, ""
|
||||
NL, " res[tid] = -1;"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* multi_queue_simple_block2[] =
|
||||
{
|
||||
NL, "void block_fn(size_t tid, int mul, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " res[tid] = mul * 7 - 21;"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void multi_queue_simple_block2(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " int multiplier = 3;"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
|
||||
NL, ""
|
||||
NL, " res[tid] = -1;"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* multi_queue_simple_block3[] =
|
||||
{
|
||||
NL, "void block_fn(size_t tid, int mul, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " res[tid] = mul * 7 - 21;"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void multi_queue_simple_block3(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " int multiplier = 3;"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
|
||||
NL, ""
|
||||
NL, " res[tid] = -1;"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* multi_queue_simple_block4[] =
|
||||
{
|
||||
NL, "void block_fn(size_t tid, int mul, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " res[tid] = mul * 7 - 21;"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void multi_queue_simple_block4(__global int* res)"
|
||||
NL, "{"
|
||||
NL, " int multiplier = 3;"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
|
||||
NL, ""
|
||||
NL, " res[tid] = -1;"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const kernel_src sources_multi_queue_block[] =
|
||||
{
|
||||
KERNEL(multi_queue_simple_block1),
|
||||
KERNEL(multi_queue_simple_block2),
|
||||
KERNEL(multi_queue_simple_block3),
|
||||
KERNEL(multi_queue_simple_block4),
|
||||
};
|
||||
static const size_t num_kernels_multi_queue_block = arr_size(sources_multi_queue_block);
|
||||
|
||||
|
||||
// Launches one device-enqueue kernel per host queue (all sharing the default
// on-device queue) and verifies that every kernel completes and that every
// result slot ends up as 0.
//
// Returns 0 on success and -1 on any failure; failures of the initial device
// queries or device-queue creation return early through test_error.
int test_host_multi_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_uint i;
    cl_int err_ret, res = 0;
    clCommandQueueWrapper dev_queue;
    cl_int kernel_results[MAX_GWS] = {0};

    size_t ret_len;
    cl_uint max_queues = 1;
    cl_uint maxQueueSize = 0;
    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");

    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");

    size_t max_local_size = 1;
    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");

    // Default on-device queue; the kernels reach it through get_default_queue().
    cl_queue_properties queue_prop_def[] =
    {
        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT,
        CL_QUEUE_SIZE, maxQueueSize,
        0
    };

    dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
    test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");

    cl_uint n = num_kernels_multi_queue_block; // Number of host queues
    std::vector<clCommandQueueWrapper> queues(n);
    std::vector<cl_command_queue> q(n);
    std::vector<clProgramWrapper> program(n);
    std::vector<clKernelWrapper> kernel(n);
    std::vector<clMemWrapper> mem(n);
    std::vector<clEventWrapper> event(n);

    // Phase 1: one host queue per kernel.
    for(i = 0; i < n; ++i)
    {
        queues[i] = clCreateCommandQueueWithProperties(context, device, NULL, &err_ret);
        if(check_error(err_ret, "clCreateCommandQueueWithProperties() failed")) { res = -1; break; }
        q[i] = queues[i];
    }

    // Phase 2: build and launch every kernel on its own queue.
    if(err_ret == CL_SUCCESS)
    {
        const size_t global = gWimpyMode ? 16 : MAX_GWS;

        for(i = 0; i < n; ++i)
        {
            // Plain assignment: CL status codes are not bit flags, so they must not be OR-ed together.
            err_ret = create_single_kernel_helper_with_build_options(context, &program[i], &kernel[i], sources_multi_queue_block[i].num_lines, sources_multi_queue_block[i].lines, sources_multi_queue_block[i].kernel_name, "-cl-std=CL2.0");
            if(check_error(err_ret, "Create single kernel failed")) { res = -1; break; }

            // kernel_results is still all zeros here, so every buffer starts zero-filled.
            mem[i] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(kernel_results), kernel_results, &err_ret);
            if(check_error(err_ret, "clCreateBuffer() failed")) { res = -1; break; }

            err_ret = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), &mem[i]);
            if(check_error(err_ret, "clSetKernelArg(0) failed")) { res = -1; break; }

            err_ret = clEnqueueNDRangeKernel(q[i], kernel[i], 1, NULL, &global, 0, 0, NULL, &event[i]);
            if(check_error(err_ret, "clEnqueueNDRangeKernel() failed")) { res = -1; break; }
        }
    }

    // Phase 3: read back and validate each queue's results.
    if(err_ret == CL_SUCCESS)
    {
        for(i = 0; i < n; ++i)
        {
            cl_int status;
            err_ret = clEnqueueReadBuffer(q[i], mem[i], CL_TRUE, 0, sizeof(kernel_results), kernel_results, 0, NULL, NULL);
            if(check_error(err_ret, "clEnqueueReadBuffer() failed")) { res = -1; break; }

            err_ret = clGetEventInfo(event[i], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, &ret_len);
            if(check_error(err_ret, "clGetEventInfo() failed")) { res = -1; break; }

#if CL_COMPLETE != CL_SUCCESS
#error Fix me!
#endif
            // This hack is possible because both CL_COMPLETE and CL_SUCCESS defined as 0x00
            if(check_error(status, "Kernel execution status %d", status)) { err_ret = status; res = -1; break; }

            // Every slot must be 0: written slots hold 3 * 7 - 21, unwritten ones keep
            // their zero initial value. Scan the whole buffer, not only element 0.
            size_t bad = 0;
            while(bad < arr_size(kernel_results) && kernel_results[bad] == 0) ++bad;

            if(bad < arr_size(kernel_results) && check_error(-1, "'%s' kernel results validation failed = %d", sources_multi_queue_block[i].kernel_name, kernel_results[bad])) { res = -1; break; }
        }
    }

    return res;
}
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
185
test_conformance/device_execution/host_queue_order.cpp
Normal file
185
test_conformance/device_execution/host_queue_order.cpp
Normal file
@@ -0,0 +1,185 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "procs.h"
|
||||
#include "utils.h"
|
||||
#include <time.h>
|
||||
|
||||
extern int gWimpyMode;
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
|
||||
static const char* enqueue_block_first_kernel[] =
|
||||
{
|
||||
NL, "void block_fn(uint num, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, ""
|
||||
NL, " for(int i = 1 ; i < tid ; i++)"
|
||||
NL, " {"
|
||||
NL, " for(int j = 0 ; j < num ; j++)"
|
||||
NL, " atomic_add(res+tid, (int)sqrt((float)i*i) / i);"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_block_first_kernel(uint num, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(num, res); };"
|
||||
NL, ""
|
||||
NL, " ndrange_t ndrange = ndrange_1D(num, 1);"
|
||||
NL, ""
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[0] = -1; return; }"
|
||||
NL, ""
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_block_second_kernel[] =
|
||||
{
|
||||
NL, "void block_fn(uint num, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " for(int i = 2 ; i < num ; i++)"
|
||||
NL, " {"
|
||||
NL, " res[i] = res[i]/num - (i-1);"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_block_second_kernel(uint num, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(num, res); };"
|
||||
NL, ""
|
||||
NL, " ndrange_t ndrange = ndrange_1D(1);"
|
||||
NL, ""
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[0] = -1; return; }"
|
||||
NL, ""
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
// Returns the index of the first non-zero entry among the first len results,
// or -1 when all of them are zero (also when len <= 0).
static int check_kernel_results(cl_int* results, cl_int len)
{
    cl_int idx = 0;
    while(idx < len && results[idx] == 0)
    {
        ++idx;
    }
    return (idx < len) ? idx : -1;
}
|
||||
|
||||
/*
|
||||
Test checks kernel block execution order in case of two different kernels with enqueue block submitted to one ordered host queue.
|
||||
*/
|
||||
// Submits two device-enqueue kernels to the same host queue. The first is held
// back by a user event until both are queued; the results are then read back
// and every one of the first `num` entries is expected to be 0.
//
// Returns 0 on success, -1 on validation or setup failure, or the kernel's
// execution status when that reports an error.
int test_host_queue_order(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int k, err_ret, res = 0;
    clCommandQueueWrapper dev_queue;
    cl_int kernel_results[MAX_GWS] = {0};

    size_t ret_len;
    cl_uint max_queues = 1;
    cl_uint maxQueueSize = 0;
    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");

    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");

    size_t max_local_size = 1;
    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");

    // Default on-device queue; the kernels reach it through get_default_queue().
    cl_queue_properties queue_prop_def[] =
    {
        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT,
        CL_QUEUE_SIZE, maxQueueSize,
        0
    };

    dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
    test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");

    cl_int status;
    size_t size = 1;
    cl_int result[MAX_GWS] = { 0 };
    cl_uint num = arr_size(result);
    if( gWimpyMode )
    {
        num = MAX(num / 16, 4);
    }

    clMemWrapper res_mem;
    clProgramWrapper program1, program2;
    clKernelWrapper kernel1, kernel2;

    // Event wrappers release their handles on every exit path.
    clEventWrapper event1, kernel_event;

    err_ret = create_single_kernel_helper_with_build_options(context, &program1, &kernel1, arr_size(enqueue_block_first_kernel), enqueue_block_first_kernel, "enqueue_block_first_kernel", "-cl-std=CL2.0");
    if(check_error(err_ret, "Create single kernel failed")) return -1;

    err_ret = create_single_kernel_helper_with_build_options(context, &program2, &kernel2, arr_size(enqueue_block_second_kernel), enqueue_block_second_kernel, "enqueue_block_second_kernel", "-cl-std=CL2.0");
    if(check_error(err_ret, "Create single kernel failed")) return -1;

    res_mem = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, sizeof(kernel_results), kernel_results, &err_ret);
    test_error(err_ret, "clCreateBuffer() failed");

    // Set the arguments of both kernels up front, so that nothing but the second
    // enqueue can fail while the first kernel is blocked on the user event.
    err_ret = clSetKernelArg(kernel1, 0, sizeof(num), &num);
    test_error(err_ret, "clSetKernelArg(0) failed");
    err_ret = clSetKernelArg(kernel1, 1, sizeof(cl_mem), &res_mem);
    test_error(err_ret, "clSetKernelArg(1) failed");

    err_ret = clSetKernelArg(kernel2, 0, sizeof(num), &num);
    test_error(err_ret, "clSetKernelArg(0) failed");
    err_ret = clSetKernelArg(kernel2, 1, sizeof(cl_mem), &res_mem);
    test_error(err_ret, "clSetKernelArg(1) failed");

    // Enqueue first kernel, gated by a user event
    event1 = clCreateUserEvent(context, &err_ret);
    if(check_error(err_ret, "Create user event failed")) return -1;

    err_ret = clEnqueueNDRangeKernel(queue, kernel1, 1, NULL, &size, &size, 1, &event1, NULL);
    test_error(err_ret, "clEnqueueNDRangeKernel('enqueue_block_first_kernel') failed");

    // Enqueue second kernel
    err_ret = clEnqueueNDRangeKernel(queue, kernel2, 1, NULL, &size, &size, 0, NULL, &kernel_event);
    if(err_ret != CL_SUCCESS)
    {
        // Unblock the first kernel before bailing out so the host queue can drain.
        (void)clSetUserEventStatus(event1, CL_COMPLETE);
    }
    test_error(err_ret, "clEnqueueNDRangeKernel('enqueue_block_second_kernel') failed");

    // Trigger execution of the first kernel
    err_ret = clSetUserEventStatus(event1, CL_COMPLETE);
    test_error(err_ret, "clSetUserEventStatus() failed");

    // Collect results
    err_ret = clEnqueueReadBuffer(queue, res_mem, CL_TRUE, 0, sizeof(result), result, 0, NULL, NULL);
    test_error(err_ret, "clEnqueueReadBuffer() failed");

    err_ret = clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, &ret_len);
    test_error(err_ret, "clGetEventInfo() failed");

    if(check_error(status, "Kernel execution status %d", status)) return status;

    if((k = check_kernel_results(result, num)) >= 0 && check_error(-1, "'%s' results validation failed: [%d] returned %d expected 0", "test_host_queue_order", k, result[k])) res = -1;

    return res;
}
|
||||
|
||||
#endif
|
||||
|
||||
101
test_conformance/device_execution/main.c
Normal file
101
test_conformance/device_execution/main.c
Normal file
@@ -0,0 +1,101 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/parseParameters.h"
|
||||
#include "utils.h"
|
||||
#include "procs.h"
|
||||
|
||||
// Optional kernel-name filter; main() fills it from the "-kernelName" command-line argument.
std::string gKernelName;
|
||||
// Non-zero selects the reduced-workload ("wimpy") mode; main() sets it when "-w" is given.
int gWimpyMode = 0;
|
||||
|
||||
// Test entry points handed to runTestHarness(). The order must match
// commonfn_names, entry for entry. The table is empty when the headers do
// not define CL_VERSION_2_0.
basefn basefn_list[] =
{
#ifdef CL_VERSION_2_0
    test_device_info,
    test_device_queue,
    test_execute_block,
    test_enqueue_block,
    test_enqueue_nested_blocks,
    test_enqueue_wg_size,
    test_enqueue_flags,
    test_enqueue_multi_queue,
    test_host_multi_queue,
    test_enqueue_ndrange,
    test_host_queue_order,
#endif
};
|
||||
|
||||
const char *commonfn_names[] =
|
||||
{
|
||||
#ifdef CL_VERSION_2_0
|
||||
"test_device_info",
|
||||
"test_device_queue",
|
||||
"test_execute_block",
|
||||
"test_enqueue_block",
|
||||
"test_enqueue_nested_blocks",
|
||||
"test_enqueue_wg_size",
|
||||
"test_enqueue_flags",
|
||||
"test_enqueue_multi_queue",
|
||||
"test_host_multi_queue",
|
||||
"test_enqueue_ndrange",
|
||||
"test_host_queue_order",
|
||||
#endif
|
||||
};
|
||||
|
||||
ct_assert(arr_size(commonfn_names) == arr_size(basefn_list))
|
||||
|
||||
static const int num_commonfns = arr_size(commonfn_names);
|
||||
|
||||
int
|
||||
main(int argc, const char *argv[])
|
||||
{
|
||||
argc = parseCustomParam(argc, argv);
|
||||
|
||||
for (int i = 0; i < argc; ++i) {
|
||||
int argsRemoveNum = 0;
|
||||
if ( strcmp(argv[i], "-kernelName") == 0 ) {
|
||||
if((i + 1) > argc && argv[i + 1] == NULL) {
|
||||
vlog( "Missing value for -kernelName argument\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
gKernelName = std::string(argv[i + 1]);
|
||||
argsRemoveNum += 2;
|
||||
}
|
||||
if (strcmp(argv[i], "-w") == 0 ){
|
||||
gWimpyMode = 1;
|
||||
argsRemoveNum += 1;
|
||||
}
|
||||
|
||||
|
||||
if (argsRemoveNum > 0) {
|
||||
for (int j = i; j < (argc - argsRemoveNum); ++j)
|
||||
argv[j] = argv[j + argsRemoveNum];
|
||||
|
||||
argc -= argsRemoveNum;
|
||||
--i;
|
||||
}
|
||||
}
|
||||
|
||||
return runTestHarness(argc, argv, num_commonfns, basefn_list, commonfn_names, false, false, 0);
|
||||
}
|
||||
374
test_conformance/device_execution/nested_blocks.cpp
Normal file
374
test_conformance/device_execution/nested_blocks.cpp
Normal file
@@ -0,0 +1,374 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "procs.h"
|
||||
#include "utils.h"
|
||||
#include <time.h>
|
||||
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
|
||||
// NOTE(review): presumably the nesting depth passed to the kernels as their `level` argument; its use is outside this view — confirm.
static int gNestingLevel = 4;
|
||||
extern int gWimpyMode;
|
||||
|
||||
static const char* enqueue_nested_blocks_single[] =
|
||||
{
|
||||
NL, "void block_fn(__global int* res, int level)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(3);"
|
||||
NL, " if(--level < 0) return;"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(res, level); };"
|
||||
NL, ""
|
||||
NL, " // Only 1 work-item enqueues block"
|
||||
NL, " if(tid == 1)"
|
||||
NL, " {"
|
||||
NL, " res[tid]++;"
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_nested_blocks_single(__global int* res, int level)"
|
||||
NL, "{"
|
||||
NL, " block_fn(res, level);"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_nested_blocks_some_eq[] =
|
||||
{
|
||||
NL, "void block_fn(int level, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(10);"
|
||||
NL, " if(--level < 0) return;"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(level, res); };"
|
||||
NL, ""
|
||||
NL, " // Some work-items enqueues nested blocks with the same level"
|
||||
NL, " if(tid < (get_global_size(0) >> 1))"
|
||||
NL, " {"
|
||||
NL, " atomic_inc(&res[tid]);"
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_nested_blocks_some_eq(__global int* res, int level)"
|
||||
NL, "{"
|
||||
NL, " block_fn(level, res);"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_nested_blocks_some_diff[] =
|
||||
{
|
||||
NL, "void block_fn(int level, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(10);"
|
||||
NL, " if(--level < 0) return;"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(level, res); };"
|
||||
NL, ""
|
||||
NL, " // Some work-items enqueues nested blocks with different levels"
|
||||
NL, " if(tid % 2)"
|
||||
NL, " {"
|
||||
NL, " atomic_inc(&res[tid]);"
|
||||
NL, " if(level >= tid)"
|
||||
NL, " {"
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_nested_blocks_some_diff(__global int* res, int level)"
|
||||
NL, "{"
|
||||
NL, " block_fn(level, res);"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_nested_blocks_all_eq[] =
|
||||
{
|
||||
NL, "void block_fn(int level, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(4);"
|
||||
NL, " if(--level < 0) return;"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(level, res); };"
|
||||
NL, ""
|
||||
NL, " // All work-items enqueues nested blocks with the same level"
|
||||
NL, " atomic_inc(&res[tid]);"
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_nested_blocks_all_eq(__global int* res, int level)"
|
||||
NL, "{"
|
||||
NL, " block_fn(level, res);"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
static const char* enqueue_nested_blocks_all_diff[] =
|
||||
{
|
||||
NL, "void block_fn(int level, __global int* res)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " queue_t def_q = get_default_queue();"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(10);"
|
||||
NL, " if(--level < 0) return;"
|
||||
NL, ""
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(level, res); };"
|
||||
NL, ""
|
||||
NL, " // All work-items enqueues nested blocks with different levels"
|
||||
NL, " atomic_inc(&res[tid]);"
|
||||
NL, " if(level >= tid)"
|
||||
NL, " {"
|
||||
NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void enqueue_nested_blocks_all_diff(__global int* res, int level)"
|
||||
NL, "{"
|
||||
NL, " block_fn(level, res);"
|
||||
NL, "}"
|
||||
NL
|
||||
};
|
||||
|
||||
// Validates the output of the 'single' kernel: slot 1 must equal nesting_level and
// every other slot must be 0.
// Returns the index of the first mismatching element, or -1 when all match.
static int check_single(cl_int* results, cl_int len, cl_int nesting_level)
{
    const int enqueuing_item = 1; // the only work-item that increments its slot

    for (int idx = 0; idx < len; ++idx)
    {
        const cl_int expected = (idx == enqueuing_item) ? nesting_level : 0;
        if (results[idx] != expected) return idx;
    }

    return -1;
}
|
||||
|
||||
void generate_reference_some_eq(std::vector<cl_int> &referenceResults, cl_int len, cl_int nesting_level)
|
||||
{
|
||||
size_t globalWorkSize = (nesting_level == gNestingLevel)? len: 10;
|
||||
if(--nesting_level < 0) return;
|
||||
|
||||
for (size_t tid = 0; tid < globalWorkSize; ++tid)
|
||||
{
|
||||
if (tid < (globalWorkSize >> 1))
|
||||
{
|
||||
++referenceResults[tid];
|
||||
generate_reference_some_eq(referenceResults, len, nesting_level);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compares the 'some_eq' kernel output with the host-generated reference.
// Returns the index of the first mismatching element, or -1 when all match.
static int check_some_eq(cl_int* results, cl_int len, cl_int nesting_level)
{
    std::vector<cl_int> expected(len, 0);
    generate_reference_some_eq(expected, len, nesting_level);

    int idx = 0;
    while (idx < len && results[idx] == expected[idx]) ++idx;

    return (idx < len) ? idx : -1;
}
|
||||
|
||||
void generate_reference_some_diff(std::vector<cl_int> &referenceResults, cl_int len, cl_int nesting_level)
|
||||
{
|
||||
size_t globalWorkSize = (nesting_level == gNestingLevel)? len: 10;
|
||||
if(--nesting_level < 0) return;
|
||||
|
||||
for (size_t tid = 0; tid < globalWorkSize; ++tid)
|
||||
{
|
||||
if (tid % 2)
|
||||
{
|
||||
++referenceResults[tid];
|
||||
if (nesting_level >= tid)
|
||||
{
|
||||
generate_reference_some_diff(referenceResults, len, nesting_level);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compares the 'some_diff' kernel output with the host-generated reference.
// Returns the index of the first mismatching element, or -1 when all match.
static int check_some_diff(cl_int* results, cl_int len, cl_int nesting_level)
{
    std::vector<cl_int> expected(len, 0);
    generate_reference_some_diff(expected, len, nesting_level);

    int idx = 0;
    while (idx < len && results[idx] == expected[idx]) ++idx;

    return (idx < len) ? idx : -1;
}
|
||||
|
||||
void generate_reference_all_eq(std::vector<cl_int> &referenceResults, cl_int len, cl_int nesting_level)
|
||||
{
|
||||
size_t globalWorkSize = (nesting_level == gNestingLevel)? len: 4;
|
||||
if(--nesting_level < 0) return;
|
||||
|
||||
for (size_t tid = 0; tid < globalWorkSize; ++tid)
|
||||
{
|
||||
++referenceResults[tid];
|
||||
generate_reference_all_eq(referenceResults, len, nesting_level);
|
||||
}
|
||||
}
|
||||
|
||||
// Compares the 'all_eq' kernel output with the host-generated reference.
// Returns the index of the first mismatching element, or -1 when all match.
static int check_all_eq(cl_int* results, cl_int len, cl_int nesting_level)
{
    std::vector<cl_int> expected(len, 0);
    generate_reference_all_eq(expected, len, nesting_level);

    int idx = 0;
    while (idx < len && results[idx] == expected[idx]) ++idx;

    return (idx < len) ? idx : -1;
}
|
||||
|
||||
void generate_reference_all_diff(std::vector<cl_int> &referenceResults, cl_int len, cl_int nesting_level)
|
||||
{
|
||||
size_t globalWorkSize = (nesting_level == gNestingLevel)? len: 10;
|
||||
if(--nesting_level < 0) return;
|
||||
|
||||
for (size_t tid = 0; tid < globalWorkSize; ++tid)
|
||||
{
|
||||
++referenceResults[tid];
|
||||
if (nesting_level >= tid)
|
||||
{
|
||||
generate_reference_all_diff(referenceResults, len, nesting_level);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compares the 'all_diff' kernel output with the host-generated reference.
// Returns the index of the first mismatching element, or -1 when all match.
static int check_all_diff(cl_int* results, cl_int len, cl_int nesting_level)
{
    std::vector<cl_int> expected(len, 0);
    generate_reference_all_diff(expected, len, nesting_level);

    int idx = 0;
    while (idx < len && results[idx] == expected[idx]) ++idx;

    return (idx < len) ? idx : -1;
}
|
||||
|
||||
// Test table: each entry pairs one nested-block kernel source (built with KERNEL())
// with the host-side checker that validates that kernel's results.
// test_enqueue_nested_blocks iterates over it in order.
static const kernel_src_check sources_nested_blocks[] =
|
||||
{
|
||||
{ KERNEL(enqueue_nested_blocks_single), check_single },
|
||||
{ KERNEL(enqueue_nested_blocks_some_eq), check_some_eq },
|
||||
{ KERNEL(enqueue_nested_blocks_some_diff), check_some_diff },
|
||||
{ KERNEL(enqueue_nested_blocks_all_eq), check_all_eq },
|
||||
{ KERNEL(enqueue_nested_blocks_all_diff), check_all_diff }
|
||||
};
|
||||
|
||||
// Runs every kernel in sources_nested_blocks (or only the one named by gKernelName
// when that filter is non-empty) over MAX_GWS / 4 work-items, passing gNestingLevel
// as the kernel's 'level' argument, and validates the results with the per-kernel
// checker.
//
// A default on-device queue of the maximum supported size is created first so the
// kernels can call get_default_queue(). In wimpy mode the nesting level is lowered
// to 2.
//
// Returns 0 when every executed kernel ran and validated, -1 when at least one kernel
// failed, or the OpenCL error code when the device queries or queue creation fail.
int test_enqueue_nested_blocks(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_uint i, k;
    cl_int err_ret, res = 0;
    clCommandQueueWrapper dev_queue;
    const size_t MAX_GLOBAL_WORK_SIZE = MAX_GWS / 4;
    const cl_uint kernel_count = (cl_uint)arr_size(sources_nested_blocks);
    cl_int kernel_results[MAX_GLOBAL_WORK_SIZE] = {0};

    if(gWimpyMode)
    {
        gNestingLevel = 2;
        vlog( "*** WARNING: Testing in Wimpy mode! ***\n" );
        vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
    }

    size_t ret_len;
    cl_uint max_queues = 1;
    cl_uint maxQueueSize = 0;
    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");

    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");

    cl_queue_properties queue_prop_def[] =
    {
        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT,
        CL_QUEUE_SIZE, maxQueueSize,
        0
    };

    dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
    test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");

    // The only extra kernel argument is the nesting level.
    kernel_arg args[] =
    {
        { sizeof(cl_int), &gNestingLevel }
    };

    size_t failCnt = 0;
    for(k = 0; k < kernel_count; ++k)
    {
        const char* kernel_name = sources_nested_blocks[k].src.kernel_name;

        if (!gKernelName.empty() && gKernelName != kernel_name)
            continue;

        log_info("Running '%s' kernel (%d of %d) ...\n", kernel_name, (int)(k + 1), (int)kernel_count);

        // Kernels accumulate into the buffer, so it must start from zero every time.
        for(i = 0; i < MAX_GLOBAL_WORK_SIZE; ++i) kernel_results[i] = 0;

        // local == 0 lets the runtime choose the work-group size.
        err_ret = run_n_kernel_args(context, queue, sources_nested_blocks[k].src.lines, sources_nested_blocks[k].src.num_lines, kernel_name, 0, MAX_GLOBAL_WORK_SIZE, kernel_results, sizeof(kernel_results), arr_size(args), args);
        if(check_error(err_ret, "'%s' kernel execution failed", kernel_name))
        {
            // Execution failures count towards the summary too.
            ++failCnt;
            res = -1;
            continue;
        }

        //check results
        int fail = sources_nested_blocks[k].check(kernel_results, MAX_GLOBAL_WORK_SIZE, gNestingLevel);
        if(fail >= 0)
        {
            // The expected value depends on the kernel and the index, so only the
            // offending element is reported.
            (void)check_error(-1, "'%s' kernel results validation failed: [%d] returned %d, which does not match the reference value", kernel_name, fail, kernel_results[fail]);
            ++failCnt;
            res = -1;
            continue;
        }

        log_info("'%s' kernel is OK.\n", kernel_name);
    }

    if (failCnt > 0)
    {
        log_error("ERROR: %d of %d kernels failed.\n", (int)failCnt, (int)kernel_count);
    }

    return res;
}
|
||||
|
||||
#endif
|
||||
|
||||
40
test_conformance/device_execution/procs.h
Normal file
40
test_conformance/device_execution/procs.h
Normal file
@@ -0,0 +1,40 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Include guard: the header previously had none.
#ifndef DEVICE_EXECUTION_PROCS_H
#define DEVICE_EXECUTION_PROCS_H

#include "../../test_common/harness/testHarness.h"

#ifdef __cplusplus
extern "C" {
#endif

// Test entry points of the device_execution suite. All share one signature taking the
// device, context, host command queue and an element count.
extern int test_device_info(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
extern int test_device_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
extern int test_execute_block(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_block(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_nested_blocks(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_wg_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_flags(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_multi_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
extern int test_host_multi_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_ndrange(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
extern int test_host_queue_order(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);

extern int test_execution_stress(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);

#ifdef __cplusplus
}
#endif

#endif // DEVICE_EXECUTION_PROCS_H
|
||||
|
||||
|
||||
76
test_conformance/device_execution/utils.cpp
Normal file
76
test_conformance/device_execution/utils.cpp
Normal file
@@ -0,0 +1,76 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
// Convenience wrapper: runs the kernel as a single work-item with no arguments other
// than the results buffer. Returns whatever run_single_kernel_args() returns.
int run_single_kernel(cl_context context, cl_command_queue queue, const char** source, unsigned int num_lines, const char* kernel_name, void* results, size_t res_size)
{
    const cl_uint no_extra_args = 0;

    return run_single_kernel_args(context, queue, source, num_lines, kernel_name,
                                  results, res_size, no_extra_args, NULL);
}
|
||||
|
||||
// Convenience wrapper: runs the kernel with a local and a global work size of 1,
// forwarding the extra arguments. Returns whatever run_n_kernel_args() returns.
int run_single_kernel_args(cl_context context, cl_command_queue queue, const char** source, unsigned int num_lines, const char* kernel_name, void* results, size_t res_size, cl_uint num_args, kernel_arg* args)
{
    const size_t single_item = 1;

    return run_n_kernel_args(context, queue, source, num_lines, kernel_name,
                             single_item, single_item,
                             results, res_size, num_args, args);
}
|
||||
|
||||
// Builds 'kernel_name' from 'source' with -cl-std=CL2.0, binds a buffer wrapping
// 'results' as argument 0 and args[0..num_args) as arguments 1..num_args, runs it as a
// 1-D ND-range and reads the buffer back into 'results'.
//
//   local  - work-group size; 0 passes NULL so the runtime picks one.
//   global - global work size.
//
// Returns 0 on success, -1 when the program/kernel cannot be created, otherwise the
// failing OpenCL error code or the kernel event's execution status.
int run_n_kernel_args(cl_context context, cl_command_queue queue, const char** source, unsigned int num_lines, const char* kernel_name, size_t local, size_t global, void* results, size_t res_size, cl_uint num_args, kernel_arg* args)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper mem;
    clEventWrapper event;

    cl_int err = create_single_kernel_helper_with_build_options(context, &program, &kernel, num_lines, source, kernel_name, "-cl-std=CL2.0");
    if(check_error(err, "Create single kernel failed")) return -1;

    // The results array itself backs the device buffer.
    mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, res_size, results, &err);
    test_error(err, "clCreateBuffer() failed");

    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem);
    if(check_error(err, "clSetKernelArg(%d, %d, %p) for kernel: '%s' failed: %d", 0, (int)sizeof(cl_mem), &mem, kernel_name, err)) return err;

    // Caller-supplied arguments follow the results buffer, hence the +1 offset.
    for(cl_uint extra = 0; extra < num_args; ++extra)
    {
        const cl_uint arg_index = extra + 1;
        const kernel_arg &arg = args[extra];

        err = clSetKernelArg(kernel, arg_index, arg.size, arg.ptr);
        if(check_error(err, "clSetKernelArg(%d, %d, %p) for kernel: '%s' failed: %d", (int)arg_index, (int)arg.size, arg.ptr, kernel_name, err)) return err;
    }

    const size_t *local_size = (local != 0) ? &local : NULL;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, local_size, 0, NULL, &event);
    if(check_error(err, "clEnqueueNDRangeKernel('%s', gws=%d, lws=%d) failed", kernel_name, (int)global, (int)local)) return err;

    // Blocking read back into the caller's array.
    err = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, res_size, results, 0, NULL, NULL);
    test_error(err, "clEnqueueReadBuffer() failed");

    cl_int exec_status;
    size_t status_size;
    err = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(exec_status), &exec_status, &status_size);
    test_error(err, "clGetEventInfo() failed");

#if CL_COMPLETE != CL_SUCCESS
#error Fix me!
#endif

    // check_error() compares against CL_SUCCESS; that doubles as a CL_COMPLETE test
    // only because the preprocessor check above guarantees the two are equal.
    if(check_error(exec_status, "Kernel execution status %d", exec_status)) return exec_status;

    return 0;
}
|
||||
|
||||
73
test_conformance/device_execution/utils.h
Normal file
73
test_conformance/device_execution/utils.h
Normal file
@@ -0,0 +1,73 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _utils_h_
|
||||
#define _utils_h_
|
||||
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/mt19937.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#ifndef CL_VERSION_2_0
|
||||
#define CL_VERSION_2_0
|
||||
#endif
|
||||
|
||||
#define MAX_QUEUES 1000 // Max number of queues to test
#define MAX_GWS 256 // Global Work Size (must be multiple of 16)


// Line separator element used between the lines of the kernel source arrays.
#define NL "\n"

// Number of elements of a real array (not a pointer). The argument is parenthesized
// so that expression arguments index correctly.
#define arr_size(a) (sizeof(a)/sizeof((a)[0]))

// Evaluates to 1 and logs "ERROR: <msg>! (<file>:<line>)" when errCode differs from
// CL_SUCCESS, otherwise evaluates to 0. 'msg' must be a string literal (it is
// concatenated at compile time); printf-style arguments may follow it.
// errCode is parenthesized so that arguments such as 'a & b' are compared as a whole.
#define check_error(errCode,msg,...) (((errCode) != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", ## __VA_ARGS__, __FILE__, __LINE__), 1) : 0)

// Initializer for a kernel_src: { element count, source array, stringified array name }.
#define KERNEL(name) { arr_size(name), name, #name }
|
||||
|
||||
// Kernel-name filter, defined in another translation unit. When non-empty,
// test_enqueue_nested_blocks runs only the kernel whose name equals it.
extern std::string gKernelName;
|
||||
|
||||
// One kernel program as an array of source strings; normally initialized with KERNEL().
typedef struct
|
||||
{
|
||||
// Number of elements in 'lines'.
unsigned int num_lines;
|
||||
// The source strings, handed to the program-build helper as-is.
const char** lines;
|
||||
// Name of the kernel entry point to create from the built program.
const char* kernel_name;
|
||||
} kernel_src;
|
||||
|
||||
// Result checker: (results, element count, nesting level). The checkers in
// nested_blocks.cpp return the index of the first wrong element, or -1 when all match.
typedef int (*fn_check)(cl_int*, cl_int, cl_int);
|
||||
|
||||
// A kernel source paired with the host-side function that validates its results.
typedef struct
|
||||
{
|
||||
// Kernel source and entry-point name.
kernel_src src;
|
||||
// Checker called on the results read back after the kernel has run.
fn_check check;
|
||||
} kernel_src_check;
|
||||
|
||||
// One extra kernel argument; run_n_kernel_args passes 'size' and 'ptr' straight to
// clSetKernelArg.
typedef struct
|
||||
{
|
||||
// Size of the argument value in bytes.
size_t size;
|
||||
// Pointer to the argument value.
const void* ptr;
|
||||
} kernel_arg;
|
||||
|
||||
// A kernel source with ND-range parameters. The users of this struct are outside this
// file, so the field notes below are assumptions to be confirmed against the callers.
typedef struct
|
||||
{
|
||||
// Kernel source and entry-point name.
kernel_src src;
|
||||
// NOTE(review): presumably the number of ND-range dimensions; confirm.
cl_int dim;
|
||||
// NOTE(review): presumably whether a local work size is specified; confirm.
cl_bool localSize;
|
||||
// NOTE(review): presumably whether a global work offset is specified; confirm.
cl_bool offset;
|
||||
} kernel_src_dim_check;
|
||||
|
||||
// Runs 'kernel_name' as a single work-item with the results buffer as its only argument.
int run_single_kernel(cl_context context, cl_command_queue queue, const char** source, unsigned int num_lines, const char* kernel_name, void* results, size_t res_size);
|
||||
// Same as run_single_kernel, with 'num_args' extra arguments bound after the results buffer.
int run_single_kernel_args(cl_context context, cl_command_queue queue, const char** source, unsigned int num_lines, const char* kernel_name, void* results, size_t res_size, cl_uint num_args, kernel_arg* args);
|
||||
// General form: 1-D ND-range of 'global' work-items with work-group size 'local'
// (0 lets the runtime choose). Returns 0 on success, non-zero on any failure.
int run_n_kernel_args(cl_context context, cl_command_queue queue, const char** source, unsigned int num_lines, const char* kernel_name, size_t local, size_t global, void* results, size_t res_size, cl_uint num_args, kernel_arg* args);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user