mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
test allocations: restore small number of work items in case of reduction (#1932)
This commit is contained in:
committed by
GitHub
parent
4fceb78b93
commit
b377b8537b
@@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@@ -20,7 +20,8 @@
|
||||
|
||||
|
||||
const char *buffer_kernel_pattern = {
|
||||
"__kernel void sample_test(%s __global uint *result, __global %s *array_sizes, uint per_item)\n"
|
||||
"__kernel void sample_test(%s __global uint *result, __global %s "
|
||||
"*array_sizes, uint per_item)\n"
|
||||
"{\n"
|
||||
"\tint tid = get_global_id(0);\n"
|
||||
"\tuint r = 0;\n"
|
||||
@@ -29,7 +30,8 @@ const char *buffer_kernel_pattern = {
|
||||
"%s"
|
||||
"\t}\n"
|
||||
"\tresult[tid] = r;\n"
|
||||
"}\n" };
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char *image_kernel_pattern = {
|
||||
"__kernel void sample_test(%s __global uint *result)\n"
|
||||
@@ -40,7 +42,8 @@ const char *image_kernel_pattern = {
|
||||
"\tint x, y;\n"
|
||||
"%s"
|
||||
"\tresult[get_global_id(0)] += color.x + color.y + color.z + color.w;\n"
|
||||
"}\n" };
|
||||
"}\n"
|
||||
};
|
||||
|
||||
const char *read_pattern = {
|
||||
"\tfor(y=0; y<get_image_height(image%d); y++)\n"
|
||||
@@ -50,11 +53,11 @@ const char *read_pattern = {
|
||||
"\t\t\t}\n"
|
||||
};
|
||||
|
||||
const char *offset_pattern =
|
||||
"\tconst uint4 offset = (uint4)(0,1,2,3);\n";
|
||||
const char *offset_pattern = "\tconst uint4 offset = (uint4)(0,1,2,3);\n";
|
||||
|
||||
const char *sampler_pattern =
|
||||
"\tconst sampler_t sampler = CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n";
|
||||
"\tconst sampler_t sampler = CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST | "
|
||||
"CLK_NORMALIZED_COORDS_FALSE;\n";
|
||||
|
||||
|
||||
const char *write_pattern = {
|
||||
@@ -68,7 +71,8 @@ const char *write_pattern = {
|
||||
};
|
||||
|
||||
|
||||
int check_image(cl_command_queue queue, cl_mem mem) {
|
||||
int check_image(cl_command_queue queue, cl_mem mem)
|
||||
{
|
||||
int error;
|
||||
cl_mem_object_type type;
|
||||
size_t width, height;
|
||||
@@ -76,7 +80,8 @@ int check_image(cl_command_queue queue, cl_mem mem) {
|
||||
cl_uint *data;
|
||||
|
||||
error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
|
||||
if (error) {
|
||||
if (error)
|
||||
{
|
||||
print_error(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
|
||||
return -1;
|
||||
}
|
||||
@@ -108,8 +113,9 @@ int check_image(cl_command_queue queue, cl_mem mem) {
|
||||
}
|
||||
|
||||
|
||||
data = (cl_uint*)malloc(width*4*sizeof(cl_uint));
|
||||
if (data == NULL) {
|
||||
data = (cl_uint *)malloc(width * 4 * sizeof(cl_uint));
|
||||
if (data == NULL)
|
||||
{
|
||||
log_error("Failed to malloc host buffer for writing into image.\n");
|
||||
return FAILED_ABORT;
|
||||
}
|
||||
@@ -119,19 +125,27 @@ int check_image(cl_command_queue queue, cl_mem mem) {
|
||||
region[0] = width;
|
||||
region[1] = 1;
|
||||
region[2] = 1;
|
||||
for (origin[1] = 0; origin[1] < height; origin[1]++) {
|
||||
error = clEnqueueReadImage(queue, mem, CL_TRUE, origin, region, 0, 0, data, 0, NULL, NULL);
|
||||
if (error) {
|
||||
for (origin[1] = 0; origin[1] < height; origin[1]++)
|
||||
{
|
||||
error = clEnqueueReadImage(queue, mem, CL_TRUE, origin, region, 0, 0,
|
||||
data, 0, NULL, NULL);
|
||||
if (error)
|
||||
{
|
||||
print_error(error, "clEnqueueReadImage failed");
|
||||
free(data);
|
||||
return error;
|
||||
}
|
||||
|
||||
for (x=0; x<width; x++) {
|
||||
for (j=0; j<4; j++) {
|
||||
if (data[x*4+j] != (cl_uint)(x*origin[1]+j)) {
|
||||
log_error("Pixel %d, %d, component %d, expected %u, got %u.\n",
|
||||
(int)x, (int)origin[1], (int)j, (cl_uint)(x*origin[1]+j), data[x*4+j]);
|
||||
for (x = 0; x < width; x++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
if (data[x * 4 + j] != (cl_uint)(x * origin[1] + j))
|
||||
{
|
||||
log_error(
|
||||
"Pixel %d, %d, component %d, expected %u, got %u.\n",
|
||||
(int)x, (int)origin[1], (int)j,
|
||||
(cl_uint)(x * origin[1] + j), data[x * 4 + j]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -142,9 +156,11 @@ int check_image(cl_command_queue queue, cl_mem mem) {
|
||||
}
|
||||
|
||||
|
||||
#define NUM_OF_WORK_ITEMS (8192 * 32)
|
||||
|
||||
int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id device_id, int test, cl_mem mems[], int number_of_mems_used, int verify_checksum) {
|
||||
int execute_kernel(cl_context context, cl_command_queue *queue,
|
||||
cl_device_id device_id, int test, cl_mem mems[],
|
||||
int number_of_mems_used, int verify_checksum,
|
||||
unsigned int number_of_work_itmes)
|
||||
{
|
||||
|
||||
char *argument_string;
|
||||
char *access_string;
|
||||
@@ -158,73 +174,97 @@ int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id dev
|
||||
cl_uint per_item;
|
||||
cl_uint per_item_uint;
|
||||
cl_uint final_result;
|
||||
std::vector<cl_uint> returned_results(NUM_OF_WORK_ITEMS);
|
||||
std::vector<cl_uint> returned_results(number_of_work_itmes);
|
||||
clEventWrapper event;
|
||||
cl_int event_status;
|
||||
|
||||
// Allocate memory for the kernel source
|
||||
argument_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*64);
|
||||
access_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*(strlen(read_pattern)+10));
|
||||
kernel_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*(strlen(read_pattern)+10+64)+1024);
|
||||
argument_string =
|
||||
(char *)malloc(sizeof(char) * MAX_NUMBER_TO_ALLOCATE * 64);
|
||||
access_string = (char *)malloc(sizeof(char) * MAX_NUMBER_TO_ALLOCATE
|
||||
* (strlen(read_pattern) + 10));
|
||||
kernel_string = (char *)malloc(sizeof(char) * MAX_NUMBER_TO_ALLOCATE
|
||||
* (strlen(read_pattern) + 10 + 64)
|
||||
+ 1024);
|
||||
argument_string[0] = '\0';
|
||||
access_string[0] = '\0';
|
||||
kernel_string[0] = '\0';
|
||||
|
||||
// Zero the results.
|
||||
for (i=0; i<NUM_OF_WORK_ITEMS; i++)
|
||||
returned_results[i] = 0;
|
||||
for (i = 0; i < number_of_work_itmes; i++) returned_results[i] = 0;
|
||||
|
||||
// detect if device supports ulong/int64
|
||||
//detect whether profile of the device is embedded
|
||||
// detect whether profile of the device is embedded
|
||||
bool support64 = true;
|
||||
char profile[1024] = "";
|
||||
error = clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
|
||||
test_error(error, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
|
||||
if ((NULL != strstr(profile, "EMBEDDED_PROFILE")) &&
|
||||
(!is_extension_available(device_id, "cles_khr_int64"))) {
|
||||
support64 = false;
|
||||
error = clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(profile),
|
||||
profile, NULL);
|
||||
test_error(error, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n");
|
||||
if ((NULL != strstr(profile, "EMBEDDED_PROFILE"))
|
||||
&& (!is_extension_available(device_id, "cles_khr_int64")))
|
||||
{
|
||||
support64 = false;
|
||||
}
|
||||
|
||||
// Build the kernel source
|
||||
if (test == BUFFER || test == BUFFER_NON_BLOCKING) {
|
||||
for(i=0; i<number_of_mems_used; i++) {
|
||||
sprintf(argument_string + strlen(argument_string), " __global uint *buffer%d, ", i);
|
||||
sprintf(access_string + strlen( access_string), "\t\tif (i<array_sizes[%d]) r += buffer%d[i];\n", i, i);
|
||||
if (test == BUFFER || test == BUFFER_NON_BLOCKING)
|
||||
{
|
||||
for (i = 0; i < number_of_mems_used; i++)
|
||||
{
|
||||
sprintf(argument_string + strlen(argument_string),
|
||||
" __global uint *buffer%d, ", i);
|
||||
sprintf(access_string + strlen(access_string),
|
||||
"\t\tif (i<array_sizes[%d]) r += buffer%d[i];\n", i, i);
|
||||
}
|
||||
char type[10];
|
||||
if (support64) {
|
||||
if (support64)
|
||||
{
|
||||
sprintf(type, "ulong");
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
sprintf(type, "uint");
|
||||
}
|
||||
sprintf(kernel_string, buffer_kernel_pattern, argument_string, type, type, type, type, type, type, access_string);
|
||||
sprintf(kernel_string, buffer_kernel_pattern, argument_string, type,
|
||||
type, type, type, type, type, access_string);
|
||||
}
|
||||
else if (test == IMAGE_READ || test == IMAGE_READ_NON_BLOCKING) {
|
||||
for(i=0; i<number_of_mems_used; i++) {
|
||||
sprintf(argument_string + strlen(argument_string), " read_only image2d_t image%d, ", i);
|
||||
sprintf(access_string + strlen(access_string), read_pattern, i, "%", i, i);
|
||||
else if (test == IMAGE_READ || test == IMAGE_READ_NON_BLOCKING)
|
||||
{
|
||||
for (i = 0; i < number_of_mems_used; i++)
|
||||
{
|
||||
sprintf(argument_string + strlen(argument_string),
|
||||
" read_only image2d_t image%d, ", i);
|
||||
sprintf(access_string + strlen(access_string), read_pattern, i, "%",
|
||||
i, i);
|
||||
}
|
||||
sprintf(kernel_string, image_kernel_pattern, argument_string, sampler_pattern, access_string);
|
||||
sprintf(kernel_string, image_kernel_pattern, argument_string,
|
||||
sampler_pattern, access_string);
|
||||
}
|
||||
else if (test == IMAGE_WRITE || test == IMAGE_WRITE_NON_BLOCKING) {
|
||||
for(i=0; i<number_of_mems_used; i++) {
|
||||
sprintf(argument_string + strlen(argument_string), " write_only image2d_t image%d, ", i);
|
||||
sprintf(access_string + strlen( access_string), write_pattern, i, "%", i, i);
|
||||
else if (test == IMAGE_WRITE || test == IMAGE_WRITE_NON_BLOCKING)
|
||||
{
|
||||
for (i = 0; i < number_of_mems_used; i++)
|
||||
{
|
||||
sprintf(argument_string + strlen(argument_string),
|
||||
" write_only image2d_t image%d, ", i);
|
||||
sprintf(access_string + strlen(access_string), write_pattern, i,
|
||||
"%", i, i);
|
||||
}
|
||||
sprintf(kernel_string, image_kernel_pattern, argument_string, offset_pattern, access_string);
|
||||
sprintf(kernel_string, image_kernel_pattern, argument_string,
|
||||
offset_pattern, access_string);
|
||||
}
|
||||
ptr = kernel_string;
|
||||
|
||||
// Create the kernel
|
||||
error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&ptr, "sample_test" );
|
||||
error = create_single_kernel_helper(context, &program, &kernel, 1,
|
||||
(const char **)&ptr, "sample_test");
|
||||
|
||||
free(argument_string);
|
||||
free(access_string);
|
||||
free(kernel_string);
|
||||
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
if (result != SUCCEEDED) {
|
||||
if (result != SUCCEEDED)
|
||||
{
|
||||
if (result == FAILED_TOO_BIG)
|
||||
log_info("\t\tCreate kernel failed: %s.\n", IGetErrorString(error));
|
||||
else
|
||||
@@ -233,80 +273,109 @@ int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id dev
|
||||
}
|
||||
|
||||
// Set the arguments
|
||||
for (i=0; i<number_of_mems_used; i++) {
|
||||
for (i = 0; i < number_of_mems_used; i++)
|
||||
{
|
||||
error = clSetKernelArg(kernel, i, sizeof(cl_mem), &mems[i]);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
}
|
||||
|
||||
// Set the result
|
||||
result_mem = clCreateBuffer(
|
||||
context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
sizeof(cl_uint) * NUM_OF_WORK_ITEMS, returned_results.data(), &error);
|
||||
result_mem =
|
||||
clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
sizeof(cl_uint) * number_of_work_itmes,
|
||||
returned_results.data(), &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
error = clSetKernelArg(kernel, i, sizeof(result_mem), &result_mem);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Thread dimensions for execution
|
||||
global_dims[0] = NUM_OF_WORK_ITEMS; global_dims[1] = 1; global_dims[2] = 1;
|
||||
global_dims[0] = number_of_work_itmes;
|
||||
global_dims[1] = 1;
|
||||
global_dims[2] = 1;
|
||||
|
||||
// We have extra arguments for the buffer kernel because we need to pass in the buffer sizes
|
||||
// We have extra arguments for the buffer kernel because we need to pass in
|
||||
// the buffer sizes
|
||||
cl_ulong *ulSizes = NULL;
|
||||
cl_uint *uiSizes = NULL;
|
||||
if (support64) {
|
||||
ulSizes = (cl_ulong*)malloc(sizeof(cl_ulong)*number_of_mems_used);
|
||||
cl_uint *uiSizes = NULL;
|
||||
if (support64)
|
||||
{
|
||||
ulSizes = (cl_ulong *)malloc(sizeof(cl_ulong) * number_of_mems_used);
|
||||
}
|
||||
else {
|
||||
uiSizes = (cl_uint*)malloc(sizeof(cl_uint)*number_of_mems_used);
|
||||
else
|
||||
{
|
||||
uiSizes = (cl_uint *)malloc(sizeof(cl_uint) * number_of_mems_used);
|
||||
}
|
||||
cl_ulong max_size = 0;
|
||||
clMemWrapper buffer_sizes;
|
||||
if (test == BUFFER || test == BUFFER_NON_BLOCKING) {
|
||||
for (i=0; i<number_of_mems_used; i++) {
|
||||
if (test == BUFFER || test == BUFFER_NON_BLOCKING)
|
||||
{
|
||||
for (i = 0; i < number_of_mems_used; i++)
|
||||
{
|
||||
size_t size;
|
||||
error = clGetMemObjectInfo(mems[i], CL_MEM_SIZE, sizeof(size), &size, NULL);
|
||||
test_error_abort(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
|
||||
if (support64) {
|
||||
ulSizes[i] = size/sizeof(cl_uint);
|
||||
error = clGetMemObjectInfo(mems[i], CL_MEM_SIZE, sizeof(size),
|
||||
&size, NULL);
|
||||
test_error_abort(error,
|
||||
"clGetMemObjectInfo failed for CL_MEM_SIZE.");
|
||||
if (support64)
|
||||
{
|
||||
ulSizes[i] = size / sizeof(cl_uint);
|
||||
}
|
||||
else {
|
||||
uiSizes[i] = (cl_uint)size/sizeof(cl_uint);
|
||||
else
|
||||
{
|
||||
uiSizes[i] = (cl_uint)size / sizeof(cl_uint);
|
||||
}
|
||||
if (size/sizeof(cl_uint) > max_size)
|
||||
max_size = size/sizeof(cl_uint);
|
||||
if (size / sizeof(cl_uint) > max_size)
|
||||
max_size = size / sizeof(cl_uint);
|
||||
}
|
||||
if (support64) {
|
||||
buffer_sizes = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_ulong)*number_of_mems_used, ulSizes, &error);
|
||||
if (support64)
|
||||
{
|
||||
buffer_sizes = clCreateBuffer(
|
||||
context, CL_MEM_COPY_HOST_PTR,
|
||||
sizeof(cl_ulong) * number_of_mems_used, ulSizes, &error);
|
||||
}
|
||||
else {
|
||||
buffer_sizes = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_uint)*number_of_mems_used, uiSizes, &error);
|
||||
else
|
||||
{
|
||||
buffer_sizes = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
|
||||
sizeof(cl_uint) * number_of_mems_used,
|
||||
uiSizes, &error);
|
||||
}
|
||||
test_error_abort(error, "clCreateBuffer failed");
|
||||
error = clSetKernelArg(kernel, number_of_mems_used+1, sizeof(cl_mem), &buffer_sizes);
|
||||
error = clSetKernelArg(kernel, number_of_mems_used + 1, sizeof(cl_mem),
|
||||
&buffer_sizes);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
per_item = (cl_uint)ceil((double)max_size/global_dims[0]);
|
||||
per_item = (cl_uint)ceil((double)max_size / global_dims[0]);
|
||||
if (per_item > CL_UINT_MAX)
|
||||
log_error("Size is too large for a uint parameter to the kernel. Expect invalid results.\n");
|
||||
log_error("Size is too large for a uint parameter to the kernel. "
|
||||
"Expect invalid results.\n");
|
||||
per_item_uint = (cl_uint)per_item;
|
||||
error = clSetKernelArg(kernel, number_of_mems_used+2, sizeof(per_item_uint), &per_item_uint);
|
||||
error = clSetKernelArg(kernel, number_of_mems_used + 2,
|
||||
sizeof(per_item_uint), &per_item_uint);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
}
|
||||
if (ulSizes) {
|
||||
if (ulSizes)
|
||||
{
|
||||
free(ulSizes);
|
||||
}
|
||||
if (uiSizes) {
|
||||
if (uiSizes)
|
||||
{
|
||||
free(uiSizes);
|
||||
}
|
||||
|
||||
size_t local_dims[3] = {1,1,1};
|
||||
error = get_max_common_work_group_size(context, kernel, global_dims[0], &local_dims[0]);
|
||||
size_t local_dims[3] = { 1, 1, 1 };
|
||||
error = get_max_common_work_group_size(context, kernel, global_dims[0],
|
||||
&local_dims[0]);
|
||||
test_error(error, "get_max_common_work_group_size failed");
|
||||
|
||||
// Execute the kernel
|
||||
error = clEnqueueNDRangeKernel(*queue, kernel, 1, NULL, global_dims, local_dims, 0, NULL, &event);
|
||||
error = clEnqueueNDRangeKernel(*queue, kernel, 1, NULL, global_dims,
|
||||
local_dims, 0, NULL, &event);
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
if (result != SUCCEEDED) {
|
||||
if (result != SUCCEEDED)
|
||||
{
|
||||
if (result == FAILED_TOO_BIG)
|
||||
log_info("\t\tExecute kernel failed: %s (global dim: %ld, local dim: %ld)\n", IGetErrorString(error), global_dims[0], local_dims[0]);
|
||||
log_info("\t\tExecute kernel failed: %s (global dim: %ld, local "
|
||||
"dim: %ld)\n",
|
||||
IGetErrorString(error), global_dims[0], local_dims[0]);
|
||||
else
|
||||
print_error(error, "clEnqueueNDRangeKernel failed");
|
||||
return result;
|
||||
@@ -317,7 +386,8 @@ int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id dev
|
||||
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
|
||||
if (result != SUCCEEDED) {
|
||||
if (result != SUCCEEDED)
|
||||
{
|
||||
if (result == FAILED_TOO_BIG)
|
||||
log_info("\t\tclFinish failed: %s.\n", IGetErrorString(error));
|
||||
else
|
||||
@@ -326,13 +396,20 @@ int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id dev
|
||||
}
|
||||
|
||||
// Verify that the event from the execution did not have an error
|
||||
error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
|
||||
test_error_abort(error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
|
||||
if (event_status < 0) {
|
||||
result = check_allocation_error(context, device_id, event_status, queue);
|
||||
if (result != SUCCEEDED) {
|
||||
error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
||||
sizeof(event_status), &event_status, NULL);
|
||||
test_error_abort(
|
||||
error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
|
||||
if (event_status < 0)
|
||||
{
|
||||
result =
|
||||
check_allocation_error(context, device_id, event_status, queue);
|
||||
if (result != SUCCEEDED)
|
||||
{
|
||||
if (result == FAILED_TOO_BIG)
|
||||
log_info("\t\tEvent returned from kernel execution indicates failure: %s.\n", IGetErrorString(event_status));
|
||||
log_info("\t\tEvent returned from kernel execution indicates "
|
||||
"failure: %s.\n",
|
||||
IGetErrorString(event_status));
|
||||
else
|
||||
print_error(event_status, "clEnqueueNDRangeKernel failed");
|
||||
return result;
|
||||
@@ -340,33 +417,46 @@ int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id dev
|
||||
}
|
||||
|
||||
// If we are not verifying the checksum return here
|
||||
if (!verify_checksum) {
|
||||
log_info("Note: Allocations were not initialized so kernel execution can not verify correct results.\n");
|
||||
if (!verify_checksum)
|
||||
{
|
||||
log_info("Note: Allocations were not initialized so kernel execution "
|
||||
"can not verify correct results.\n");
|
||||
return SUCCEEDED;
|
||||
}
|
||||
|
||||
// Verify the checksum.
|
||||
// Read back the result
|
||||
error = clEnqueueReadBuffer(*queue, result_mem, CL_TRUE, 0,
|
||||
sizeof(cl_uint) * NUM_OF_WORK_ITEMS,
|
||||
sizeof(cl_uint) * number_of_work_itmes,
|
||||
returned_results.data(), 0, NULL, NULL);
|
||||
test_error_abort(error, "clEnqueueReadBuffer failed");
|
||||
final_result = 0;
|
||||
if (test == BUFFER || test == IMAGE_READ || test == BUFFER_NON_BLOCKING || test == IMAGE_READ_NON_BLOCKING) {
|
||||
// For buffers or read images we are just looking at the sum of what each thread summed up
|
||||
for (i=0; i<NUM_OF_WORK_ITEMS; i++) {
|
||||
if (test == BUFFER || test == IMAGE_READ || test == BUFFER_NON_BLOCKING
|
||||
|| test == IMAGE_READ_NON_BLOCKING)
|
||||
{
|
||||
// For buffers or read images we are just looking at the sum of what
|
||||
// each thread summed up
|
||||
for (i = 0; i < number_of_work_itmes; i++)
|
||||
{
|
||||
final_result += returned_results[i];
|
||||
}
|
||||
if (final_result != checksum) {
|
||||
log_error("\t\tChecksum failed to verify. Expected %u got %u.\n", checksum, final_result);
|
||||
if (final_result != checksum)
|
||||
{
|
||||
log_error("\t\tChecksum failed to verify. Expected %u got %u.\n",
|
||||
checksum, final_result);
|
||||
return FAILED_ABORT;
|
||||
}
|
||||
log_info("\t\tChecksum verified (%u == %u).\n", checksum, final_result);
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
// For write images we need to verify the values
|
||||
for (i=0; i<number_of_mems_used; i++) {
|
||||
if (check_image(*queue, mems[i])) {
|
||||
log_error("\t\tImage contents failed to verify for image %d.\n", (int)i);
|
||||
for (i = 0; i < number_of_mems_used; i++)
|
||||
{
|
||||
if (check_image(*queue, mems[i]))
|
||||
{
|
||||
log_error("\t\tImage contents failed to verify for image %d.\n",
|
||||
(int)i);
|
||||
return FAILED_ABORT;
|
||||
}
|
||||
}
|
||||
@@ -376,7 +466,8 @@ int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id dev
|
||||
// Finish the test
|
||||
error = clFinish(*queue);
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
if (result != SUCCEEDED) {
|
||||
if (result != SUCCEEDED)
|
||||
{
|
||||
if (result == FAILED_TOO_BIG)
|
||||
log_info("\t\tclFinish failed: %s.\n", IGetErrorString(error));
|
||||
else
|
||||
@@ -386,5 +477,3 @@ int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id dev
|
||||
|
||||
return SUCCEEDED;
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user