mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 14:09:03 +00:00
Some conformance tests use directly the size returned by the runtime for max memory size to allocate buffers. This doesn't leave enough memory for the system to run the tests.
2423 lines
84 KiB
C++
2423 lines
84 KiB
C++
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "testBase.h"
|
|
#include "harness/typeWrappers.h"
|
|
#include "harness/testHarness.h"
|
|
#include <ctype.h>
|
|
#include <string.h>
|
|
|
|
const char *sample_single_param_kernel[] = {
|
|
"__kernel void sample_test(__global int *src)\n"
|
|
"{\n"
|
|
" size_t tid = get_global_id(0);\n"
|
|
"\n"
|
|
"}\n"
|
|
};
|
|
|
|
|
|
const char *sample_read_image_kernel_pattern[] = {
|
|
"__kernel void sample_test( __global float *result, ",
|
|
" )\n"
|
|
"{\n"
|
|
" sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | "
|
|
"CLK_FILTER_NEAREST;\n"
|
|
" size_t tid = get_global_id(0);\n"
|
|
" result[0] = 0.0f;\n",
|
|
"\n"
|
|
"}\n"
|
|
};
|
|
|
|
const char *sample_write_image_kernel_pattern[] = {
|
|
"__kernel void sample_test( ",
|
|
" )\n"
|
|
"{\n"
|
|
" size_t tid = get_global_id(0);\n",
|
|
"\n"
|
|
"}\n"
|
|
};
|
|
|
|
|
|
const char *sample_large_parmam_kernel_pattern[] = {
|
|
"__kernel void sample_test(%s, __global long *result)\n"
|
|
"{\n"
|
|
"result[0] = 0;\n"
|
|
"%s"
|
|
"\n"
|
|
"}\n"
|
|
};
|
|
|
|
const char *sample_large_int_parmam_kernel_pattern[] = {
|
|
"__kernel void sample_test(%s, __global int *result)\n"
|
|
"{\n"
|
|
"result[0] = 0;\n"
|
|
"%s"
|
|
"\n"
|
|
"}\n"
|
|
};
|
|
|
|
const char *sample_sampler_kernel_pattern[] = {
|
|
"__kernel void sample_test( read_only image2d_t src, __global int4 *dst",
|
|
", sampler_t sampler%d",
|
|
")\n"
|
|
"{\n"
|
|
" size_t tid = get_global_id(0);\n",
|
|
" dst[ 0 ] = read_imagei( src, sampler%d, (int2)( 0, 0 ) );\n",
|
|
"\n"
|
|
"}\n"
|
|
};
|
|
|
|
const char *sample_const_arg_kernel[] = {
|
|
"__kernel void sample_test(__constant int *src1, __global int *dst)\n"
|
|
"{\n"
|
|
" size_t tid = get_global_id(0);\n"
|
|
"\n"
|
|
" dst[tid] = src1[tid];\n"
|
|
"\n"
|
|
"}\n"
|
|
};
|
|
|
|
const char *sample_local_arg_kernel[] = {
|
|
"__kernel void sample_test(__local int *src1, __global int *global_src, "
|
|
"__global int *dst)\n"
|
|
"{\n"
|
|
" size_t tid = get_global_id(0);\n"
|
|
"\n"
|
|
" src1[tid] = global_src[tid];\n"
|
|
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
|
" dst[tid] = src1[tid];\n"
|
|
"\n"
|
|
"}\n"
|
|
};
|
|
|
|
const char *sample_const_max_arg_kernel_pattern =
|
|
"__kernel void sample_test(__constant int *src1 %s, __global int *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" dst[tid] = src1[tid];\n"
|
|
"%s"
|
|
"\n"
|
|
"}\n";
|
|
|
|
#define MAX_REDUCTION_FACTOR 4
|
|
|
|
int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error, retVal;
|
|
unsigned int maxThreadDim, threadDim, i;
|
|
clProgramWrapper program;
|
|
clKernelWrapper kernel;
|
|
clMemWrapper streams[1];
|
|
size_t *threads, *localThreads;
|
|
cl_event event;
|
|
cl_int event_status;
|
|
|
|
|
|
/* Get the max thread dimensions */
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
|
|
sizeof(maxThreadDim), &maxThreadDim, NULL);
|
|
test_error(error, "Unable to get max work item dimensions from device");
|
|
|
|
if (maxThreadDim < 3)
|
|
{
|
|
log_error("ERROR: Reported max work item dimensions is less than "
|
|
"required! (%d)\n",
|
|
maxThreadDim);
|
|
return -1;
|
|
}
|
|
|
|
log_info("Reported max thread dimensions of %d.\n", maxThreadDim);
|
|
|
|
/* Create a kernel to test with */
|
|
if (create_single_kernel_helper(context, &program, &kernel, 1,
|
|
sample_single_param_kernel, "sample_test")
|
|
!= 0)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
/* Create some I/O streams */
|
|
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
|
sizeof(cl_int) * 100, NULL, &error);
|
|
if (streams[0] == NULL)
|
|
{
|
|
log_error("ERROR: Creating test array failed!\n");
|
|
return -1;
|
|
}
|
|
|
|
/* Set the arguments */
|
|
error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
|
|
test_error(error, "Unable to set kernel arguments");
|
|
|
|
retVal = 0;
|
|
|
|
/* Now try running the kernel with up to that many threads */
|
|
for (threadDim = 1; threadDim <= maxThreadDim; threadDim++)
|
|
{
|
|
threads = (size_t *)malloc(sizeof(size_t) * maxThreadDim);
|
|
localThreads = (size_t *)malloc(sizeof(size_t) * maxThreadDim);
|
|
for (i = 0; i < maxThreadDim; i++)
|
|
{
|
|
threads[i] = 1;
|
|
localThreads[i] = 1;
|
|
}
|
|
|
|
error = clEnqueueNDRangeKernel(queue, kernel, maxThreadDim, NULL,
|
|
threads, localThreads, 0, NULL, &event);
|
|
test_error(error, "Failed clEnqueueNDRangeKernel");
|
|
|
|
// Verify that the event does not return an error from the execution
|
|
error = clWaitForEvents(1, &event);
|
|
test_error(error, "clWaitForEvent failed");
|
|
error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
|
sizeof(event_status), &event_status, NULL);
|
|
test_error(
|
|
error,
|
|
"clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
|
|
clReleaseEvent(event);
|
|
if (event_status < 0)
|
|
test_error(error, "Kernel execution event returned error");
|
|
|
|
/* All done */
|
|
free(threads);
|
|
free(localThreads);
|
|
}
|
|
|
|
return retVal;
|
|
}
|
|
|
|
|
|
int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
size_t *deviceMaxWorkItemSize;
|
|
unsigned int maxWorkItemDim;
|
|
|
|
/* Get the max work item dimensions */
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
|
|
sizeof(maxWorkItemDim), &maxWorkItemDim, NULL);
|
|
test_error(error, "Unable to get max work item dimensions from device");
|
|
|
|
log_info("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS returned %d\n",
|
|
maxWorkItemDim);
|
|
deviceMaxWorkItemSize = (size_t *)malloc(sizeof(size_t) * maxWorkItemDim);
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES,
|
|
sizeof(size_t) * maxWorkItemDim,
|
|
deviceMaxWorkItemSize, NULL);
|
|
test_error(error, "clDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed");
|
|
|
|
unsigned int i;
|
|
int errors = 0;
|
|
for (i = 0; i < maxWorkItemDim; i++)
|
|
{
|
|
if (deviceMaxWorkItemSize[i] < 1)
|
|
{
|
|
log_error("MAX_WORK_ITEM_SIZE in dimension %d is invalid: %lu\n", i,
|
|
deviceMaxWorkItemSize[i]);
|
|
errors++;
|
|
}
|
|
else
|
|
{
|
|
log_info("Dimension %d has max work item size %lu\n", i,
|
|
deviceMaxWorkItemSize[i]);
|
|
}
|
|
}
|
|
|
|
free(deviceMaxWorkItemSize);
|
|
|
|
if (errors) return -1;
|
|
return 0;
|
|
}
|
|
|
|
|
|
int test_min_max_work_group_size(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
size_t deviceMaxThreadSize;
|
|
|
|
/* Get the max thread dimensions */
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE,
|
|
sizeof(deviceMaxThreadSize), &deviceMaxThreadSize,
|
|
NULL);
|
|
test_error(error, "Unable to get max work group size from device");
|
|
|
|
log_info("Reported %ld max device work group size.\n", deviceMaxThreadSize);
|
|
|
|
if (deviceMaxThreadSize == 0)
|
|
{
|
|
log_error("ERROR: Max work group size is reported as zero!\n");
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_read_image_args(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
unsigned int maxReadImages, i;
|
|
unsigned int deviceAddressSize;
|
|
clProgramWrapper program;
|
|
char readArgLine[128], *programSrc;
|
|
const char *readArgPattern = ", read_only image2d_t srcimg%d";
|
|
clKernelWrapper kernel;
|
|
clMemWrapper *streams, result;
|
|
size_t threads[2];
|
|
cl_image_format image_format_desc;
|
|
size_t maxParameterSize;
|
|
cl_event event;
|
|
cl_int event_status;
|
|
cl_float image_data[4 * 4];
|
|
float image_result = 0.0f;
|
|
float actual_image_result;
|
|
cl_uint minRequiredReadImages = gIsEmbedded ? 8 : 128;
|
|
cl_device_type deviceType;
|
|
|
|
PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID)
|
|
image_format_desc.image_channel_order = CL_RGBA;
|
|
image_format_desc.image_channel_data_type = CL_FLOAT;
|
|
|
|
/* Get the max read image arg count */
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_READ_IMAGE_ARGS,
|
|
sizeof(maxReadImages), &maxReadImages, NULL);
|
|
test_error(error, "Unable to get max read image arg count from device");
|
|
|
|
if (maxReadImages < minRequiredReadImages)
|
|
{
|
|
log_error("ERROR: Reported max read image arg count is less than "
|
|
"required! (%d)\n",
|
|
maxReadImages);
|
|
return -1;
|
|
}
|
|
|
|
log_info("Reported %d max read image args.\n", maxReadImages);
|
|
|
|
error =
|
|
clGetDeviceInfo(deviceID, CL_DEVICE_ADDRESS_BITS,
|
|
sizeof(deviceAddressSize), &deviceAddressSize, NULL);
|
|
test_error(error, "Unable to query CL_DEVICE_ADDRESS_BITS for device");
|
|
deviceAddressSize /= 8; // convert from bits to bytes
|
|
|
|
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE,
|
|
sizeof(maxParameterSize), &maxParameterSize, NULL);
|
|
test_error(error, "Unable to get max parameter size from device");
|
|
|
|
if (!gIsEmbedded && maxReadImages >= 128 && maxParameterSize == 1024)
|
|
{
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(deviceType),
|
|
&deviceType, NULL);
|
|
test_error(error, "Unable to get device type from device");
|
|
|
|
if (deviceType != CL_DEVICE_TYPE_CUSTOM)
|
|
{
|
|
maxReadImages = 127;
|
|
}
|
|
}
|
|
// Subtract the size of the result
|
|
maxParameterSize -= deviceAddressSize;
|
|
|
|
// Calculate the number we can use
|
|
if (maxParameterSize / deviceAddressSize < maxReadImages)
|
|
{
|
|
log_info("WARNING: Max parameter size of %d bytes limits test to %d "
|
|
"max image arguments.\n",
|
|
(int)maxParameterSize,
|
|
(int)(maxParameterSize / deviceAddressSize));
|
|
maxReadImages = (unsigned int)(maxParameterSize / deviceAddressSize);
|
|
}
|
|
|
|
/* Create a program with that many read args */
|
|
programSrc = (char *)malloc(strlen(sample_read_image_kernel_pattern[0])
|
|
+ (strlen(readArgPattern) + 6) * (maxReadImages)
|
|
+ strlen(sample_read_image_kernel_pattern[1])
|
|
+ 1 + 40240);
|
|
|
|
strcpy(programSrc, sample_read_image_kernel_pattern[0]);
|
|
strcat(programSrc, "read_only image2d_t srcimg0");
|
|
for (i = 0; i < maxReadImages - 1; i++)
|
|
{
|
|
sprintf(readArgLine, readArgPattern, i + 1);
|
|
strcat(programSrc, readArgLine);
|
|
}
|
|
strcat(programSrc, sample_read_image_kernel_pattern[1]);
|
|
for (i = 0; i < maxReadImages; i++)
|
|
{
|
|
sprintf(
|
|
readArgLine,
|
|
"\tresult[0] += read_imagef( srcimg%d, sampler, (int2)(0,0)).x;\n",
|
|
i);
|
|
strcat(programSrc, readArgLine);
|
|
}
|
|
strcat(programSrc, sample_read_image_kernel_pattern[2]);
|
|
|
|
error =
|
|
create_single_kernel_helper(context, &program, &kernel, 1,
|
|
(const char **)&programSrc, "sample_test");
|
|
test_error(error, "Failed to create the program and kernel.");
|
|
free(programSrc);
|
|
|
|
result = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float), NULL,
|
|
&error);
|
|
test_error(error, "clCreateBufer failed");
|
|
|
|
/* Create some I/O streams */
|
|
streams = new clMemWrapper[maxReadImages + 1];
|
|
for (i = 0; i < maxReadImages; i++)
|
|
{
|
|
image_data[0] = i;
|
|
image_result += image_data[0];
|
|
streams[i] =
|
|
create_image_2d(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
|
&image_format_desc, 4, 4, 0, image_data, &error);
|
|
test_error(error, "Unable to allocate test image");
|
|
}
|
|
|
|
error = clSetKernelArg(kernel, 0, sizeof(result), &result);
|
|
test_error(error, "Unable to set kernel arguments");
|
|
|
|
/* Set the arguments */
|
|
for (i = 1; i < maxReadImages + 1; i++)
|
|
{
|
|
error =
|
|
clSetKernelArg(kernel, i, sizeof(streams[i - 1]), &streams[i - 1]);
|
|
test_error(error, "Unable to set kernel arguments");
|
|
}
|
|
|
|
/* Now try running the kernel */
|
|
threads[0] = threads[1] = 1;
|
|
error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0,
|
|
NULL, &event);
|
|
test_error(error, "clEnqueueNDRangeKernel failed");
|
|
|
|
// Verify that the event does not return an error from the execution
|
|
error = clWaitForEvents(1, &event);
|
|
test_error(error, "clWaitForEvent failed");
|
|
error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
|
sizeof(event_status), &event_status, NULL);
|
|
test_error(error,
|
|
"clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
|
|
clReleaseEvent(event);
|
|
if (event_status < 0)
|
|
test_error(error, "Kernel execution event returned error");
|
|
|
|
error = clEnqueueReadBuffer(queue, result, CL_TRUE, 0, sizeof(cl_float),
|
|
&actual_image_result, 0, NULL, NULL);
|
|
test_error(error, "clEnqueueReadBuffer failed");
|
|
|
|
delete[] streams;
|
|
|
|
if (actual_image_result != image_result)
|
|
{
|
|
log_error("Result failed to verify. Got %g, expected %g.\n",
|
|
actual_image_result, image_result);
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_write_image_args(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
unsigned int maxWriteImages, i;
|
|
clProgramWrapper program;
|
|
char writeArgLine[128], *programSrc;
|
|
const char *writeArgPattern = ", write_only image2d_t dstimg%d";
|
|
clKernelWrapper kernel;
|
|
clMemWrapper *streams;
|
|
size_t threads[2];
|
|
cl_image_format image_format_desc;
|
|
size_t maxParameterSize;
|
|
cl_event event;
|
|
cl_int event_status;
|
|
cl_uint minRequiredWriteImages = gIsEmbedded ? 1 : 8;
|
|
|
|
|
|
PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID)
|
|
image_format_desc.image_channel_order = CL_RGBA;
|
|
image_format_desc.image_channel_data_type = CL_UNORM_INT8;
|
|
|
|
/* Get the max read image arg count */
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WRITE_IMAGE_ARGS,
|
|
sizeof(maxWriteImages), &maxWriteImages, NULL);
|
|
test_error(error, "Unable to get max write image arg count from device");
|
|
|
|
if (maxWriteImages == 0)
|
|
{
|
|
log_info(
|
|
"WARNING: Device reports 0 for a max write image arg count (write "
|
|
"image arguments unsupported). Skipping test (implicitly passes). "
|
|
"This is only valid if the number of image formats is also 0.\n");
|
|
return 0;
|
|
}
|
|
|
|
if (maxWriteImages < minRequiredWriteImages)
|
|
{
|
|
log_error("ERROR: Reported max write image arg count is less than "
|
|
"required! (%d)\n",
|
|
maxWriteImages);
|
|
return -1;
|
|
}
|
|
|
|
log_info("Reported %d max write image args.\n", maxWriteImages);
|
|
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE,
|
|
sizeof(maxParameterSize), &maxParameterSize, NULL);
|
|
test_error(error, "Unable to get max parameter size from device");
|
|
|
|
// Calculate the number we can use
|
|
if (maxParameterSize / sizeof(cl_mem) < maxWriteImages)
|
|
{
|
|
log_info("WARNING: Max parameter size of %d bytes limits test to %d "
|
|
"max image arguments.\n",
|
|
(int)maxParameterSize,
|
|
(int)(maxParameterSize / sizeof(cl_mem)));
|
|
maxWriteImages = (unsigned int)(maxParameterSize / sizeof(cl_mem));
|
|
}
|
|
|
|
/* Create a program with that many write args + 1 */
|
|
programSrc = (char *)malloc(
|
|
strlen(sample_write_image_kernel_pattern[0])
|
|
+ (strlen(writeArgPattern) + 6) * (maxWriteImages + 1)
|
|
+ strlen(sample_write_image_kernel_pattern[1]) + 1 + 40240);
|
|
|
|
strcpy(programSrc, sample_write_image_kernel_pattern[0]);
|
|
strcat(programSrc, "write_only image2d_t dstimg0");
|
|
for (i = 1; i < maxWriteImages; i++)
|
|
{
|
|
sprintf(writeArgLine, writeArgPattern, i);
|
|
strcat(programSrc, writeArgLine);
|
|
}
|
|
strcat(programSrc, sample_write_image_kernel_pattern[1]);
|
|
for (i = 0; i < maxWriteImages; i++)
|
|
{
|
|
sprintf(writeArgLine,
|
|
"\twrite_imagef( dstimg%d, (int2)(0,0), (float4)(0,0,0,0));\n",
|
|
i);
|
|
strcat(programSrc, writeArgLine);
|
|
}
|
|
strcat(programSrc, sample_write_image_kernel_pattern[2]);
|
|
|
|
error =
|
|
create_single_kernel_helper(context, &program, &kernel, 1,
|
|
(const char **)&programSrc, "sample_test");
|
|
test_error(error, "Failed to create the program and kernel.");
|
|
free(programSrc);
|
|
|
|
|
|
/* Create some I/O streams */
|
|
streams = new clMemWrapper[maxWriteImages + 1];
|
|
for (i = 0; i < maxWriteImages; i++)
|
|
{
|
|
streams[i] =
|
|
create_image_2d(context, CL_MEM_READ_WRITE, &image_format_desc, 16,
|
|
16, 0, NULL, &error);
|
|
test_error(error, "Unable to allocate test image");
|
|
}
|
|
|
|
/* Set the arguments */
|
|
for (i = 0; i < maxWriteImages; i++)
|
|
{
|
|
error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]);
|
|
test_error(error, "Unable to set kernel arguments");
|
|
}
|
|
|
|
/* Now try running the kernel */
|
|
threads[0] = threads[1] = 16;
|
|
error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0,
|
|
NULL, &event);
|
|
test_error(error, "clEnqueueNDRangeKernel failed.");
|
|
|
|
// Verify that the event does not return an error from the execution
|
|
error = clWaitForEvents(1, &event);
|
|
test_error(error, "clWaitForEvent failed");
|
|
error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
|
sizeof(event_status), &event_status, NULL);
|
|
test_error(error,
|
|
"clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
|
|
clReleaseEvent(event);
|
|
if (event_status < 0)
|
|
test_error(error, "Kernel execution event returned error");
|
|
|
|
/* All done */
|
|
delete[] streams;
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
cl_ulong maxAllocSize, memSize, minSizeToTry, currentSize;
|
|
clMemWrapper memHdl;
|
|
|
|
cl_ulong requiredAllocSize;
|
|
|
|
if (gIsEmbedded)
|
|
requiredAllocSize = 1 * 1024 * 1024;
|
|
else
|
|
requiredAllocSize = 128 * 1024 * 1024;
|
|
|
|
/* Get the max mem alloc size, limit the alloc to half of the available
|
|
* memory */
|
|
maxAllocSize = get_device_info_max_mem_alloc_size(
|
|
deviceID, MAX_DEVICE_MEMORY_SIZE_DIVISOR);
|
|
memSize = get_device_info_global_mem_size(deviceID,
|
|
MAX_DEVICE_MEMORY_SIZE_DIVISOR);
|
|
|
|
if (memSize < maxAllocSize)
|
|
{
|
|
log_info("Global memory size is less than max allocation size, using "
|
|
"that.\n");
|
|
maxAllocSize = memSize;
|
|
}
|
|
|
|
if (memSize > (cl_ulong)SIZE_MAX)
|
|
{
|
|
memSize = (cl_ulong)SIZE_MAX;
|
|
}
|
|
|
|
if (maxAllocSize < requiredAllocSize)
|
|
{
|
|
log_error("ERROR: Reported max allocation size is less than required");
|
|
return -1;
|
|
}
|
|
|
|
log_info("Reported max allocation size of %lld bytes (%gMB) and global mem "
|
|
"size of %lld bytes (%gMB).\n",
|
|
maxAllocSize, maxAllocSize / (1024.0 * 1024.0), memSize,
|
|
memSize / (1024.0 * 1024.0));
|
|
|
|
minSizeToTry = maxAllocSize / 16;
|
|
currentSize = maxAllocSize;
|
|
while (currentSize >= maxAllocSize / MAX_REDUCTION_FACTOR)
|
|
{
|
|
|
|
log_info("Trying to create a buffer of size of %lld bytes (%gMB).\n",
|
|
currentSize, (double)currentSize / (1024.0 * 1024.0));
|
|
memHdl = clCreateBuffer(context, CL_MEM_READ_ONLY, (size_t)currentSize,
|
|
NULL, &error);
|
|
if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE
|
|
|| error == CL_OUT_OF_RESOURCES || error == CL_OUT_OF_HOST_MEMORY)
|
|
{
|
|
log_info("\tAllocation failed at size of %lld bytes (%gMB).\n",
|
|
currentSize, (double)currentSize / (1024.0 * 1024.0));
|
|
currentSize -= minSizeToTry;
|
|
continue;
|
|
}
|
|
test_error(error, "clCreateBuffer failed for maximum sized buffer.");
|
|
return 0;
|
|
}
|
|
log_error("Failed to allocate even %lld bytes (%gMB).\n", currentSize,
|
|
(double)currentSize / (1024.0 * 1024.0));
|
|
return -1;
|
|
}
|
|
|
|
int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
size_t maxDimension;
|
|
clMemWrapper streams[1];
|
|
cl_image_format image_format_desc;
|
|
cl_ulong maxAllocSize;
|
|
cl_uint minRequiredDimension;
|
|
|
|
PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID)
|
|
|
|
auto version = get_device_cl_version(deviceID);
|
|
if (version == Version(1, 0))
|
|
{
|
|
minRequiredDimension = gIsEmbedded ? 2048 : 4096;
|
|
}
|
|
else
|
|
{
|
|
minRequiredDimension = gIsEmbedded ? 2048 : 8192;
|
|
}
|
|
|
|
|
|
/* Just get any ol format to test with */
|
|
error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE2D,
|
|
CL_MEM_READ_WRITE, 0, &image_format_desc);
|
|
test_error(error, "Unable to obtain suitable image format to test with!");
|
|
|
|
/* Get the max 2d image width */
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE2D_MAX_WIDTH,
|
|
sizeof(maxDimension), &maxDimension, NULL);
|
|
test_error(error, "Unable to get max image 2d width from device");
|
|
|
|
if (maxDimension < minRequiredDimension)
|
|
{
|
|
log_error(
|
|
"ERROR: Reported max image 2d width is less than required! (%d)\n",
|
|
(int)maxDimension);
|
|
return -1;
|
|
}
|
|
log_info("Max reported width is %ld.\n", maxDimension);
|
|
|
|
/* Verify we can use the format */
|
|
image_format_desc.image_channel_data_type = CL_UNORM_INT8;
|
|
image_format_desc.image_channel_order = CL_RGBA;
|
|
if (!is_image_format_supported(context, CL_MEM_READ_ONLY,
|
|
CL_MEM_OBJECT_IMAGE2D, &image_format_desc))
|
|
{
|
|
log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
|
|
return -1;
|
|
}
|
|
|
|
/* Verify that we can actually allocate an image that large */
|
|
maxAllocSize = get_device_info_max_mem_alloc_size(
|
|
deviceID, MAX_DEVICE_MEMORY_SIZE_DIVISOR);
|
|
if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize)
|
|
{
|
|
log_error("Can not allocate a large enough image (min size: %lld "
|
|
"bytes, max allowed: %lld bytes) to test.\n",
|
|
(cl_ulong)maxDimension * 1 * 4, maxAllocSize);
|
|
return -1;
|
|
}
|
|
|
|
log_info("Attempting to create an image of size %d x 1 = %gMB.\n",
|
|
(int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0));
|
|
|
|
/* Try to allocate a very big image */
|
|
streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &image_format_desc,
|
|
maxDimension, 1, 0, NULL, &error);
|
|
if ((streams[0] == NULL) || (error != CL_SUCCESS))
|
|
{
|
|
print_error(error, "Image 2D creation failed for maximum width");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
size_t maxDimension;
|
|
clMemWrapper streams[1];
|
|
cl_image_format image_format_desc;
|
|
cl_ulong maxAllocSize;
|
|
cl_uint minRequiredDimension;
|
|
|
|
PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID)
|
|
|
|
auto version = get_device_cl_version(deviceID);
|
|
if (version == Version(1, 0))
|
|
{
|
|
minRequiredDimension = gIsEmbedded ? 2048 : 4096;
|
|
}
|
|
else
|
|
{
|
|
minRequiredDimension = gIsEmbedded ? 2048 : 8192;
|
|
}
|
|
|
|
/* Just get any ol format to test with */
|
|
error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE2D,
|
|
CL_MEM_READ_WRITE, 0, &image_format_desc);
|
|
test_error(error, "Unable to obtain suitable image format to test with!");
|
|
|
|
/* Get the max 2d image width */
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
|
|
sizeof(maxDimension), &maxDimension, NULL);
|
|
test_error(error, "Unable to get max image 2d height from device");
|
|
|
|
if (maxDimension < minRequiredDimension)
|
|
{
|
|
log_error(
|
|
"ERROR: Reported max image 2d height is less than required! (%d)\n",
|
|
(int)maxDimension);
|
|
return -1;
|
|
}
|
|
log_info("Max reported height is %ld.\n", maxDimension);
|
|
|
|
/* Verify we can use the format */
|
|
image_format_desc.image_channel_data_type = CL_UNORM_INT8;
|
|
image_format_desc.image_channel_order = CL_RGBA;
|
|
if (!is_image_format_supported(context, CL_MEM_READ_ONLY,
|
|
CL_MEM_OBJECT_IMAGE2D, &image_format_desc))
|
|
{
|
|
log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
|
|
return -1;
|
|
}
|
|
|
|
/* Verify that we can actually allocate an image that large */
|
|
maxAllocSize = get_device_info_max_mem_alloc_size(
|
|
deviceID, MAX_DEVICE_MEMORY_SIZE_DIVISOR);
|
|
if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize)
|
|
{
|
|
log_error("Can not allocate a large enough image (min size: %lld "
|
|
"bytes, max allowed: %lld bytes) to test.\n",
|
|
(cl_ulong)maxDimension * 1 * 4, maxAllocSize);
|
|
return -1;
|
|
}
|
|
|
|
log_info("Attempting to create an image of size 1 x %d = %gMB.\n",
|
|
(int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0));
|
|
|
|
/* Try to allocate a very big image */
|
|
streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &image_format_desc,
|
|
1, maxDimension, 0, NULL, &error);
|
|
if ((streams[0] == NULL) || (error != CL_SUCCESS))
|
|
{
|
|
print_error(error, "Image 2D creation failed for maximum height");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
size_t maxDimension;
|
|
clMemWrapper streams[1];
|
|
cl_image_format image_format_desc;
|
|
cl_ulong maxAllocSize;
|
|
|
|
|
|
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(deviceID)
|
|
|
|
/* Just get any ol format to test with */
|
|
error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D,
|
|
CL_MEM_READ_ONLY, 0, &image_format_desc);
|
|
test_error(error, "Unable to obtain suitable image format to test with!");
|
|
|
|
/* Get the max 2d image width */
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE3D_MAX_WIDTH,
|
|
sizeof(maxDimension), &maxDimension, NULL);
|
|
test_error(error, "Unable to get max image 3d width from device");
|
|
|
|
if (maxDimension < 2048)
|
|
{
|
|
log_error(
|
|
"ERROR: Reported max image 3d width is less than required! (%d)\n",
|
|
(int)maxDimension);
|
|
return -1;
|
|
}
|
|
log_info("Max reported width is %ld.\n", maxDimension);
|
|
|
|
/* Verify we can use the format */
|
|
image_format_desc.image_channel_data_type = CL_UNORM_INT8;
|
|
image_format_desc.image_channel_order = CL_RGBA;
|
|
if (!is_image_format_supported(context, CL_MEM_READ_ONLY,
|
|
CL_MEM_OBJECT_IMAGE3D, &image_format_desc))
|
|
{
|
|
log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
|
|
return -1;
|
|
}
|
|
|
|
/* Verify that we can actually allocate an image that large */
|
|
maxAllocSize = get_device_info_max_mem_alloc_size(
|
|
deviceID, MAX_DEVICE_MEMORY_SIZE_DIVISOR);
|
|
if ((cl_ulong)maxDimension * 2 * 4 > maxAllocSize)
|
|
{
|
|
log_error("Can not allocate a large enough image (min size: %lld "
|
|
"bytes, max allowed: %lld bytes) to test.\n",
|
|
(cl_ulong)maxDimension * 2 * 4, maxAllocSize);
|
|
return -1;
|
|
}
|
|
|
|
log_info("Attempting to create an image of size %d x 1 x 2 = %gMB.\n",
|
|
(int)maxDimension,
|
|
(2 * (float)maxDimension * 4 / 1024.0 / 1024.0));
|
|
|
|
/* Try to allocate a very big image */
|
|
streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &image_format_desc,
|
|
maxDimension, 1, 2, 0, 0, NULL, &error);
|
|
if ((streams[0] == NULL) || (error != CL_SUCCESS))
|
|
{
|
|
print_error(error, "Image 3D creation failed for maximum width");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
size_t maxDimension;
|
|
clMemWrapper streams[1];
|
|
cl_image_format image_format_desc;
|
|
cl_ulong maxAllocSize;
|
|
|
|
|
|
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(deviceID)
|
|
|
|
/* Just get any ol format to test with */
|
|
error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D,
|
|
CL_MEM_READ_ONLY, 0, &image_format_desc);
|
|
test_error(error, "Unable to obtain suitable image format to test with!");
|
|
|
|
/* Get the max 2d image width */
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
|
|
sizeof(maxDimension), &maxDimension, NULL);
|
|
test_error(error, "Unable to get max image 3d height from device");
|
|
|
|
if (maxDimension < 2048)
|
|
{
|
|
log_error(
|
|
"ERROR: Reported max image 3d height is less than required! (%d)\n",
|
|
(int)maxDimension);
|
|
return -1;
|
|
}
|
|
log_info("Max reported height is %ld.\n", maxDimension);
|
|
|
|
/* Verify we can use the format */
|
|
image_format_desc.image_channel_data_type = CL_UNORM_INT8;
|
|
image_format_desc.image_channel_order = CL_RGBA;
|
|
if (!is_image_format_supported(context, CL_MEM_READ_ONLY,
|
|
CL_MEM_OBJECT_IMAGE3D, &image_format_desc))
|
|
{
|
|
log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
|
|
return -1;
|
|
}
|
|
|
|
/* Verify that we can actually allocate an image that large */
|
|
maxAllocSize = get_device_info_max_mem_alloc_size(
|
|
deviceID, MAX_DEVICE_MEMORY_SIZE_DIVISOR);
|
|
if ((cl_ulong)maxDimension * 2 * 4 > maxAllocSize)
|
|
{
|
|
log_error("Can not allocate a large enough image (min size: %lld "
|
|
"bytes, max allowed: %lld bytes) to test.\n",
|
|
(cl_ulong)maxDimension * 2 * 4, maxAllocSize);
|
|
return -1;
|
|
}
|
|
|
|
log_info("Attempting to create an image of size 1 x %d x 2 = %gMB.\n",
|
|
(int)maxDimension,
|
|
(2 * (float)maxDimension * 4 / 1024.0 / 1024.0));
|
|
|
|
/* Try to allocate a very big image */
|
|
streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &image_format_desc,
|
|
1, maxDimension, 2, 0, 0, NULL, &error);
|
|
if ((streams[0] == NULL) || (error != CL_SUCCESS))
|
|
{
|
|
print_error(error, "Image 3D creation failed for maximum height");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
size_t maxDimension;
|
|
clMemWrapper streams[1];
|
|
cl_image_format image_format_desc;
|
|
cl_ulong maxAllocSize;
|
|
|
|
|
|
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(deviceID)
|
|
|
|
/* Just get any ol format to test with */
|
|
error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D,
|
|
CL_MEM_READ_ONLY, 0, &image_format_desc);
|
|
test_error(error, "Unable to obtain suitable image format to test with!");
|
|
|
|
/* Get the max 2d image width */
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE3D_MAX_DEPTH,
|
|
sizeof(maxDimension), &maxDimension, NULL);
|
|
test_error(error, "Unable to get max image 3d depth from device");
|
|
|
|
if (maxDimension < 2048)
|
|
{
|
|
log_error(
|
|
"ERROR: Reported max image 3d depth is less than required! (%d)\n",
|
|
(int)maxDimension);
|
|
return -1;
|
|
}
|
|
log_info("Max reported depth is %ld.\n", maxDimension);
|
|
|
|
/* Verify we can use the format */
|
|
image_format_desc.image_channel_data_type = CL_UNORM_INT8;
|
|
image_format_desc.image_channel_order = CL_RGBA;
|
|
if (!is_image_format_supported(context, CL_MEM_READ_ONLY,
|
|
CL_MEM_OBJECT_IMAGE3D, &image_format_desc))
|
|
{
|
|
log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
|
|
return -1;
|
|
}
|
|
|
|
/* Verify that we can actually allocate an image that large */
|
|
maxAllocSize = get_device_info_max_mem_alloc_size(
|
|
deviceID, MAX_DEVICE_MEMORY_SIZE_DIVISOR);
|
|
if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize)
|
|
{
|
|
log_error("Can not allocate a large enough image (min size: %lld "
|
|
"bytes, max allowed: %lld bytes) to test.\n",
|
|
(cl_ulong)maxDimension * 1 * 4, maxAllocSize);
|
|
return -1;
|
|
}
|
|
|
|
log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n",
|
|
(int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0));
|
|
|
|
/* Try to allocate a very big image */
|
|
streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &image_format_desc,
|
|
1, 1, maxDimension, 0, 0, NULL, &error);
|
|
if ((streams[0] == NULL) || (error != CL_SUCCESS))
|
|
{
|
|
print_error(error, "Image 3D creation failed for maximum depth");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_image_array_size(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
size_t maxDimension;
|
|
clMemWrapper streams[1];
|
|
cl_image_format image_format_desc;
|
|
cl_ulong maxAllocSize;
|
|
size_t minRequiredDimension = gIsEmbedded ? 256 : 2048;
|
|
|
|
PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID);
|
|
|
|
/* Just get any ol format to test with */
|
|
error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE2D_ARRAY,
|
|
CL_MEM_READ_WRITE, 0, &image_format_desc);
|
|
test_error(error, "Unable to obtain suitable image format to test with!");
|
|
|
|
/* Get the max image array width */
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE,
|
|
sizeof(maxDimension), &maxDimension, NULL);
|
|
test_error(error, "Unable to get max image array size from device");
|
|
|
|
if (maxDimension < minRequiredDimension)
|
|
{
|
|
log_error("ERROR: Reported max image array size is less than required! "
|
|
"(%d)\n",
|
|
(int)maxDimension);
|
|
return -1;
|
|
}
|
|
log_info("Max reported image array size is %ld.\n", maxDimension);
|
|
|
|
/* Verify we can use the format */
|
|
image_format_desc.image_channel_data_type = CL_UNORM_INT8;
|
|
image_format_desc.image_channel_order = CL_RGBA;
|
|
if (!is_image_format_supported(context, CL_MEM_READ_ONLY,
|
|
CL_MEM_OBJECT_IMAGE2D_ARRAY,
|
|
&image_format_desc))
|
|
{
|
|
log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
|
|
return -1;
|
|
}
|
|
|
|
/* Verify that we can actually allocate an image that large */
|
|
maxAllocSize = get_device_info_max_mem_alloc_size(
|
|
deviceID, MAX_DEVICE_MEMORY_SIZE_DIVISOR);
|
|
if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize)
|
|
{
|
|
log_error("Can not allocate a large enough image (min size: %lld "
|
|
"bytes, max allowed: %lld bytes) to test.\n",
|
|
(cl_ulong)maxDimension * 1 * 4, maxAllocSize);
|
|
return -1;
|
|
}
|
|
|
|
log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n",
|
|
(int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0));
|
|
|
|
/* Try to allocate a very big image */
|
|
streams[0] =
|
|
create_image_2d_array(context, CL_MEM_READ_ONLY, &image_format_desc, 1,
|
|
1, maxDimension, 0, 0, NULL, &error);
|
|
if ((streams[0] == NULL) || (error != CL_SUCCESS))
|
|
{
|
|
print_error(error,
|
|
"2D Image Array creation failed for maximum array size");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
size_t maxDimensionPixels;
|
|
clMemWrapper streams[2];
|
|
cl_image_format image_format_desc = { 0 };
|
|
cl_ulong maxAllocSize;
|
|
size_t minRequiredDimension = gIsEmbedded ? 2048 : 65536;
|
|
unsigned int i = 0;
|
|
size_t pixelBytes = 0;
|
|
|
|
PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID);
|
|
|
|
/* Get the max memory allocation size, divide it */
|
|
maxAllocSize = get_device_info_max_mem_alloc_size(
|
|
deviceID, MAX_DEVICE_MEMORY_SIZE_DIVISOR);
|
|
|
|
/* Get the max image array width */
|
|
error =
|
|
clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE,
|
|
sizeof(maxDimensionPixels), &maxDimensionPixels, NULL);
|
|
test_error(error, "Unable to get max image buffer size from device");
|
|
|
|
if (maxDimensionPixels < minRequiredDimension)
|
|
{
|
|
log_error("ERROR: Reported max image buffer size is less than "
|
|
"required! (%d)\n",
|
|
(int)maxDimensionPixels);
|
|
return -1;
|
|
}
|
|
log_info("Max reported image buffer size is %ld pixels.\n",
|
|
maxDimensionPixels);
|
|
|
|
pixelBytes = maxAllocSize / maxDimensionPixels;
|
|
if (pixelBytes == 0)
|
|
{
|
|
log_error("Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than "
|
|
"CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image "
|
|
"of maximum size!\n");
|
|
return -1;
|
|
}
|
|
|
|
error = -1;
|
|
for (i = pixelBytes; i > 0; --i)
|
|
{
|
|
error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE1D,
|
|
CL_MEM_READ_ONLY, i, &image_format_desc);
|
|
if (error == CL_SUCCESS)
|
|
{
|
|
pixelBytes = i;
|
|
break;
|
|
}
|
|
}
|
|
test_error(error,
|
|
"Device does not support format to be used to allocate image of "
|
|
"CL_DEVICE_IMAGE_MAX_BUFFER_SIZE\n");
|
|
|
|
log_info("Attempting to create an 1D image with channel order %s from "
|
|
"buffer of size %d = %gMB.\n",
|
|
GetChannelOrderName(image_format_desc.image_channel_order),
|
|
(int)maxDimensionPixels,
|
|
((float)maxDimensionPixels * pixelBytes / 1024.0 / 1024.0));
|
|
|
|
/* Try to allocate a buffer */
|
|
streams[0] = clCreateBuffer(context, CL_MEM_READ_ONLY,
|
|
maxDimensionPixels * pixelBytes, NULL, &error);
|
|
if ((streams[0] == NULL) || (error != CL_SUCCESS))
|
|
{
|
|
print_error(error,
|
|
"Buffer creation failed for maximum image buffer size");
|
|
return -1;
|
|
}
|
|
|
|
/* Try to allocate a 1D image array from buffer */
|
|
streams[1] =
|
|
create_image_1d(context, CL_MEM_READ_ONLY, &image_format_desc,
|
|
maxDimensionPixels, 0, NULL, streams[0], &error);
|
|
if ((streams[0] == NULL) || (error != CL_SUCCESS))
|
|
{
|
|
print_error(error,
|
|
"1D Image from buffer creation failed for maximum image "
|
|
"buffer size");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
int test_min_max_parameter_size(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error, i;
|
|
size_t maxSize;
|
|
char *programSrc;
|
|
char *ptr;
|
|
size_t numberExpected;
|
|
long numberOfIntParametersToTry;
|
|
char *argumentLine, *codeLines;
|
|
void *data;
|
|
cl_long long_result, expectedResult;
|
|
cl_int int_result;
|
|
size_t decrement;
|
|
cl_event event;
|
|
cl_int event_status;
|
|
bool embeddedNoLong = gIsEmbedded && !gHasLong;
|
|
|
|
|
|
/* Get the max param size */
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE,
|
|
sizeof(maxSize), &maxSize, NULL);
|
|
test_error(error, "Unable to get max parameter size from device");
|
|
|
|
|
|
if (((!gIsEmbedded) && (maxSize < 1024))
|
|
|| ((gIsEmbedded) && (maxSize < 256)))
|
|
{
|
|
log_error(
|
|
"ERROR: Reported max parameter size is less than required! (%d)\n",
|
|
(int)maxSize);
|
|
return -1;
|
|
}
|
|
|
|
/* The embedded profile without cles_khr_int64 extension does not require
|
|
* longs, so use ints */
|
|
if (embeddedNoLong)
|
|
numberOfIntParametersToTry = numberExpected =
|
|
(maxSize - sizeof(cl_mem)) / sizeof(cl_int);
|
|
else
|
|
numberOfIntParametersToTry = numberExpected =
|
|
(maxSize - sizeof(cl_mem)) / sizeof(cl_long);
|
|
|
|
decrement = (size_t)(numberOfIntParametersToTry / 8);
|
|
if (decrement < 1) decrement = 1;
|
|
log_info("Reported max parameter size of %d bytes.\n", (int)maxSize);
|
|
|
|
while (numberOfIntParametersToTry > 0)
|
|
{
|
|
// These need to be inside to be deallocated automatically on each loop
|
|
// iteration.
|
|
clProgramWrapper program;
|
|
clMemWrapper mem;
|
|
clKernelWrapper kernel;
|
|
|
|
if (embeddedNoLong)
|
|
{
|
|
log_info(
|
|
"Trying a kernel with %ld int arguments (%ld bytes) and one "
|
|
"cl_mem (%ld bytes) for %ld bytes total.\n",
|
|
numberOfIntParametersToTry,
|
|
sizeof(cl_int) * numberOfIntParametersToTry, sizeof(cl_mem),
|
|
sizeof(cl_mem) + numberOfIntParametersToTry * sizeof(cl_int));
|
|
}
|
|
else
|
|
{
|
|
log_info(
|
|
"Trying a kernel with %ld long arguments (%ld bytes) and one "
|
|
"cl_mem (%ld bytes) for %ld bytes total.\n",
|
|
numberOfIntParametersToTry,
|
|
sizeof(cl_long) * numberOfIntParametersToTry, sizeof(cl_mem),
|
|
sizeof(cl_mem) + numberOfIntParametersToTry * sizeof(cl_long));
|
|
}
|
|
|
|
// Allocate memory for the program storage
|
|
data = malloc(sizeof(cl_long) * numberOfIntParametersToTry);
|
|
|
|
argumentLine =
|
|
(char *)malloc(sizeof(char) * numberOfIntParametersToTry * 32);
|
|
codeLines =
|
|
(char *)malloc(sizeof(char) * numberOfIntParametersToTry * 32);
|
|
programSrc = (char *)malloc(sizeof(char)
|
|
* (numberOfIntParametersToTry * 64 + 1024));
|
|
argumentLine[0] = '\0';
|
|
codeLines[0] = '\0';
|
|
programSrc[0] = '\0';
|
|
|
|
// Generate our results
|
|
expectedResult = 0;
|
|
for (i = 0; i < (int)numberOfIntParametersToTry; i++)
|
|
{
|
|
if (gHasLong)
|
|
{
|
|
((cl_long *)data)[i] = i;
|
|
expectedResult += i;
|
|
}
|
|
else
|
|
{
|
|
((cl_int *)data)[i] = i;
|
|
expectedResult += i;
|
|
}
|
|
}
|
|
|
|
// Build the program
|
|
if (gHasLong)
|
|
sprintf(argumentLine, "%s", "long arg0");
|
|
else
|
|
sprintf(argumentLine, "%s", "int arg0");
|
|
|
|
sprintf(codeLines, "%s", "result[0] += arg0;");
|
|
for (i = 1; i < (int)numberOfIntParametersToTry; i++)
|
|
{
|
|
if (gHasLong)
|
|
sprintf(argumentLine + strlen(argumentLine), ", long arg%d", i);
|
|
else
|
|
sprintf(argumentLine + strlen(argumentLine), ", int arg%d", i);
|
|
|
|
sprintf(codeLines + strlen(codeLines), "\nresult[0] += arg%d;", i);
|
|
}
|
|
|
|
/* Create a kernel to test with */
|
|
sprintf(programSrc,
|
|
gHasLong ? sample_large_parmam_kernel_pattern[0]
|
|
: sample_large_int_parmam_kernel_pattern[0],
|
|
argumentLine, codeLines);
|
|
|
|
ptr = programSrc;
|
|
if (create_single_kernel_helper(context, &program, &kernel, 1,
|
|
(const char **)&ptr, "sample_test")
|
|
!= 0)
|
|
{
|
|
log_info("Create program failed, decrementing number of parameters "
|
|
"to try.\n");
|
|
numberOfIntParametersToTry -= decrement;
|
|
continue;
|
|
}
|
|
|
|
/* Try to set a large argument to the kernel */
|
|
mem = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_long), NULL,
|
|
&error);
|
|
test_error(error, "clCreateBuffer failed");
|
|
|
|
for (i = 0; i < (int)numberOfIntParametersToTry; i++)
|
|
{
|
|
if (gHasLong)
|
|
error = clSetKernelArg(kernel, i, sizeof(cl_long),
|
|
&(((cl_long *)data)[i]));
|
|
else
|
|
error = clSetKernelArg(kernel, i, sizeof(cl_int),
|
|
&(((cl_int *)data)[i]));
|
|
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
log_info("clSetKernelArg failed (%s), decrementing number of "
|
|
"parameters to try.\n",
|
|
IGetErrorString(error));
|
|
numberOfIntParametersToTry -= decrement;
|
|
break;
|
|
}
|
|
}
|
|
if (error != CL_SUCCESS) continue;
|
|
|
|
|
|
error = clSetKernelArg(kernel, i, sizeof(cl_mem), &mem);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
log_info("clSetKernelArg failed (%s), decrementing number of "
|
|
"parameters to try.\n",
|
|
IGetErrorString(error));
|
|
numberOfIntParametersToTry -= decrement;
|
|
continue;
|
|
}
|
|
|
|
size_t globalDim[3] = { 1, 1, 1 }, localDim[3] = { 1, 1, 1 };
|
|
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim,
|
|
localDim, 0, NULL, &event);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
log_info("clEnqueueNDRangeKernel failed (%s), decrementing number "
|
|
"of parameters to try.\n",
|
|
IGetErrorString(error));
|
|
numberOfIntParametersToTry -= decrement;
|
|
continue;
|
|
}
|
|
|
|
// Verify that the event does not return an error from the execution
|
|
error = clWaitForEvents(1, &event);
|
|
test_error(error, "clWaitForEvent failed");
|
|
error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
|
sizeof(event_status), &event_status, NULL);
|
|
test_error(
|
|
error,
|
|
"clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
|
|
clReleaseEvent(event);
|
|
if (event_status < 0)
|
|
test_error(error, "Kernel execution event returned error");
|
|
|
|
if (gHasLong)
|
|
error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_long),
|
|
&long_result, 0, NULL, NULL);
|
|
else
|
|
error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_int),
|
|
&int_result, 0, NULL, NULL);
|
|
|
|
test_error(error, "clEnqueueReadBuffer failed")
|
|
|
|
free(data);
|
|
free(argumentLine);
|
|
free(codeLines);
|
|
free(programSrc);
|
|
|
|
if (gHasLong)
|
|
{
|
|
if (long_result != expectedResult)
|
|
{
|
|
log_error("Expected result (%lld) does not equal actual result "
|
|
"(%lld).\n",
|
|
expectedResult, long_result);
|
|
numberOfIntParametersToTry -= decrement;
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
log_info("Results verified at %ld bytes of arguments.\n",
|
|
sizeof(cl_mem)
|
|
+ numberOfIntParametersToTry * sizeof(cl_long));
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (int_result != expectedResult)
|
|
{
|
|
log_error("Expected result (%lld) does not equal actual result "
|
|
"(%d).\n",
|
|
expectedResult, int_result);
|
|
numberOfIntParametersToTry -= decrement;
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
log_info("Results verified at %ld bytes of arguments.\n",
|
|
sizeof(cl_mem)
|
|
+ numberOfIntParametersToTry * sizeof(cl_int));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (numberOfIntParametersToTry == (long)numberExpected) return 0;
|
|
return -1;
|
|
}
|
|
|
|
int test_min_max_samplers(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
cl_uint maxSamplers, i;
|
|
clProgramWrapper program;
|
|
clKernelWrapper kernel;
|
|
char *programSrc, samplerLine[1024];
|
|
size_t maxParameterSize;
|
|
cl_event event;
|
|
cl_int event_status;
|
|
cl_uint minRequiredSamplers = gIsEmbedded ? 8 : 16;
|
|
|
|
|
|
PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID)
|
|
|
|
/* Get the max value */
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_SAMPLERS,
|
|
sizeof(maxSamplers), &maxSamplers, NULL);
|
|
test_error(error, "Unable to get max sampler count from device");
|
|
|
|
if (maxSamplers < minRequiredSamplers)
|
|
{
|
|
log_error(
|
|
"ERROR: Reported max sampler count is less than required! (%d)\n",
|
|
(int)maxSamplers);
|
|
return -1;
|
|
}
|
|
|
|
log_info("Reported max %d samplers.\n", maxSamplers);
|
|
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE,
|
|
sizeof(maxParameterSize), &maxParameterSize, NULL);
|
|
test_error(error, "Unable to get max parameter size from device");
|
|
|
|
// Subtract the size of the result
|
|
maxParameterSize -= 2 * sizeof(cl_mem);
|
|
|
|
// Calculate the number we can use
|
|
if (maxParameterSize / sizeof(cl_sampler) < maxSamplers)
|
|
{
|
|
log_info("WARNING: Max parameter size of %d bytes limits test to %d "
|
|
"max sampler arguments.\n",
|
|
(int)maxParameterSize,
|
|
(int)(maxParameterSize / sizeof(cl_sampler)));
|
|
maxSamplers = (unsigned int)(maxParameterSize / sizeof(cl_sampler));
|
|
}
|
|
|
|
/* Create a kernel to test with */
|
|
programSrc = (char *)malloc(
|
|
(strlen(sample_sampler_kernel_pattern[1]) + 8) * (maxSamplers)
|
|
+ strlen(sample_sampler_kernel_pattern[0])
|
|
+ strlen(sample_sampler_kernel_pattern[2])
|
|
+ (strlen(sample_sampler_kernel_pattern[3]) + 8) * maxSamplers
|
|
+ strlen(sample_sampler_kernel_pattern[4]));
|
|
strcpy(programSrc, sample_sampler_kernel_pattern[0]);
|
|
for (i = 0; i < maxSamplers; i++)
|
|
{
|
|
sprintf(samplerLine, sample_sampler_kernel_pattern[1], i);
|
|
strcat(programSrc, samplerLine);
|
|
}
|
|
strcat(programSrc, sample_sampler_kernel_pattern[2]);
|
|
for (i = 0; i < maxSamplers; i++)
|
|
{
|
|
sprintf(samplerLine, sample_sampler_kernel_pattern[3], i);
|
|
strcat(programSrc, samplerLine);
|
|
}
|
|
strcat(programSrc, sample_sampler_kernel_pattern[4]);
|
|
|
|
|
|
error =
|
|
create_single_kernel_helper(context, &program, &kernel, 1,
|
|
(const char **)&programSrc, "sample_test");
|
|
test_error(error, "Failed to create the program and kernel.");
|
|
|
|
// We have to set up some fake parameters so it'll work
|
|
clSamplerWrapper *samplers = new clSamplerWrapper[maxSamplers];
|
|
|
|
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
|
|
|
|
clMemWrapper image = create_image_2d(context, CL_MEM_READ_WRITE, &format,
|
|
16, 16, 0, NULL, &error);
|
|
test_error(error, "Unable to create a test image");
|
|
|
|
clMemWrapper stream =
|
|
clCreateBuffer(context, CL_MEM_READ_WRITE, 16, NULL, &error);
|
|
test_error(error, "Unable to create test buffer");
|
|
|
|
error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &image);
|
|
error |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &stream);
|
|
test_error(error, "Unable to set kernel arguments");
|
|
for (i = 0; i < maxSamplers; i++)
|
|
{
|
|
samplers[i] = clCreateSampler(context, CL_FALSE, CL_ADDRESS_NONE,
|
|
CL_FILTER_NEAREST, &error);
|
|
test_error(error, "Unable to create sampler");
|
|
|
|
error = clSetKernelArg(kernel, 2 + i, sizeof(cl_sampler), &samplers[i]);
|
|
test_error(error, "Unable to set sampler argument");
|
|
}
|
|
|
|
size_t globalDim[3] = { 1, 1, 1 }, localDim[3] = { 1, 1, 1 };
|
|
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim,
|
|
0, NULL, &event);
|
|
test_error(
|
|
error,
|
|
"clEnqueueNDRangeKernel failed with maximum number of samplers.");
|
|
|
|
// Verify that the event does not return an error from the execution
|
|
error = clWaitForEvents(1, &event);
|
|
test_error(error, "clWaitForEvent failed");
|
|
error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
|
sizeof(event_status), &event_status, NULL);
|
|
test_error(error,
|
|
"clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
|
|
clReleaseEvent(event);
|
|
if (event_status < 0)
|
|
test_error(error, "Kernel execution event returned error");
|
|
|
|
free(programSrc);
|
|
delete[] samplers;
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
clProgramWrapper program;
|
|
clKernelWrapper kernel;
|
|
size_t threads[1], localThreads[1];
|
|
cl_int *constantData, *resultData;
|
|
cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize;
|
|
int i;
|
|
cl_event event;
|
|
cl_int event_status;
|
|
MTdata d;
|
|
|
|
/* Verify our test buffer won't be bigger than allowed */
|
|
maxSize = get_device_info_max_constant_buffer_size(
|
|
deviceID, MAX_DEVICE_MEMORY_SIZE_DIVISOR);
|
|
|
|
if ((0 == gIsEmbedded
|
|
&& (maxSize * MAX_DEVICE_MEMORY_SIZE_DIVISOR) < 64L * 1024L)
|
|
|| (maxSize * MAX_DEVICE_MEMORY_SIZE_DIVISOR) < 1L * 1024L)
|
|
{
|
|
log_error("ERROR: Reported max constant buffer size less than required "
|
|
"by OpenCL 1.0 (reported %d KB)\n",
|
|
(int)(maxSize / 1024L));
|
|
return -1;
|
|
}
|
|
|
|
log_info("Reported max constant buffer size of %lld bytes.\n", maxSize);
|
|
|
|
/* We have four buffers allocations */
|
|
maxGlobalSize = get_device_info_global_mem_size(
|
|
deviceID, MAX_DEVICE_MEMORY_SIZE_DIVISOR * 4);
|
|
|
|
if (maxSize > maxGlobalSize) maxSize = maxGlobalSize;
|
|
|
|
maxAllocSize = get_device_info_max_mem_alloc_size(
|
|
deviceID, MAX_DEVICE_MEMORY_SIZE_DIVISOR);
|
|
|
|
if (maxSize > maxAllocSize) maxSize = maxAllocSize;
|
|
|
|
/* Create a kernel to test with */
|
|
if (create_single_kernel_helper(context, &program, &kernel, 1,
|
|
sample_const_arg_kernel, "sample_test")
|
|
!= 0)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
/* Try the returned max size and decrease it until we get one that works. */
|
|
stepSize = maxSize / 16;
|
|
currentSize = maxSize;
|
|
int allocPassed = 0;
|
|
d = init_genrand(gRandomSeed);
|
|
while (!allocPassed && currentSize >= maxSize / MAX_REDUCTION_FACTOR)
|
|
{
|
|
log_info("Attempting to allocate constant buffer of size %lld bytes\n",
|
|
maxSize);
|
|
|
|
/* Create some I/O streams */
|
|
size_t sizeToAllocate =
|
|
((size_t)currentSize / sizeof(cl_int)) * sizeof(cl_int);
|
|
size_t numberOfInts = sizeToAllocate / sizeof(cl_int);
|
|
constantData = (cl_int *)malloc(sizeToAllocate);
|
|
if (constantData == NULL)
|
|
{
|
|
log_error("Failed to allocate memory for constantData!\n");
|
|
free_mtdata(d);
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
for (i = 0; i < (int)(numberOfInts); i++)
|
|
constantData[i] = (int)genrand_int32(d);
|
|
|
|
clMemWrapper streams[3];
|
|
streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
|
|
sizeToAllocate, constantData, &error);
|
|
test_error(error, "Creating test array failed");
|
|
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate,
|
|
NULL, &error);
|
|
test_error(error, "Creating test array failed");
|
|
|
|
|
|
/* Set the arguments */
|
|
error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
|
|
test_error(error, "Unable to set indexed kernel arguments");
|
|
error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
|
|
test_error(error, "Unable to set indexed kernel arguments");
|
|
|
|
|
|
/* Test running the kernel and verifying it */
|
|
threads[0] = numberOfInts;
|
|
localThreads[0] = 1;
|
|
log_info("Filling constant buffer with %d cl_ints (%d bytes).\n",
|
|
(int)threads[0], (int)(threads[0] * sizeof(cl_int)));
|
|
|
|
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
|
|
localThreads, 0, NULL, &event);
|
|
/* If we failed due to a resource issue, reduce the size and try again.
|
|
*/
|
|
if ((error == CL_OUT_OF_RESOURCES)
|
|
|| (error == CL_MEM_OBJECT_ALLOCATION_FAILURE)
|
|
|| (error == CL_OUT_OF_HOST_MEMORY))
|
|
{
|
|
log_info("Kernel enqueue failed at size %lld, trying at a reduced "
|
|
"size.\n",
|
|
currentSize);
|
|
currentSize -= stepSize;
|
|
free(constantData);
|
|
continue;
|
|
}
|
|
test_error(
|
|
error,
|
|
"clEnqueueNDRangeKernel with maximum constant buffer size failed.");
|
|
|
|
// Verify that the event does not return an error from the execution
|
|
error = clWaitForEvents(1, &event);
|
|
test_error(error, "clWaitForEvent failed");
|
|
error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
|
sizeof(event_status), &event_status, NULL);
|
|
test_error(
|
|
error,
|
|
"clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
|
|
clReleaseEvent(event);
|
|
if (event_status < 0)
|
|
{
|
|
if ((event_status == CL_OUT_OF_RESOURCES)
|
|
|| (event_status == CL_MEM_OBJECT_ALLOCATION_FAILURE)
|
|
|| (event_status == CL_OUT_OF_HOST_MEMORY))
|
|
{
|
|
log_info("Kernel event indicates failure at size %lld, trying "
|
|
"at a reduced size.\n",
|
|
currentSize);
|
|
currentSize -= stepSize;
|
|
free(constantData);
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
test_error(error, "Kernel execution event returned error");
|
|
}
|
|
}
|
|
|
|
/* Otherwise we did not fail due to resource issues. */
|
|
allocPassed = 1;
|
|
|
|
resultData = (cl_int *)malloc(sizeToAllocate);
|
|
if (resultData == NULL)
|
|
{
|
|
log_error("Failed to allocate memory for resultData!\n");
|
|
free(constantData);
|
|
free_mtdata(d);
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0,
|
|
sizeToAllocate, resultData, 0, NULL, NULL);
|
|
test_error(error, "clEnqueueReadBuffer failed");
|
|
|
|
for (i = 0; i < (int)(numberOfInts); i++)
|
|
if (constantData[i] != resultData[i])
|
|
{
|
|
log_error("Data failed to verify: constantData[%d]=%d != "
|
|
"resultData[%d]=%d\n",
|
|
i, constantData[i], i, resultData[i]);
|
|
free(constantData);
|
|
free(resultData);
|
|
free_mtdata(d);
|
|
d = NULL;
|
|
return -1;
|
|
}
|
|
|
|
free(constantData);
|
|
free(resultData);
|
|
}
|
|
free_mtdata(d);
|
|
d = NULL;
|
|
|
|
if (allocPassed)
|
|
{
|
|
if (currentSize < maxSize / MAX_REDUCTION_FACTOR)
|
|
{
|
|
log_error("Failed to allocate at least 1/8 of the reported "
|
|
"constant size.\n");
|
|
return -1;
|
|
}
|
|
else if (currentSize != maxSize)
|
|
{
|
|
log_info("Passed at reduced size. (%lld of %lld bytes)\n",
|
|
currentSize, maxSize);
|
|
return 0;
|
|
}
|
|
return 0;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
int test_min_max_constant_args(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
clProgramWrapper program;
|
|
clKernelWrapper kernel;
|
|
clMemWrapper *streams;
|
|
size_t threads[1], localThreads[1];
|
|
cl_uint i, maxArgs;
|
|
cl_ulong maxSize;
|
|
cl_ulong maxParameterSize;
|
|
size_t individualBufferSize;
|
|
char *programSrc, *constArgs, *str2;
|
|
char str[512];
|
|
const char *ptr;
|
|
cl_event event;
|
|
cl_int event_status;
|
|
|
|
|
|
/* Verify our test buffer won't be bigger than allowed */
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_CONSTANT_ARGS,
|
|
sizeof(maxArgs), &maxArgs, 0);
|
|
test_error(error, "Unable to get max constant arg count");
|
|
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE,
|
|
sizeof(maxParameterSize), &maxParameterSize, NULL);
|
|
test_error(error, "Unable to get max parameter size from device");
|
|
|
|
// Subtract the size of the result
|
|
maxParameterSize -= sizeof(cl_mem);
|
|
|
|
// Calculate the number we can use
|
|
if (maxParameterSize / sizeof(cl_mem) < maxArgs)
|
|
{
|
|
log_info("WARNING: Max parameter size of %d bytes limits test to %d "
|
|
"max image arguments.\n",
|
|
(int)maxParameterSize,
|
|
(int)(maxParameterSize / sizeof(cl_mem)));
|
|
maxArgs = (unsigned int)(maxParameterSize / sizeof(cl_mem));
|
|
}
|
|
|
|
|
|
if (maxArgs < (gIsEmbedded ? 4 : 8))
|
|
{
|
|
log_error("ERROR: Reported max constant arg count less than required "
|
|
"by OpenCL 1.0 (reported %d)\n",
|
|
(int)maxArgs);
|
|
return -1;
|
|
}
|
|
|
|
maxSize = get_device_info_max_constant_buffer_size(
|
|
deviceID, MAX_DEVICE_MEMORY_SIZE_DIVISOR);
|
|
individualBufferSize = ((int)maxSize / 2) / maxArgs;
|
|
|
|
log_info(
|
|
"Reported max constant arg count of %u and max constant buffer "
|
|
"size of %llu. Test will attempt to allocate half of that, or %llu "
|
|
"buffers of size %zu.\n",
|
|
maxArgs, maxSize, maxArgs, individualBufferSize);
|
|
|
|
str2 = (char *)malloc(sizeof(char) * 32 * (maxArgs + 2));
|
|
constArgs = (char *)malloc(sizeof(char) * 32 * (maxArgs + 2));
|
|
programSrc = (char *)malloc(sizeof(char) * 32 * 2 * (maxArgs + 2) + 1024);
|
|
|
|
/* Create a test program */
|
|
constArgs[0] = 0;
|
|
str2[0] = 0;
|
|
for (i = 0; i < maxArgs - 1; i++)
|
|
{
|
|
sprintf(str, ", __constant int *src%d", (int)(i + 2));
|
|
strcat(constArgs, str);
|
|
sprintf(str2 + strlen(str2), "\tdst[tid] += src%d[tid];\n",
|
|
(int)(i + 2));
|
|
if (strlen(str2) > (sizeof(char) * 32 * (maxArgs + 2) - 32)
|
|
|| strlen(constArgs) > (sizeof(char) * 32 * (maxArgs + 2) - 32))
|
|
{
|
|
log_info("Limiting number of arguments tested to %d due to test "
|
|
"program allocation size.\n",
|
|
i);
|
|
break;
|
|
}
|
|
}
|
|
sprintf(programSrc, sample_const_max_arg_kernel_pattern, constArgs, str2);
|
|
|
|
/* Create a kernel to test with */
|
|
ptr = programSrc;
|
|
if (create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
|
|
"sample_test")
|
|
!= 0)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
/* Create some I/O streams */
|
|
streams = new clMemWrapper[maxArgs + 1];
|
|
for (i = 0; i < maxArgs + 1; i++)
|
|
{
|
|
streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
|
individualBufferSize, NULL, &error);
|
|
test_error(error, "Creating test array failed");
|
|
}
|
|
|
|
/* Set the arguments */
|
|
for (i = 0; i < maxArgs + 1; i++)
|
|
{
|
|
error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]);
|
|
test_error(error, "Unable to set kernel argument");
|
|
}
|
|
|
|
/* Test running the kernel and verifying it */
|
|
threads[0] = (size_t)10;
|
|
while (threads[0] * sizeof(cl_int) > individualBufferSize) threads[0]--;
|
|
|
|
error = get_max_common_work_group_size(context, kernel, threads[0],
|
|
&localThreads[0]);
|
|
test_error(error, "Unable to get work group size to use");
|
|
|
|
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
|
|
localThreads, 0, NULL, &event);
|
|
test_error(error, "clEnqueueNDRangeKernel failed");
|
|
|
|
// Verify that the event does not return an error from the execution
|
|
error = clWaitForEvents(1, &event);
|
|
test_error(error, "clWaitForEvent failed");
|
|
error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
|
sizeof(event_status), &event_status, NULL);
|
|
test_error(error,
|
|
"clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
|
|
clReleaseEvent(event);
|
|
if (event_status < 0)
|
|
test_error(error, "Kernel execution event returned error");
|
|
|
|
error = clFinish(queue);
|
|
test_error(error, "clFinish failed.");
|
|
|
|
delete[] streams;
|
|
free(str2);
|
|
free(constArgs);
|
|
free(programSrc);
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_compute_units(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
cl_uint value;
|
|
|
|
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS,
|
|
sizeof(value), &value, 0);
|
|
test_error(error, "Unable to get compute unit count");
|
|
|
|
if (value < 1)
|
|
{
|
|
log_error("ERROR: Reported compute unit count less than required by "
|
|
"OpenCL 1.0 (reported %d)\n",
|
|
(int)value);
|
|
return -1;
|
|
}
|
|
|
|
log_info("Reported %d max compute units.\n", value);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_address_bits(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
cl_uint value;
|
|
|
|
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_ADDRESS_BITS, sizeof(value),
|
|
&value, 0);
|
|
test_error(error, "Unable to get address bit count");
|
|
|
|
if (value != 32 && value != 64)
|
|
{
|
|
log_error("ERROR: Reported address bit count not valid by OpenCL 1.0 "
|
|
"(reported %d)\n",
|
|
(int)value);
|
|
return -1;
|
|
}
|
|
|
|
log_info("Reported %d device address bits.\n", value);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
cl_device_fp_config value;
|
|
char profile[128] = "";
|
|
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(value),
|
|
&value, 0);
|
|
test_error(error, "Unable to get device single fp config");
|
|
|
|
// Check to see if we are an embedded profile device
|
|
if ((error = clGetDeviceInfo(deviceID, CL_DEVICE_PROFILE, sizeof(profile),
|
|
profile, NULL)))
|
|
{
|
|
log_error("FAILURE: Unable to get CL_DEVICE_PROFILE: error %d\n",
|
|
error);
|
|
return error;
|
|
}
|
|
|
|
if (0 == strcmp(profile, "EMBEDDED_PROFILE"))
|
|
{ // embedded device
|
|
|
|
if (0 == (value & (CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO)))
|
|
{
|
|
log_error("FAILURE: embedded device supports neither "
|
|
"CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n");
|
|
return -1;
|
|
}
|
|
}
|
|
else
|
|
{ // Full profile
|
|
if ((value & (CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN))
|
|
!= (CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN))
|
|
{
|
|
log_error("ERROR: Reported single fp config doesn't meet minimum "
|
|
"set by OpenCL 1.0 (reported 0x%08x)\n",
|
|
(int)value);
|
|
return -1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
cl_device_fp_config value;
|
|
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(value),
|
|
&value, 0);
|
|
test_error(error, "Unable to get device double fp config");
|
|
|
|
if (value == 0) return 0;
|
|
|
|
if ((value
|
|
& (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO
|
|
| CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM))
|
|
!= (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO
|
|
| CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM))
|
|
{
|
|
log_error("ERROR: Reported double fp config doesn't meet minimum set "
|
|
"by OpenCL 1.0 (reported 0x%08x)\n",
|
|
(int)value);
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
clProgramWrapper program;
|
|
clKernelWrapper kernel;
|
|
clMemWrapper streams[3];
|
|
size_t threads[1], localThreads[1];
|
|
cl_int *localData, *resultData;
|
|
cl_ulong maxSize, kernelLocalUsage, min_max_local_mem_size;
|
|
Version device_version;
|
|
int i;
|
|
int err = 0;
|
|
MTdata d;
|
|
|
|
/* Verify our test buffer won't be bigger than allowed */
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(maxSize),
|
|
&maxSize, 0);
|
|
test_error(error, "Unable to get max local buffer size");
|
|
|
|
try
|
|
{
|
|
device_version = get_device_cl_version(deviceID);
|
|
} catch (const std::runtime_error &e)
|
|
{
|
|
log_error("%s", e.what());
|
|
return -1;
|
|
}
|
|
|
|
if (!gIsEmbedded)
|
|
{
|
|
if (device_version == Version(1, 0))
|
|
min_max_local_mem_size = 16L * 1024L;
|
|
else
|
|
min_max_local_mem_size = 32L * 1024L;
|
|
}
|
|
else
|
|
{
|
|
min_max_local_mem_size = 1L * 1024L;
|
|
}
|
|
|
|
if (maxSize < min_max_local_mem_size)
|
|
{
|
|
const std::string version_as_string = device_version.to_string();
|
|
log_error("ERROR: Reported local mem size less than required by OpenCL "
|
|
"%s (reported %d KB)\n",
|
|
version_as_string.c_str(), (int)(maxSize / 1024L));
|
|
return -1;
|
|
}
|
|
|
|
log_info("Reported max local buffer size for device: %lld bytes.\n",
|
|
maxSize);
|
|
|
|
/* Create a kernel to test with */
|
|
if (create_single_kernel_helper(context, &program, &kernel, 1,
|
|
sample_local_arg_kernel, "sample_test")
|
|
!= 0)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE,
|
|
sizeof(kernelLocalUsage),
|
|
&kernelLocalUsage, NULL);
|
|
test_error(error,
|
|
"clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed");
|
|
|
|
log_info("Reported local buffer usage for kernel "
|
|
"(CL_KERNEL_LOCAL_MEM_SIZE): %lld bytes.\n",
|
|
kernelLocalUsage);
|
|
|
|
/* Create some I/O streams */
|
|
size_t sizeToAllocate =
|
|
((size_t)(maxSize - kernelLocalUsage) / sizeof(cl_int))
|
|
* sizeof(cl_int);
|
|
size_t numberOfInts = sizeToAllocate / sizeof(cl_int);
|
|
|
|
log_info("Attempting to use %zu bytes of local memory.\n", sizeToAllocate);
|
|
|
|
localData = (cl_int *)malloc(sizeToAllocate);
|
|
d = init_genrand(gRandomSeed);
|
|
for (i = 0; i < (int)(numberOfInts); i++)
|
|
localData[i] = (int)genrand_int32(d);
|
|
free_mtdata(d);
|
|
d = NULL;
|
|
|
|
streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate,
|
|
localData, &error);
|
|
test_error(error, "Creating test array failed");
|
|
streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate,
|
|
NULL, &error);
|
|
test_error(error, "Creating test array failed");
|
|
|
|
|
|
/* Set the arguments */
|
|
error = clSetKernelArg(kernel, 0, sizeToAllocate, NULL);
|
|
test_error(error, "Unable to set indexed kernel arguments");
|
|
error = clSetKernelArg(kernel, 1, sizeof(streams[0]), &streams[0]);
|
|
test_error(error, "Unable to set indexed kernel arguments");
|
|
error = clSetKernelArg(kernel, 2, sizeof(streams[1]), &streams[1]);
|
|
test_error(error, "Unable to set indexed kernel arguments");
|
|
|
|
|
|
/* Test running the kernel and verifying it */
|
|
threads[0] = numberOfInts;
|
|
localThreads[0] = 1;
|
|
log_info("Creating local buffer with %zu cl_ints (%zu bytes).\n",
|
|
numberOfInts, sizeToAllocate);
|
|
|
|
cl_event evt;
|
|
cl_int evt_err;
|
|
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
|
|
localThreads, 0, NULL, &evt);
|
|
test_error(error, "clEnqueueNDRangeKernel failed");
|
|
|
|
error = clFinish(queue);
|
|
test_error(error, "clFinish failed");
|
|
|
|
error = clGetEventInfo(evt, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
|
sizeof evt_err, &evt_err, NULL);
|
|
test_error(error, "clGetEventInfo with maximum local buffer size failed.");
|
|
|
|
if (evt_err != CL_COMPLETE)
|
|
{
|
|
print_error(evt_err, "Kernel event returned error");
|
|
clReleaseEvent(evt);
|
|
return -1;
|
|
}
|
|
|
|
resultData = (cl_int *)malloc(sizeToAllocate);
|
|
|
|
error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate,
|
|
resultData, 0, NULL, NULL);
|
|
test_error(error, "clEnqueueReadBuffer failed");
|
|
|
|
for (i = 0; i < (int)(numberOfInts); i++)
|
|
if (localData[i] != resultData[i])
|
|
{
|
|
clReleaseEvent(evt);
|
|
free(localData);
|
|
free(resultData);
|
|
log_error("Results failed to verify.\n");
|
|
return -1;
|
|
}
|
|
clReleaseEvent(evt);
|
|
free(localData);
|
|
free(resultData);
|
|
|
|
return err;
|
|
}
|
|
|
|
int test_min_max_kernel_preferred_work_group_size_multiple(
|
|
cl_device_id deviceID, cl_context context, cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
int err;
|
|
clProgramWrapper program;
|
|
clKernelWrapper kernel;
|
|
|
|
size_t max_local_workgroup_size[3];
|
|
size_t max_workgroup_size = 0, preferred_workgroup_size = 0;
|
|
|
|
err = create_single_kernel_helper(context, &program, &kernel, 1,
|
|
sample_local_arg_kernel, "sample_test");
|
|
test_error(err, "Failed to build kernel/program.");
|
|
|
|
err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE,
|
|
sizeof(max_workgroup_size),
|
|
&max_workgroup_size, NULL);
|
|
test_error(err, "clGetKernelWorkgroupInfo failed.");
|
|
|
|
err = clGetKernelWorkGroupInfo(
|
|
kernel, deviceID, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
|
|
sizeof(preferred_workgroup_size), &preferred_workgroup_size, NULL);
|
|
test_error(err, "clGetKernelWorkgroupInfo failed.");
|
|
|
|
err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES,
|
|
sizeof(max_local_workgroup_size),
|
|
max_local_workgroup_size, NULL);
|
|
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
|
|
|
|
// Since the preferred size is only a performance hint, we can only really
|
|
// check that we get a sane value back
|
|
log_info("size: %ld preferred: %ld max: %ld\n", max_workgroup_size,
|
|
preferred_workgroup_size, max_local_workgroup_size[0]);
|
|
|
|
if (preferred_workgroup_size > max_workgroup_size)
|
|
{
|
|
log_error("ERROR: Reported preferred workgroup multiple larger than "
|
|
"max workgroup size (preferred %ld, max %ld)\n",
|
|
preferred_workgroup_size, max_workgroup_size);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_execution_capabilities(cl_device_id deviceID,
|
|
cl_context context,
|
|
cl_command_queue queue,
|
|
int num_elements)
|
|
{
|
|
int error;
|
|
cl_device_exec_capabilities value;
|
|
|
|
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_EXECUTION_CAPABILITIES,
|
|
sizeof(value), &value, 0);
|
|
test_error(error, "Unable to get execution capabilities");
|
|
|
|
if ((value & CL_EXEC_KERNEL) != CL_EXEC_KERNEL)
|
|
{
|
|
log_error("ERROR: Reported execution capabilities less than required "
|
|
"by OpenCL 1.0 (reported 0x%08x)\n",
|
|
(int)value);
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_queue_properties(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
int error;
|
|
cl_command_queue_properties value;
|
|
|
|
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
|
|
sizeof(value), &value, 0);
|
|
test_error(error, "Unable to get queue properties");
|
|
|
|
if ((value & CL_QUEUE_PROFILING_ENABLE) != CL_QUEUE_PROFILING_ENABLE)
|
|
{
|
|
log_error("ERROR: Reported queue properties less than required by "
|
|
"OpenCL 1.0 (reported 0x%08x)\n",
|
|
(int)value);
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_device_version(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
// Query for the device version.
|
|
Version device_cl_version = get_device_cl_version(deviceID);
|
|
log_info("Returned version %s.\n", device_cl_version.to_string().c_str());
|
|
|
|
// Make sure 2.x devices support required extensions for 2.x
|
|
// note: these extensions are **not** required for devices
|
|
// supporting OpenCL-3.0
|
|
const char *requiredExtensions2x[] = {
|
|
"cl_khr_3d_image_writes",
|
|
"cl_khr_image2d_from_buffer",
|
|
"cl_khr_depth_images",
|
|
};
|
|
|
|
// Make sure 1.1 devices support required extensions for 1.1
|
|
const char *requiredExtensions11[] = {
|
|
"cl_khr_global_int32_base_atomics",
|
|
"cl_khr_global_int32_extended_atomics",
|
|
"cl_khr_local_int32_base_atomics",
|
|
"cl_khr_local_int32_extended_atomics",
|
|
"cl_khr_byte_addressable_store",
|
|
};
|
|
|
|
|
|
if (device_cl_version >= Version(1, 1))
|
|
{
|
|
log_info("Checking for required extensions for OpenCL 1.1 and later "
|
|
"devices...\n");
|
|
for (size_t i = 0; i < ARRAY_SIZE(requiredExtensions11); i++)
|
|
{
|
|
if (!is_extension_available(deviceID, requiredExtensions11[i]))
|
|
{
|
|
log_error("ERROR: Required extension for 1.1 and greater "
|
|
"devices is not in extension string: %s\n",
|
|
requiredExtensions11[i]);
|
|
return -1;
|
|
}
|
|
else
|
|
log_info("\t%s\n", requiredExtensions11[i]);
|
|
}
|
|
|
|
if (device_cl_version >= Version(1, 2))
|
|
{
|
|
log_info("Checking for required extensions for OpenCL 1.2 and "
|
|
"later devices...\n");
|
|
// The only required extension for an OpenCL-1.2 device is
|
|
// cl_khr_fp64 and it is only required if double precision is
|
|
// supported.
|
|
cl_device_fp_config doubles_supported;
|
|
cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_DOUBLE_FP_CONFIG,
|
|
sizeof(doubles_supported),
|
|
&doubles_supported, 0);
|
|
test_error(error, "Unable to get device double fp config");
|
|
if (doubles_supported)
|
|
{
|
|
if (!is_extension_available(deviceID, "cl_khr_fp64"))
|
|
{
|
|
log_error(
|
|
"ERROR: Required extension for 1.2 and greater devices "
|
|
"is not in extension string: cl_khr_fp64\n");
|
|
}
|
|
else
|
|
{
|
|
log_info("\t%s\n", "cl_khr_fp64");
|
|
}
|
|
}
|
|
}
|
|
|
|
if (device_cl_version >= Version(2, 0)
|
|
&& device_cl_version < Version(3, 0))
|
|
{
|
|
log_info("Checking for required extensions for OpenCL 2.0, 2.1 and "
|
|
"2.2 devices...\n");
|
|
for (size_t i = 0; i < ARRAY_SIZE(requiredExtensions2x); i++)
|
|
{
|
|
if (!is_extension_available(deviceID, requiredExtensions2x[i]))
|
|
{
|
|
log_error("ERROR: Required extension for 2.0, 2.1 and 2.2 "
|
|
"devices is not in extension string: %s\n",
|
|
requiredExtensions2x[i]);
|
|
return -1;
|
|
}
|
|
else
|
|
{
|
|
log_info("\t%s\n", requiredExtensions2x[i]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
log_info("WARNING: skipping required extension test -- OpenCL 1.0 "
|
|
"device.\n");
|
|
return 0;
|
|
}
|
|
|
|
int test_min_max_language_version(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
cl_int error;
|
|
cl_char buffer[4098];
|
|
size_t length;
|
|
|
|
// Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*"
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_OPENCL_C_VERSION,
|
|
sizeof(buffer), buffer, &length);
|
|
test_error(error, "Unable to get device opencl c version string");
|
|
if (memcmp(buffer, "OpenCL C ", strlen("OpenCL C ")) != 0)
|
|
{
|
|
log_error("ERROR: Initial part of device language version string does "
|
|
"not match required format! (returned: \"%s\")\n",
|
|
(char *)buffer);
|
|
return -1;
|
|
}
|
|
|
|
log_info("Returned version \"%s\".\n", buffer);
|
|
|
|
char *p1 = (char *)buffer + strlen("OpenCL C ");
|
|
while (*p1 == ' ') p1++;
|
|
char *p2 = p1;
|
|
if (!isdigit(*p2))
|
|
{
|
|
log_error("ERROR: Major revision number must follow space behind "
|
|
"OpenCL C! (returned %s)\n",
|
|
(char *)buffer);
|
|
return -1;
|
|
}
|
|
while (isdigit(*p2)) p2++;
|
|
if (*p2 != '.')
|
|
{
|
|
log_error("ERROR: Version number must contain a decimal point! "
|
|
"(returned: %s)\n",
|
|
(char *)buffer);
|
|
return -1;
|
|
}
|
|
char *p3 = p2 + 1;
|
|
if (!isdigit(*p3))
|
|
{
|
|
log_error("ERROR: Minor revision number is missing or does not abut "
|
|
"the decimal point! (returned %s)\n",
|
|
(char *)buffer);
|
|
return -1;
|
|
}
|
|
while (isdigit(*p3)) p3++;
|
|
if (*p3 != ' ')
|
|
{
|
|
log_error("ERROR: A space must appear after the minor version! "
|
|
"(returned: %s)\n",
|
|
(char *)buffer);
|
|
return -1;
|
|
}
|
|
*p2 = ' '; // Put in a space for atoi below.
|
|
p2++;
|
|
|
|
int major = atoi(p1);
|
|
int minor = atoi(p2);
|
|
int minor_revision = 2;
|
|
|
|
if (major * 10 + minor < 10 + minor_revision)
|
|
{
|
|
// If the language version did not match, check to see if
|
|
// OPENCL_1_0_DEVICE is set.
|
|
if (getenv("OPENCL_1_0_DEVICE"))
|
|
{
|
|
log_info("WARNING: This test was run with OPENCL_1_0_DEVICE "
|
|
"defined! This is not a OpenCL 1.1 or OpenCL 1.2 "
|
|
"compatible device!!!\n");
|
|
}
|
|
else if (getenv("OPENCL_1_1_DEVICE"))
|
|
{
|
|
log_info(
|
|
"WARNING: This test was run with OPENCL_1_1_DEVICE defined! "
|
|
"This is not a OpenCL 1.2 compatible device!!!\n");
|
|
}
|
|
else
|
|
{
|
|
log_error("ERROR: OpenCL device language version returned is less "
|
|
"than 1.%d! (Returned: %s)\n",
|
|
minor_revision, (char *)buffer);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
// Sanity checks on the returned values
|
|
if (length != (strlen((char *)buffer) + 1))
|
|
{
|
|
log_error("ERROR: Returned length of version string does not match "
|
|
"actual length (actual: %d, returned: %d)\n",
|
|
(int)strlen((char *)buffer), (int)length);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|