mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 14:09:03 +00:00
* Don't recalculate image parameters repeatedly in `test_read_image()` We've already done this in the loop. There's no need to recalculate those parameters over and over again in `sample_image_pixel*()` and `read_image_pixel*()`. This should save some work during the image streams test. This only affects the 3D tests for now, but my time profiles indicate this is where we spend the most time anyway. * Vectorize read_image_pixel_float() and sample_image_pixel_float() for SSE/AVX This shortens the image streams test time from 45 minutes without it to 37 minutes. Unfortunately, most of the time is now spent waiting for memory, particularly in the 3D tests, because the 3D image doesn't neatly fit in the cache, especially in the linear sampling case, where pixels from two 2D slices must be sampled. Software prefetching won't help; it only helps when execution time is dominated by operations, but this is dominated by memory access. Randomized offsets are likely a factor, because they throw off the hardware prefetcher. One possible further optimization is, in the linear sampling case, to load two sampled pixels at once. This is easy to do using AVX, which extends SSE with 256-bit vectors. Obviously, this only applies to x86 CPUs with SSE2. The greatest performance gains, however, are seen with SSE4.1. Most modern x86 CPus have SSE4. Work is needed to support other CPUs' vector units--ARM Advanced SIMD/NEON is probably the most important one. Another possibility is arranging the code so that the compiler's autovectorization will kick in and do what I did here manually.
1689 lines
87 KiB
C++
1689 lines
87 KiB
C++
//
|
|
// Copyright (c) 2021 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
#include "test_common.h"
|
|
|
|
#include <algorithm>
|
|
|
|
cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool test_mipmaps, cl_int *error) {
|
|
cl_sampler sampler = nullptr;
|
|
if (test_mipmaps) {
|
|
cl_sampler_properties properties[] = {
|
|
CL_SAMPLER_NORMALIZED_COORDS, sdata->normalized_coords,
|
|
CL_SAMPLER_ADDRESSING_MODE, sdata->addressing_mode,
|
|
CL_SAMPLER_FILTER_MODE, sdata->filter_mode,
|
|
CL_SAMPLER_MIP_FILTER_MODE, sdata->filter_mode,
|
|
0};
|
|
sampler = clCreateSamplerWithProperties(context, properties, error);
|
|
} else {
|
|
sampler = clCreateSampler(context, sdata->normalized_coords, sdata->addressing_mode, sdata->filter_mode, error);
|
|
}
|
|
return sampler;
|
|
}
|
|
|
|
bool get_image_dimensions(image_descriptor *imageInfo, size_t &width,
|
|
size_t &height, size_t &depth)
|
|
{
|
|
width = imageInfo->width;
|
|
height = 1;
|
|
depth = 1;
|
|
switch (imageInfo->type)
|
|
{
|
|
case CL_MEM_OBJECT_IMAGE1D: break;
|
|
case CL_MEM_OBJECT_IMAGE1D_ARRAY: height = imageInfo->arraySize; break;
|
|
case CL_MEM_OBJECT_IMAGE2D: height = imageInfo->height; break;
|
|
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
|
height = imageInfo->height;
|
|
depth = imageInfo->arraySize;
|
|
break;
|
|
case CL_MEM_OBJECT_IMAGE3D:
|
|
height = imageInfo->height;
|
|
depth = imageInfo->depth;
|
|
break;
|
|
default:
|
|
log_error("ERROR: Test does not support image type");
|
|
return TEST_FAIL;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static bool InitFloatCoordsCommon(image_descriptor *imageInfo,
|
|
image_sampler_data *imageSampler,
|
|
float *xOffsets, float *yOffsets,
|
|
float *zOffsets, float xfract, float yfract,
|
|
float zfract, int normalized_coords, MTdata d,
|
|
int lod)
|
|
{
|
|
size_t i = 0;
|
|
size_t width_loop, height_loop, depth_loop;
|
|
bool error =
|
|
get_image_dimensions(imageInfo, width_loop, height_loop, depth_loop);
|
|
if (!error)
|
|
{
|
|
if (gDisableOffsets)
|
|
{
|
|
for (size_t z = 0; z < depth_loop; z++)
|
|
{
|
|
for (size_t y = 0; y < height_loop; y++)
|
|
{
|
|
for (size_t x = 0; x < width_loop; x++, i++)
|
|
{
|
|
xOffsets[i] = (float)(xfract + (double)x);
|
|
yOffsets[i] = (float)(yfract + (double)y);
|
|
zOffsets[i] = (float)(zfract + (double)z);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (size_t z = 0; z < depth_loop; z++)
|
|
{
|
|
for (size_t y = 0; y < height_loop; y++)
|
|
{
|
|
for (size_t x = 0; x < width_loop; x++, i++)
|
|
{
|
|
xOffsets[i] =
|
|
(float)(xfract
|
|
+ (double)((int)x
|
|
+ random_in_range(-10, 10, d)));
|
|
yOffsets[i] =
|
|
(float)(yfract
|
|
+ (double)((int)y
|
|
+ random_in_range(-10, 10, d)));
|
|
zOffsets[i] =
|
|
(float)(zfract
|
|
+ (double)((int)z
|
|
+ random_in_range(-10, 10, d)));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
|
|
{
|
|
i = 0;
|
|
for (size_t z = 0; z < depth_loop; z++)
|
|
{
|
|
for (size_t y = 0; y < height_loop; y++)
|
|
{
|
|
for (size_t x = 0; x < width_loop; x++, i++)
|
|
{
|
|
xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0,
|
|
(double)width_loop - 1.0);
|
|
yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0,
|
|
(double)height_loop - 1.0);
|
|
zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0,
|
|
(double)depth_loop - 1.0);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (normalized_coords || gTestMipmaps)
|
|
{
|
|
i = 0;
|
|
if (lod == 0)
|
|
{
|
|
for (size_t z = 0; z < depth_loop; z++)
|
|
{
|
|
for (size_t y = 0; y < height_loop; y++)
|
|
{
|
|
for (size_t x = 0; x < width_loop; x++, i++)
|
|
{
|
|
xOffsets[i] = (float)((double)xOffsets[i]
|
|
/ (double)width_loop);
|
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
|
|
{
|
|
yOffsets[i] = (float)((double)yOffsets[i]
|
|
/ (double)height_loop);
|
|
}
|
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
|
|
{
|
|
zOffsets[i] = (float)((double)zOffsets[i]
|
|
/ (double)depth_loop);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (gTestMipmaps)
|
|
{
|
|
size_t width_lod =
|
|
(width_loop >> lod) ? (width_loop >> lod) : 1;
|
|
size_t height_lod = height_loop;
|
|
size_t depth_lod = depth_loop;
|
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
|
|
{
|
|
height_lod =
|
|
(height_loop >> lod) ? (height_loop >> lod) : 1;
|
|
}
|
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
|
|
{
|
|
depth_lod = (depth_loop >> lod) ? (depth_loop >> lod) : 1;
|
|
}
|
|
|
|
for (size_t z = 0; z < depth_lod; z++)
|
|
{
|
|
for (size_t y = 0; y < height_lod; y++)
|
|
{
|
|
for (size_t x = 0; x < width_lod; x++, i++)
|
|
{
|
|
xOffsets[i] = (float)((double)xOffsets[i]
|
|
/ (double)width_lod);
|
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
|
|
{
|
|
yOffsets[i] = (float)((double)yOffsets[i]
|
|
/ (double)height_lod);
|
|
}
|
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
|
|
{
|
|
zOffsets[i] = (float)((double)zOffsets[i]
|
|
/ (double)depth_lod);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return error;
|
|
}
|
|
|
|
cl_mem create_image_of_type(cl_context context, cl_mem_flags mem_flags,
|
|
image_descriptor *imageInfo, size_t row_pitch,
|
|
size_t slice_pitch, void *host_ptr, cl_int *error)
|
|
{
|
|
cl_mem image;
|
|
switch (imageInfo->type)
|
|
{
|
|
case CL_MEM_OBJECT_IMAGE3D:
|
|
image = create_image_3d(context, mem_flags, imageInfo->format,
|
|
imageInfo->width, imageInfo->height,
|
|
imageInfo->depth, row_pitch, slice_pitch,
|
|
host_ptr, error);
|
|
break;
|
|
default:
|
|
log_error("Implementation is incomplete, only 3D images are "
|
|
"supported so far");
|
|
return nullptr;
|
|
}
|
|
return image;
|
|
}
|
|
|
|
static size_t get_image_num_pixels(image_descriptor *imageInfo, size_t width,
|
|
size_t height, size_t depth,
|
|
size_t array_size)
|
|
{
|
|
size_t image_size;
|
|
switch (imageInfo->type)
|
|
{
|
|
case CL_MEM_OBJECT_IMAGE3D: image_size = width * height * depth; break;
|
|
default:
|
|
log_error("Implementation is incomplete, only 3D images are "
|
|
"supported so far");
|
|
return 0;
|
|
}
|
|
return image_size;
|
|
}
|
|
|
|
int test_read_image(cl_context context, cl_command_queue queue,
|
|
cl_kernel kernel, image_descriptor *imageInfo,
|
|
image_sampler_data *imageSampler, bool useFloatCoords,
|
|
ExplicitType outputType, MTdata d)
|
|
{
|
|
int error;
|
|
size_t threads[3];
|
|
static int initHalf = 0;
|
|
|
|
size_t image_size =
|
|
get_image_num_pixels(imageInfo, imageInfo->width, imageInfo->height,
|
|
imageInfo->depth, imageInfo->arraySize);
|
|
test_assert_error(0 != image_size, "Invalid image size");
|
|
size_t width_size, height_size, depth_size;
|
|
if (get_image_dimensions(imageInfo, width_size, height_size, depth_size))
|
|
{
|
|
log_error("ERROR: invalid image dimensions");
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
|
|
cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY;
|
|
|
|
clMemWrapper xOffsets, yOffsets, zOffsets, results;
|
|
clSamplerWrapper actualSampler;
|
|
BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
|
|
|
|
// Create offset data
|
|
BufferOwningPtr<cl_float> xOffsetValues(
|
|
malloc(sizeof(cl_float) * image_size));
|
|
BufferOwningPtr<cl_float> yOffsetValues(
|
|
malloc(sizeof(cl_float) * image_size));
|
|
BufferOwningPtr<cl_float> zOffsetValues(
|
|
malloc(sizeof(cl_float) * image_size));
|
|
|
|
if (imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
|
|
if (DetectFloatToHalfRoundingMode(queue)) return 1;
|
|
|
|
BufferOwningPtr<char> imageValues;
|
|
generate_random_image_data(imageInfo, imageValues, d);
|
|
|
|
// Construct testing sources
|
|
clProtectedImage protImage;
|
|
clMemWrapper unprotImage;
|
|
cl_mem image;
|
|
|
|
if (gtestTypesToRun & kReadTests)
|
|
{
|
|
image_read_write_flags = CL_MEM_READ_ONLY;
|
|
}
|
|
else
|
|
{
|
|
image_read_write_flags = CL_MEM_READ_WRITE;
|
|
}
|
|
|
|
if (gMemFlagsToUse == CL_MEM_USE_HOST_PTR)
|
|
{
|
|
// clProtectedImage uses USE_HOST_PTR, so just rely on that for the
|
|
// testing (via Ian) Do not use protected images for max image size test
|
|
// since it rounds the row size to a page size
|
|
if (gTestMaxImages)
|
|
{
|
|
generate_random_image_data(imageInfo,
|
|
maxImageUseHostPtrBackingStore, d);
|
|
unprotImage = create_image_of_type(
|
|
context, image_read_write_flags | CL_MEM_USE_HOST_PTR,
|
|
imageInfo, (gEnablePitch ? imageInfo->rowPitch : 0),
|
|
(gEnablePitch ? imageInfo->slicePitch : 0),
|
|
maxImageUseHostPtrBackingStore, &error);
|
|
}
|
|
else
|
|
{
|
|
error = protImage.Create(context, imageInfo->type,
|
|
image_read_write_flags, imageInfo->format,
|
|
imageInfo->width, imageInfo->height,
|
|
imageInfo->depth, imageInfo->arraySize);
|
|
}
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
log_error("ERROR: Unable to create image of size %d x %d x %d x %d "
|
|
"(pitch %d, %d ) (%s)",
|
|
(int)imageInfo->width, (int)imageInfo->height,
|
|
(int)imageInfo->depth, (int)imageInfo->arraySize,
|
|
(int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
|
|
IGetErrorString(error));
|
|
return error;
|
|
}
|
|
if (gTestMaxImages)
|
|
image = (cl_mem)unprotImage;
|
|
else
|
|
image = (cl_mem)protImage;
|
|
}
|
|
else if (gMemFlagsToUse == CL_MEM_COPY_HOST_PTR)
|
|
{
|
|
// Don't use clEnqueueWriteImage; just use copy host ptr to get the data
|
|
// in
|
|
unprotImage = create_image_of_type(
|
|
context, image_read_write_flags | CL_MEM_COPY_HOST_PTR, imageInfo,
|
|
(gEnablePitch ? imageInfo->rowPitch : 0),
|
|
(gEnablePitch ? imageInfo->slicePitch : 0), imageValues, &error);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
log_error("ERROR: Unable to create image of size %d x %d x %d x %d "
|
|
"(pitch %d, %d ) (%s)",
|
|
(int)imageInfo->width, (int)imageInfo->height,
|
|
(int)imageInfo->depth, (int)imageInfo->arraySize,
|
|
(int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
|
|
IGetErrorString(error));
|
|
return error;
|
|
}
|
|
image = unprotImage;
|
|
}
|
|
else // Either CL_MEM_ALLOC_HOST_PTR or none
|
|
{
|
|
// Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can
|
|
// be accessed by the host, but otherwise it works just as if no flag is
|
|
// specified, so we just do the same thing either way
|
|
if (!gTestMipmaps)
|
|
{
|
|
unprotImage = create_image_of_type(
|
|
context, image_read_write_flags | gMemFlagsToUse, imageInfo,
|
|
(gEnablePitch ? imageInfo->rowPitch : 0),
|
|
(gEnablePitch ? imageInfo->slicePitch : 0), imageValues,
|
|
&error);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
log_error("ERROR: Unable to create image of size %d x %d x "
|
|
"%d x %d (pitch %d, %d ) (%s)",
|
|
(int)imageInfo->width, (int)imageInfo->height,
|
|
(int)imageInfo->depth, (int)imageInfo->arraySize,
|
|
(int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
|
|
IGetErrorString(error));
|
|
return error;
|
|
}
|
|
image = unprotImage;
|
|
}
|
|
else
|
|
{
|
|
cl_image_desc image_desc = { 0 };
|
|
image_desc.image_type = imageInfo->type;
|
|
image_desc.image_width = imageInfo->width;
|
|
image_desc.image_height = imageInfo->height;
|
|
image_desc.image_depth = imageInfo->depth;
|
|
image_desc.image_array_size = imageInfo->arraySize;
|
|
image_desc.num_mip_levels = imageInfo->num_mip_levels;
|
|
|
|
|
|
unprotImage =
|
|
clCreateImage(context, image_read_write_flags,
|
|
imageInfo->format, &image_desc, NULL, &error);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
log_error("ERROR: Unable to create %d level mipmapped image "
|
|
"of size %d x %d x %d x %d (pitch %d, %d ) (%s)",
|
|
(int)imageInfo->num_mip_levels, (int)imageInfo->width,
|
|
(int)imageInfo->height, (int)imageInfo->depth,
|
|
(int)imageInfo->arraySize, (int)imageInfo->rowPitch,
|
|
(int)imageInfo->slicePitch, IGetErrorString(error));
|
|
return error;
|
|
}
|
|
image = unprotImage;
|
|
}
|
|
}
|
|
|
|
test_assert_error(nullptr != image, "Image creation failed");
|
|
|
|
if (gMemFlagsToUse != CL_MEM_COPY_HOST_PTR)
|
|
{
|
|
size_t origin[4] = { 0, 0, 0, 0 };
|
|
size_t region[3] = { width_size, height_size, depth_size };
|
|
|
|
if (gDebugTrace) log_info(" - Writing image...\n");
|
|
|
|
if (!gTestMipmaps)
|
|
{
|
|
|
|
error =
|
|
clEnqueueWriteImage(queue, image, CL_TRUE, origin, region,
|
|
gEnablePitch ? imageInfo->rowPitch : 0,
|
|
gEnablePitch ? imageInfo->slicePitch : 0,
|
|
imageValues, 0, NULL, NULL);
|
|
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
log_error("ERROR: Unable to write to image of size %d x %d "
|
|
"x %d x %d\n",
|
|
(int)imageInfo->width, (int)imageInfo->height,
|
|
(int)imageInfo->depth, (int)imageInfo->arraySize);
|
|
return error;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
int nextLevelOffset = 0;
|
|
|
|
for (int i = 0; i < imageInfo->num_mip_levels; i++)
|
|
{
|
|
origin[3] = i;
|
|
error = clEnqueueWriteImage(
|
|
queue, image, CL_TRUE, origin, region, 0, 0,
|
|
((char *)imageValues + nextLevelOffset), 0, NULL, NULL);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
log_error("ERROR: Unable to write to %d level mipmapped "
|
|
"image of size %d x %d x %d x %d\n",
|
|
(int)imageInfo->num_mip_levels,
|
|
(int)imageInfo->width, (int)imageInfo->height,
|
|
(int)imageInfo->arraySize, (int)imageInfo->depth);
|
|
return error;
|
|
}
|
|
nextLevelOffset += region[0] * region[1] * region[2]
|
|
* get_pixel_size(imageInfo->format);
|
|
// Subsequent mip level dimensions keep halving
|
|
region[0] = region[0] >> 1 ? region[0] >> 1 : 1;
|
|
region[1] = region[1] >> 1 ? region[1] >> 1 : 1;
|
|
region[2] = region[2] >> 1 ? region[2] >> 1 : 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
xOffsets =
|
|
clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
|
|
sizeof(cl_float) * image_size, xOffsetValues, &error);
|
|
test_error(error, "Unable to create x offset buffer");
|
|
yOffsets =
|
|
clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
|
|
sizeof(cl_float) * image_size, yOffsetValues, &error);
|
|
test_error(error, "Unable to create y offset buffer");
|
|
zOffsets =
|
|
clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
|
|
sizeof(cl_float) * image_size, zOffsetValues, &error);
|
|
test_error(error, "Unable to create y offset buffer");
|
|
results = clCreateBuffer(
|
|
context, CL_MEM_READ_WRITE,
|
|
get_explicit_type_size(outputType) * 4 * image_size, NULL, &error);
|
|
test_error(error, "Unable to create result buffer");
|
|
|
|
// Create sampler to use
|
|
actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
|
|
test_error(error, "Unable to create image sampler");
|
|
|
|
// Set arguments
|
|
int idx = 0;
|
|
error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &image);
|
|
test_error(error, "Unable to set kernel arguments");
|
|
if (!gUseKernelSamplers)
|
|
{
|
|
error =
|
|
clSetKernelArg(kernel, idx++, sizeof(cl_sampler), &actualSampler);
|
|
test_error(error, "Unable to set kernel arguments");
|
|
}
|
|
error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &xOffsets);
|
|
test_error(error, "Unable to set kernel arguments");
|
|
error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &yOffsets);
|
|
test_error(error, "Unable to set kernel arguments");
|
|
error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &zOffsets);
|
|
test_error(error, "Unable to set kernel arguments");
|
|
error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &results);
|
|
test_error(error, "Unable to set kernel arguments");
|
|
|
|
const float float_offsets[] = { 0.0f,
|
|
MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30),
|
|
0.25f,
|
|
0.3f,
|
|
0.5f - FLT_EPSILON / 4.0f,
|
|
0.5f,
|
|
0.9f,
|
|
1.0f - FLT_EPSILON / 2 };
|
|
int float_offset_count = sizeof(float_offsets) / sizeof(float_offsets[0]);
|
|
int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
|
|
int loopCount = 2 * float_offset_count;
|
|
if (!useFloatCoords) loopCount = 1;
|
|
if (gTestMaxImages)
|
|
{
|
|
loopCount = 1;
|
|
log_info("Testing each size only once with pixel offsets of %g for max "
|
|
"sized images.\n",
|
|
float_offsets[0]);
|
|
}
|
|
|
|
// Get the maximum absolute error for this format
|
|
double formatAbsoluteError =
|
|
get_max_absolute_error(imageInfo->format, imageSampler);
|
|
if (gDebugTrace)
|
|
log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
|
|
|
|
if (0 == initHalf
|
|
&& imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
|
|
{
|
|
initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode(queue);
|
|
if (initHalf)
|
|
{
|
|
log_info("Half rounding mode successfully detected.\n");
|
|
}
|
|
}
|
|
|
|
int nextLevelOffset = 0;
|
|
// Precalculate LOD dimensions for sample_image_pixel_offset()
|
|
size_t width_lod = width_size, height_lod = height_size,
|
|
depth_lod = depth_size;
|
|
image_descriptor lodInfo = *imageInfo;
|
|
lodInfo.num_mip_levels = 1;
|
|
|
|
// Loop over all mipmap levels, if we are testing mipmapped images.
|
|
for (int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels)
|
|
|| (!gTestMipmaps && lod < 1);
|
|
lod++)
|
|
{
|
|
size_t image_lod_size =
|
|
get_image_num_pixels(&lodInfo, lodInfo.width, lodInfo.height,
|
|
lodInfo.depth, lodInfo.arraySize);
|
|
test_assert_error(0 != image_lod_size, "Invalid image size");
|
|
size_t resultValuesSize =
|
|
image_lod_size * get_explicit_type_size(outputType) * 4;
|
|
BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
|
|
float lod_float = (float)lod;
|
|
if (gTestMipmaps)
|
|
{
|
|
// Set the lod kernel arg
|
|
if (gDebugTrace) log_info(" - Working at mip level %d\n", lod);
|
|
error = clSetKernelArg(kernel, idx, sizeof(float), &lod_float);
|
|
test_error(error, "Unable to set kernel arguments");
|
|
}
|
|
|
|
for (int q = 0; q < loopCount; q++)
|
|
{
|
|
float offset = float_offsets[q % float_offset_count];
|
|
|
|
// Init the coordinates
|
|
error = InitFloatCoordsCommon(
|
|
&lodInfo, imageSampler, xOffsetValues, yOffsetValues,
|
|
zOffsetValues, q >= float_offset_count ? -offset : offset,
|
|
q >= float_offset_count ? offset : -offset,
|
|
q >= float_offset_count ? -offset : offset,
|
|
imageSampler->normalized_coords, d, 0);
|
|
test_error(error, "Unable to initialise coordinates");
|
|
|
|
error = clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0,
|
|
sizeof(cl_float) * image_size,
|
|
xOffsetValues, 0, NULL, NULL);
|
|
test_error(error, "Unable to write x offsets");
|
|
error = clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0,
|
|
sizeof(cl_float) * image_size,
|
|
yOffsetValues, 0, NULL, NULL);
|
|
test_error(error, "Unable to write y offsets");
|
|
error = clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0,
|
|
sizeof(cl_float) * image_size,
|
|
zOffsetValues, 0, NULL, NULL);
|
|
test_error(error, "Unable to write z offsets");
|
|
|
|
|
|
memset(resultValues, 0xff, resultValuesSize);
|
|
clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, resultValuesSize,
|
|
resultValues, 0, NULL, NULL);
|
|
|
|
// Figure out thread dimensions
|
|
threads[0] = (size_t)width_lod;
|
|
threads[1] = (size_t)height_lod;
|
|
threads[2] = (size_t)depth_lod;
|
|
|
|
// Run the kernel
|
|
error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads,
|
|
NULL, 0, NULL, NULL);
|
|
test_error(error, "Unable to run kernel");
|
|
|
|
// Get results
|
|
error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0,
|
|
resultValuesSize, resultValues, 0, NULL,
|
|
NULL);
|
|
test_error(error, "Unable to read results from kernel");
|
|
if (gDebugTrace) log_info(" results read\n");
|
|
|
|
// Validate results element by element
|
|
char *imagePtr = (char *)imageValues + nextLevelOffset;
|
|
/*
|
|
* FLOAT output type
|
|
*/
|
|
if (is_sRGBA_order(imageInfo->format->image_channel_order)
|
|
&& (outputType == kFloat))
|
|
{
|
|
// Validate float results
|
|
float *resultPtr = (float *)(char *)resultValues;
|
|
float expected[4], error = 0.0f;
|
|
float maxErr = get_max_relative_error(
|
|
imageInfo->format, imageSampler, 1 /*3D*/,
|
|
CL_FILTER_LINEAR == imageSampler->filter_mode);
|
|
|
|
for (size_t z = 0, j = 0; z < depth_lod; z++)
|
|
{
|
|
for (size_t y = 0; y < height_lod; y++)
|
|
{
|
|
for (size_t x = 0; x < width_lod; x++, j++)
|
|
{
|
|
// Step 1: go through and see if the results verify
|
|
// for the pixel For the normalized case on a GPU we
|
|
// put in offsets to the X, Y and Z to see if we
|
|
// land on the right pixel. This addresses the
|
|
// significant inaccuracy in GPU normalization in
|
|
// OpenCL 1.0.
|
|
int checkOnlyOnePixel = 0;
|
|
int found_pixel = 0;
|
|
float offset = NORM_OFFSET;
|
|
if (!imageSampler->normalized_coords
|
|
|| imageSampler->filter_mode
|
|
!= CL_FILTER_NEAREST
|
|
|| NORM_OFFSET == 0
|
|
#if defined(__APPLE__)
|
|
// Apple requires its CPU implementation to do
|
|
// correctly rounded address arithmetic in all
|
|
// modes
|
|
|| !(gDeviceType & CL_DEVICE_TYPE_GPU)
|
|
#endif
|
|
)
|
|
offset = 0.0f; // Loop only once
|
|
|
|
for (float norm_offset_x = -offset;
|
|
norm_offset_x <= offset && !found_pixel;
|
|
norm_offset_x += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_y = -offset;
|
|
norm_offset_y <= offset && !found_pixel;
|
|
norm_offset_y += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_z = -offset;
|
|
norm_offset_z <= NORM_OFFSET
|
|
&& !found_pixel;
|
|
norm_offset_z += NORM_OFFSET)
|
|
{
|
|
|
|
int hasDenormals = 0;
|
|
FloatPixel maxPixel =
|
|
sample_image_pixel_float_offset(
|
|
imagePtr, &lodInfo,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j], norm_offset_x,
|
|
norm_offset_y, norm_offset_z,
|
|
imageSampler, expected, 0,
|
|
&hasDenormals, 0);
|
|
|
|
float err1 =
|
|
ABS_ERROR(sRGBmap(resultPtr[0]),
|
|
sRGBmap(expected[0]));
|
|
float err2 =
|
|
ABS_ERROR(sRGBmap(resultPtr[1]),
|
|
sRGBmap(expected[1]));
|
|
float err3 =
|
|
ABS_ERROR(sRGBmap(resultPtr[2]),
|
|
sRGBmap(expected[2]));
|
|
float err4 = ABS_ERROR(resultPtr[3],
|
|
expected[3]);
|
|
// Clamp to the minimum absolute error
|
|
// for the format
|
|
if (err1 > 0
|
|
&& err1 < formatAbsoluteError)
|
|
{
|
|
err1 = 0.0f;
|
|
}
|
|
if (err2 > 0
|
|
&& err2 < formatAbsoluteError)
|
|
{
|
|
err2 = 0.0f;
|
|
}
|
|
if (err3 > 0
|
|
&& err3 < formatAbsoluteError)
|
|
{
|
|
err3 = 0.0f;
|
|
}
|
|
if (err4 > 0
|
|
&& err4 < formatAbsoluteError)
|
|
{
|
|
err4 = 0.0f;
|
|
}
|
|
float maxErr = 0.5;
|
|
|
|
if (!(err1 <= maxErr)
|
|
|| !(err2 <= maxErr)
|
|
|| !(err3 <= maxErr)
|
|
|| !(err4 <= maxErr))
|
|
{
|
|
// Try flushing the denormals
|
|
if (hasDenormals)
|
|
{
|
|
// If implementation decide to
|
|
// flush subnormals to zero, max
|
|
// error needs to be adjusted
|
|
maxErr += 4 * FLT_MIN;
|
|
|
|
maxPixel =
|
|
sample_image_pixel_float_offset(
|
|
imagePtr, &lodInfo,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j],
|
|
norm_offset_x,
|
|
norm_offset_y,
|
|
norm_offset_z,
|
|
imageSampler, expected,
|
|
0, NULL, 0);
|
|
|
|
err1 = ABS_ERROR(
|
|
sRGBmap(resultPtr[0]),
|
|
sRGBmap(expected[0]));
|
|
err2 = ABS_ERROR(
|
|
sRGBmap(resultPtr[1]),
|
|
sRGBmap(expected[1]));
|
|
err3 = ABS_ERROR(
|
|
sRGBmap(resultPtr[2]),
|
|
sRGBmap(expected[2]));
|
|
err4 = ABS_ERROR(resultPtr[3],
|
|
expected[3]);
|
|
}
|
|
}
|
|
|
|
found_pixel = (err1 <= maxErr)
|
|
&& (err2 <= maxErr)
|
|
&& (err3 <= maxErr)
|
|
&& (err4 <= maxErr);
|
|
} // norm_offset_z
|
|
} // norm_offset_y
|
|
} // norm_offset_x
|
|
|
|
// Step 2: If we did not find a match, then print
|
|
// out debugging info.
|
|
if (!found_pixel)
|
|
{
|
|
// For the normalized case on a GPU we put in
|
|
// offsets to the X and Y to see if we land on
|
|
// the right pixel. This addresses the
|
|
// significant inaccuracy in GPU normalization
|
|
// in OpenCL 1.0.
|
|
checkOnlyOnePixel = 0;
|
|
int shouldReturn = 0;
|
|
for (float norm_offset_x = -offset;
|
|
norm_offset_x <= offset
|
|
&& !checkOnlyOnePixel;
|
|
norm_offset_x += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_y = -offset;
|
|
norm_offset_y <= offset
|
|
&& !checkOnlyOnePixel;
|
|
norm_offset_y += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_z = -offset;
|
|
norm_offset_z <= offset
|
|
&& !checkOnlyOnePixel;
|
|
norm_offset_z += NORM_OFFSET)
|
|
{
|
|
|
|
int hasDenormals = 0;
|
|
FloatPixel maxPixel =
|
|
sample_image_pixel_float_offset(
|
|
imagePtr, &lodInfo,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j],
|
|
norm_offset_x,
|
|
norm_offset_y,
|
|
norm_offset_z, imageSampler,
|
|
expected, 0, &hasDenormals,
|
|
0);
|
|
|
|
float err1 =
|
|
ABS_ERROR(sRGBmap(resultPtr[0]),
|
|
sRGBmap(expected[0]));
|
|
float err2 =
|
|
ABS_ERROR(sRGBmap(resultPtr[1]),
|
|
sRGBmap(expected[1]));
|
|
float err3 =
|
|
ABS_ERROR(sRGBmap(resultPtr[2]),
|
|
sRGBmap(expected[2]));
|
|
float err4 = ABS_ERROR(resultPtr[3],
|
|
expected[3]);
|
|
float maxErr = 0.6;
|
|
|
|
if (!(err1 <= maxErr)
|
|
|| !(err2 <= maxErr)
|
|
|| !(err3 <= maxErr)
|
|
|| !(err4 <= maxErr))
|
|
{
|
|
// Try flushing the denormals
|
|
if (hasDenormals)
|
|
{
|
|
// If implementation decide
|
|
// to flush subnormals to
|
|
// zero, max error needs to
|
|
// be adjusted
|
|
maxErr += 4 * FLT_MIN;
|
|
|
|
maxPixel =
|
|
sample_image_pixel_float(
|
|
imagePtr, &lodInfo,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j],
|
|
imageSampler,
|
|
expected, 0, NULL,
|
|
0);
|
|
|
|
err1 = ABS_ERROR(
|
|
sRGBmap(resultPtr[0]),
|
|
sRGBmap(expected[0]));
|
|
err2 = ABS_ERROR(
|
|
sRGBmap(resultPtr[1]),
|
|
sRGBmap(expected[1]));
|
|
err3 = ABS_ERROR(
|
|
sRGBmap(resultPtr[2]),
|
|
sRGBmap(expected[2]));
|
|
err4 =
|
|
ABS_ERROR(resultPtr[3],
|
|
expected[3]);
|
|
}
|
|
}
|
|
|
|
if (!(err1 <= maxErr)
|
|
|| !(err2 <= maxErr)
|
|
|| !(err3 <= maxErr)
|
|
|| !(err4 <= maxErr))
|
|
{
|
|
log_error(
|
|
"FAILED norm_offsets: %g , "
|
|
"%g , %g:\n",
|
|
norm_offset_x,
|
|
norm_offset_y,
|
|
norm_offset_z);
|
|
|
|
float tempOut[4];
|
|
shouldReturn |=
|
|
determine_validation_error_offset<
|
|
float>(
|
|
imagePtr, &lodInfo,
|
|
imageSampler, resultPtr,
|
|
expected, error,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j],
|
|
norm_offset_x,
|
|
norm_offset_y,
|
|
norm_offset_z, j,
|
|
numTries, numClamped,
|
|
true, 0);
|
|
log_error("Step by step:\n");
|
|
FloatPixel temp =
|
|
sample_image_pixel_float_offset(
|
|
imagePtr, &lodInfo,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j],
|
|
norm_offset_x,
|
|
norm_offset_y,
|
|
norm_offset_z,
|
|
imageSampler, tempOut,
|
|
1 /*verbose*/,
|
|
&hasDenormals, 0);
|
|
log_error(
|
|
"\tulps: %2.2f, %2.2f, "
|
|
"%2.2f, %2.2f (max "
|
|
"allowed: %2.2f)\n\n",
|
|
Ulp_Error(resultPtr[0],
|
|
expected[0]),
|
|
Ulp_Error(resultPtr[1],
|
|
expected[1]),
|
|
Ulp_Error(resultPtr[2],
|
|
expected[2]),
|
|
Ulp_Error(resultPtr[3],
|
|
expected[3]),
|
|
Ulp_Error(
|
|
MAKE_HEX_FLOAT(
|
|
0x1.000002p0f,
|
|
0x1000002L, -24)
|
|
+ maxErr,
|
|
MAKE_HEX_FLOAT(
|
|
0x1.000002p0f,
|
|
0x1000002L, -24)));
|
|
}
|
|
else
|
|
{
|
|
log_error(
|
|
"Test error: we should "
|
|
"have detected this "
|
|
"passing above.\n");
|
|
}
|
|
} // norm_offset_z
|
|
} // norm_offset_y
|
|
} // norm_offset_x
|
|
if (shouldReturn) return 1;
|
|
} // if (!found_pixel)
|
|
|
|
resultPtr += 4;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
/*
|
|
* FLOAT output type
|
|
*/
|
|
else if (outputType == kFloat)
|
|
{
|
|
// Validate float results
|
|
float *resultPtr = (float *)(char *)resultValues;
|
|
float expected[4], error = 0.0f;
|
|
float maxErr = get_max_relative_error(
|
|
imageInfo->format, imageSampler, 1 /*3D*/,
|
|
CL_FILTER_LINEAR == imageSampler->filter_mode);
|
|
|
|
for (size_t z = 0, j = 0; z < depth_lod; z++)
|
|
{
|
|
for (size_t y = 0; y < height_lod; y++)
|
|
{
|
|
for (size_t x = 0; x < width_lod; x++, j++)
|
|
{
|
|
// Step 1: go through and see if the results verify
|
|
// for the pixel For the normalized case on a GPU we
|
|
// put in offsets to the X, Y and Z to see if we
|
|
// land on the right pixel. This addresses the
|
|
// significant inaccuracy in GPU normalization in
|
|
// OpenCL 1.0.
|
|
int checkOnlyOnePixel = 0;
|
|
int found_pixel = 0;
|
|
float offset = NORM_OFFSET;
|
|
if (!imageSampler->normalized_coords
|
|
|| imageSampler->filter_mode
|
|
!= CL_FILTER_NEAREST
|
|
|| NORM_OFFSET == 0
|
|
#if defined(__APPLE__)
|
|
// Apple requires its CPU implementation to do
|
|
// correctly rounded address arithmetic in all
|
|
// modes
|
|
|| !(gDeviceType & CL_DEVICE_TYPE_GPU)
|
|
#endif
|
|
)
|
|
offset = 0.0f; // Loop only once
|
|
|
|
for (float norm_offset_x = -offset;
|
|
norm_offset_x <= offset && !found_pixel;
|
|
norm_offset_x += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_y = -offset;
|
|
norm_offset_y <= offset && !found_pixel;
|
|
norm_offset_y += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_z = -offset;
|
|
norm_offset_z <= NORM_OFFSET
|
|
&& !found_pixel;
|
|
norm_offset_z += NORM_OFFSET)
|
|
{
|
|
|
|
int hasDenormals = 0;
|
|
FloatPixel maxPixel =
|
|
sample_image_pixel_float_offset(
|
|
imagePtr, &lodInfo,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j], norm_offset_x,
|
|
norm_offset_y, norm_offset_z,
|
|
imageSampler, expected, 0,
|
|
&hasDenormals, 0);
|
|
|
|
float err1 = ABS_ERROR(resultPtr[0],
|
|
expected[0]);
|
|
float err2 = ABS_ERROR(resultPtr[1],
|
|
expected[1]);
|
|
float err3 = ABS_ERROR(resultPtr[2],
|
|
expected[2]);
|
|
float err4 = ABS_ERROR(resultPtr[3],
|
|
expected[3]);
|
|
// Clamp to the minimum absolute error
|
|
// for the format
|
|
if (err1 > 0
|
|
&& err1 < formatAbsoluteError)
|
|
{
|
|
err1 = 0.0f;
|
|
}
|
|
if (err2 > 0
|
|
&& err2 < formatAbsoluteError)
|
|
{
|
|
err2 = 0.0f;
|
|
}
|
|
if (err3 > 0
|
|
&& err3 < formatAbsoluteError)
|
|
{
|
|
err3 = 0.0f;
|
|
}
|
|
if (err4 > 0
|
|
&& err4 < formatAbsoluteError)
|
|
{
|
|
err4 = 0.0f;
|
|
}
|
|
float maxErr1 = std::max(
|
|
maxErr * maxPixel.p[0], FLT_MIN);
|
|
float maxErr2 = std::max(
|
|
maxErr * maxPixel.p[1], FLT_MIN);
|
|
float maxErr3 = std::max(
|
|
maxErr * maxPixel.p[2], FLT_MIN);
|
|
float maxErr4 = std::max(
|
|
maxErr * maxPixel.p[3], FLT_MIN);
|
|
|
|
if (!(err1 <= maxErr1)
|
|
|| !(err2 <= maxErr2)
|
|
|| !(err3 <= maxErr3)
|
|
|| !(err4 <= maxErr4))
|
|
{
|
|
// Try flushing the denormals
|
|
if (hasDenormals)
|
|
{
|
|
// If implementation decide to
|
|
// flush subnormals to zero, max
|
|
// error needs to be adjusted
|
|
maxErr1 += 4 * FLT_MIN;
|
|
maxErr2 += 4 * FLT_MIN;
|
|
maxErr3 += 4 * FLT_MIN;
|
|
maxErr4 += 4 * FLT_MIN;
|
|
|
|
maxPixel =
|
|
sample_image_pixel_float_offset(
|
|
imagePtr, &lodInfo,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j],
|
|
norm_offset_x,
|
|
norm_offset_y,
|
|
norm_offset_z,
|
|
imageSampler, expected,
|
|
0, NULL, 0);
|
|
|
|
err1 = ABS_ERROR(resultPtr[0],
|
|
expected[0]);
|
|
err2 = ABS_ERROR(resultPtr[1],
|
|
expected[1]);
|
|
err3 = ABS_ERROR(resultPtr[2],
|
|
expected[2]);
|
|
err4 = ABS_ERROR(resultPtr[3],
|
|
expected[3]);
|
|
}
|
|
}
|
|
|
|
found_pixel = (err1 <= maxErr1)
|
|
&& (err2 <= maxErr2)
|
|
&& (err3 <= maxErr3)
|
|
&& (err4 <= maxErr4);
|
|
} // norm_offset_z
|
|
} // norm_offset_y
|
|
} // norm_offset_x
|
|
|
|
// Step 2: If we did not find a match, then print
|
|
// out debugging info.
|
|
if (!found_pixel)
|
|
{
|
|
// For the normalized case on a GPU we put in
|
|
// offsets to the X and Y to see if we land on
|
|
// the right pixel. This addresses the
|
|
// significant inaccuracy in GPU normalization
|
|
// in OpenCL 1.0.
|
|
checkOnlyOnePixel = 0;
|
|
int shouldReturn = 0;
|
|
for (float norm_offset_x = -offset;
|
|
norm_offset_x <= offset
|
|
&& !checkOnlyOnePixel;
|
|
norm_offset_x += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_y = -offset;
|
|
norm_offset_y <= offset
|
|
&& !checkOnlyOnePixel;
|
|
norm_offset_y += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_z = -offset;
|
|
norm_offset_z <= offset
|
|
&& !checkOnlyOnePixel;
|
|
norm_offset_z += NORM_OFFSET)
|
|
{
|
|
|
|
int hasDenormals = 0;
|
|
FloatPixel maxPixel =
|
|
sample_image_pixel_float_offset(
|
|
imagePtr, &lodInfo,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j],
|
|
norm_offset_x,
|
|
norm_offset_y,
|
|
norm_offset_z, imageSampler,
|
|
expected, 0, &hasDenormals,
|
|
0);
|
|
|
|
float err1 = ABS_ERROR(resultPtr[0],
|
|
expected[0]);
|
|
float err2 = ABS_ERROR(resultPtr[1],
|
|
expected[1]);
|
|
float err3 = ABS_ERROR(resultPtr[2],
|
|
expected[2]);
|
|
float err4 = ABS_ERROR(resultPtr[3],
|
|
expected[3]);
|
|
float maxErr1 =
|
|
std::max(maxErr * maxPixel.p[0],
|
|
FLT_MIN);
|
|
float maxErr2 =
|
|
std::max(maxErr * maxPixel.p[1],
|
|
FLT_MIN);
|
|
float maxErr3 =
|
|
std::max(maxErr * maxPixel.p[2],
|
|
FLT_MIN);
|
|
float maxErr4 =
|
|
std::max(maxErr * maxPixel.p[3],
|
|
FLT_MIN);
|
|
|
|
|
|
if (!(err1 <= maxErr1)
|
|
|| !(err2 <= maxErr2)
|
|
|| !(err3 <= maxErr3)
|
|
|| !(err4 <= maxErr4))
|
|
{
|
|
// Try flushing the denormals
|
|
if (hasDenormals)
|
|
{
|
|
maxErr1 += 4 * FLT_MIN;
|
|
maxErr2 += 4 * FLT_MIN;
|
|
maxErr3 += 4 * FLT_MIN;
|
|
maxErr4 += 4 * FLT_MIN;
|
|
|
|
maxPixel =
|
|
sample_image_pixel_float(
|
|
imagePtr, &lodInfo,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j],
|
|
imageSampler,
|
|
expected, 0, NULL,
|
|
0);
|
|
|
|
err1 =
|
|
ABS_ERROR(resultPtr[0],
|
|
expected[0]);
|
|
err2 =
|
|
ABS_ERROR(resultPtr[1],
|
|
expected[1]);
|
|
err3 =
|
|
ABS_ERROR(resultPtr[2],
|
|
expected[2]);
|
|
err4 =
|
|
ABS_ERROR(resultPtr[3],
|
|
expected[3]);
|
|
}
|
|
}
|
|
|
|
if (!(err1 <= maxErr1)
|
|
|| !(err2 <= maxErr2)
|
|
|| !(err3 <= maxErr3)
|
|
|| !(err4 <= maxErr4))
|
|
{
|
|
log_error(
|
|
"FAILED norm_offsets: %g , "
|
|
"%g , %g:\n",
|
|
norm_offset_x,
|
|
norm_offset_y,
|
|
norm_offset_z);
|
|
|
|
float tempOut[4];
|
|
shouldReturn |=
|
|
determine_validation_error_offset<
|
|
float>(
|
|
imagePtr, &lodInfo,
|
|
imageSampler, resultPtr,
|
|
expected, error,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j],
|
|
norm_offset_x,
|
|
norm_offset_y,
|
|
norm_offset_z, j,
|
|
numTries, numClamped,
|
|
true, 0);
|
|
log_error("Step by step:\n");
|
|
FloatPixel temp =
|
|
sample_image_pixel_float_offset(
|
|
imagePtr, &lodInfo,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j],
|
|
norm_offset_x,
|
|
norm_offset_y,
|
|
norm_offset_z,
|
|
imageSampler, tempOut,
|
|
1 /*verbose*/,
|
|
&hasDenormals, 0);
|
|
log_error(
|
|
"\tulps: %2.2f, %2.2f, "
|
|
"%2.2f, %2.2f (max "
|
|
"allowed: %2.2f)\n\n",
|
|
Ulp_Error(resultPtr[0],
|
|
expected[0]),
|
|
Ulp_Error(resultPtr[1],
|
|
expected[1]),
|
|
Ulp_Error(resultPtr[2],
|
|
expected[2]),
|
|
Ulp_Error(resultPtr[3],
|
|
expected[3]),
|
|
Ulp_Error(
|
|
MAKE_HEX_FLOAT(
|
|
0x1.000002p0f,
|
|
0x1000002L, -24)
|
|
+ maxErr,
|
|
MAKE_HEX_FLOAT(
|
|
0x1.000002p0f,
|
|
0x1000002L, -24)));
|
|
}
|
|
else
|
|
{
|
|
log_error(
|
|
"Test error: we should "
|
|
"have detected this "
|
|
"passing above.\n");
|
|
}
|
|
} // norm_offset_z
|
|
} // norm_offset_y
|
|
} // norm_offset_x
|
|
if (shouldReturn) return 1;
|
|
} // if (!found_pixel)
|
|
|
|
resultPtr += 4;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
/*
|
|
* UINT output type
|
|
*/
|
|
else if (outputType == kUInt)
|
|
{
|
|
// Validate unsigned integer results
|
|
unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
|
|
unsigned int expected[4];
|
|
float error;
|
|
for (size_t z = 0, j = 0; z < depth_lod; z++)
|
|
{
|
|
for (size_t y = 0; y < height_lod; y++)
|
|
{
|
|
for (size_t x = 0; x < width_lod; x++, j++)
|
|
{
|
|
// Step 1: go through and see if the results verify
|
|
// for the pixel For the normalized case on a GPU we
|
|
// put in offsets to the X, Y and Z to see if we
|
|
// land on the right pixel. This addresses the
|
|
// significant inaccuracy in GPU normalization in
|
|
// OpenCL 1.0.
|
|
int checkOnlyOnePixel = 0;
|
|
int found_pixel = 0;
|
|
for (float norm_offset_x = -NORM_OFFSET;
|
|
norm_offset_x <= NORM_OFFSET && !found_pixel
|
|
&& !checkOnlyOnePixel;
|
|
norm_offset_x += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_y = -NORM_OFFSET;
|
|
norm_offset_y <= NORM_OFFSET
|
|
&& !found_pixel && !checkOnlyOnePixel;
|
|
norm_offset_y += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_z = -NORM_OFFSET;
|
|
norm_offset_z <= NORM_OFFSET
|
|
&& !found_pixel && !checkOnlyOnePixel;
|
|
norm_offset_z += NORM_OFFSET)
|
|
{
|
|
|
|
// If we are not on a GPU, or we are not
|
|
// normalized, then only test with
|
|
// offsets (0.0, 0.0) E.g., test one
|
|
// pixel.
|
|
if (!imageSampler->normalized_coords
|
|
|| !(gDeviceType
|
|
& CL_DEVICE_TYPE_GPU)
|
|
|| NORM_OFFSET == 0)
|
|
{
|
|
norm_offset_x = 0.0f;
|
|
norm_offset_y = 0.0f;
|
|
norm_offset_z = 0.0f;
|
|
checkOnlyOnePixel = 1;
|
|
}
|
|
|
|
sample_image_pixel_offset<unsigned int>(
|
|
imagePtr, &lodInfo,
|
|
xOffsetValues[j], yOffsetValues[j],
|
|
zOffsetValues[j], norm_offset_x,
|
|
norm_offset_y, norm_offset_z,
|
|
imageSampler, expected, 0);
|
|
|
|
error = errMax(
|
|
errMax(abs_diff_uint(expected[0],
|
|
resultPtr[0]),
|
|
abs_diff_uint(expected[1],
|
|
resultPtr[1])),
|
|
errMax(
|
|
abs_diff_uint(expected[2],
|
|
resultPtr[2]),
|
|
abs_diff_uint(expected[3],
|
|
resultPtr[3])));
|
|
|
|
if (error < MAX_ERR) found_pixel = 1;
|
|
} // norm_offset_z
|
|
} // norm_offset_y
|
|
} // norm_offset_x
|
|
|
|
// Step 2: If we did not find a match, then print
|
|
// out debugging info.
|
|
if (!found_pixel)
|
|
{
|
|
// For the normalized case on a GPU we put in
|
|
// offsets to the X and Y to see if we land on
|
|
// the right pixel. This addresses the
|
|
// significant inaccuracy in GPU normalization
|
|
// in OpenCL 1.0.
|
|
checkOnlyOnePixel = 0;
|
|
int shouldReturn = 0;
|
|
for (float norm_offset_x = -NORM_OFFSET;
|
|
norm_offset_x <= NORM_OFFSET
|
|
&& !checkOnlyOnePixel;
|
|
norm_offset_x += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_y = -NORM_OFFSET;
|
|
norm_offset_y <= NORM_OFFSET
|
|
&& !checkOnlyOnePixel;
|
|
norm_offset_y += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_z = -NORM_OFFSET;
|
|
norm_offset_z <= NORM_OFFSET
|
|
&& !checkOnlyOnePixel;
|
|
norm_offset_z += NORM_OFFSET)
|
|
{
|
|
|
|
// If we are not on a GPU, or we are
|
|
// not normalized, then only test
|
|
// with offsets (0.0, 0.0) E.g.,
|
|
// test one pixel.
|
|
if (!imageSampler->normalized_coords
|
|
|| gDeviceType
|
|
!= CL_DEVICE_TYPE_GPU
|
|
|| NORM_OFFSET == 0)
|
|
{
|
|
norm_offset_x = 0.0f;
|
|
norm_offset_y = 0.0f;
|
|
norm_offset_z = 0.0f;
|
|
checkOnlyOnePixel = 1;
|
|
}
|
|
|
|
sample_image_pixel_offset<
|
|
unsigned int>(
|
|
imagePtr, &lodInfo,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j], norm_offset_x,
|
|
norm_offset_y, norm_offset_z,
|
|
imageSampler, expected, 0);
|
|
|
|
error = errMax(
|
|
errMax(
|
|
abs_diff_uint(expected[0],
|
|
resultPtr[0]),
|
|
abs_diff_uint(
|
|
expected[1],
|
|
resultPtr[1])),
|
|
errMax(
|
|
abs_diff_uint(expected[2],
|
|
resultPtr[2]),
|
|
abs_diff_uint(
|
|
expected[3],
|
|
resultPtr[3])));
|
|
|
|
if (error > MAX_ERR)
|
|
{
|
|
log_error(
|
|
"FAILED norm_offsets: %g , "
|
|
"%g , %g:\n",
|
|
norm_offset_x,
|
|
norm_offset_y,
|
|
norm_offset_z);
|
|
shouldReturn |=
|
|
determine_validation_error_offset<
|
|
unsigned int>(
|
|
imagePtr, &lodInfo,
|
|
imageSampler, resultPtr,
|
|
expected, error,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j],
|
|
norm_offset_x,
|
|
norm_offset_y,
|
|
norm_offset_z, j,
|
|
numTries, numClamped,
|
|
false, 0);
|
|
}
|
|
else
|
|
{
|
|
log_error(
|
|
"Test error: we should "
|
|
"have detected this "
|
|
"passing above.\n");
|
|
}
|
|
} // norm_offset_z
|
|
} // norm_offset_y
|
|
} // norm_offset_x
|
|
if (shouldReturn) return 1;
|
|
} // if (!found_pixel)
|
|
|
|
resultPtr += 4;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
/*
|
|
* INT output type
|
|
*/
|
|
{
|
|
// Validate integer results
|
|
int *resultPtr = (int *)(char *)resultValues;
|
|
int expected[4];
|
|
float error;
|
|
for (size_t z = 0, j = 0; z < depth_lod; z++)
|
|
{
|
|
for (size_t y = 0; y < height_lod; y++)
|
|
{
|
|
for (size_t x = 0; x < width_lod; x++, j++)
|
|
{
|
|
// Step 1: go through and see if the results verify
|
|
// for the pixel For the normalized case on a GPU we
|
|
// put in offsets to the X, Y and Z to see if we
|
|
// land on the right pixel. This addresses the
|
|
// significant inaccuracy in GPU normalization in
|
|
// OpenCL 1.0.
|
|
int checkOnlyOnePixel = 0;
|
|
int found_pixel = 0;
|
|
for (float norm_offset_x = -NORM_OFFSET;
|
|
norm_offset_x <= NORM_OFFSET && !found_pixel
|
|
&& !checkOnlyOnePixel;
|
|
norm_offset_x += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_y = -NORM_OFFSET;
|
|
norm_offset_y <= NORM_OFFSET
|
|
&& !found_pixel && !checkOnlyOnePixel;
|
|
norm_offset_y += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_z = -NORM_OFFSET;
|
|
norm_offset_z <= NORM_OFFSET
|
|
&& !found_pixel && !checkOnlyOnePixel;
|
|
norm_offset_z += NORM_OFFSET)
|
|
{
|
|
|
|
// If we are not on a GPU, or we are not
|
|
// normalized, then only test with
|
|
// offsets (0.0, 0.0) E.g., test one
|
|
// pixel.
|
|
if (!imageSampler->normalized_coords
|
|
|| !(gDeviceType
|
|
& CL_DEVICE_TYPE_GPU)
|
|
|| NORM_OFFSET == 0)
|
|
{
|
|
norm_offset_x = 0.0f;
|
|
norm_offset_y = 0.0f;
|
|
norm_offset_z = 0.0f;
|
|
checkOnlyOnePixel = 1;
|
|
}
|
|
|
|
sample_image_pixel_offset<int>(
|
|
imagePtr, &lodInfo,
|
|
xOffsetValues[j], yOffsetValues[j],
|
|
zOffsetValues[j], norm_offset_x,
|
|
norm_offset_y, norm_offset_z,
|
|
imageSampler, expected, 0);
|
|
|
|
error = errMax(
|
|
errMax(abs_diff_int(expected[0],
|
|
resultPtr[0]),
|
|
abs_diff_int(expected[1],
|
|
resultPtr[1])),
|
|
errMax(abs_diff_int(expected[2],
|
|
resultPtr[2]),
|
|
abs_diff_int(expected[3],
|
|
resultPtr[3])));
|
|
|
|
if (error < MAX_ERR) found_pixel = 1;
|
|
} // norm_offset_z
|
|
} // norm_offset_y
|
|
} // norm_offset_x
|
|
|
|
// Step 2: If we did not find a match, then print
|
|
// out debugging info.
|
|
if (!found_pixel)
|
|
{
|
|
// For the normalized case on a GPU we put in
|
|
// offsets to the X and Y to see if we land on
|
|
// the right pixel. This addresses the
|
|
// significant inaccuracy in GPU normalization
|
|
// in OpenCL 1.0.
|
|
checkOnlyOnePixel = 0;
|
|
int shouldReturn = 0;
|
|
for (float norm_offset_x = -NORM_OFFSET;
|
|
norm_offset_x <= NORM_OFFSET
|
|
&& !checkOnlyOnePixel;
|
|
norm_offset_x += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_y = -NORM_OFFSET;
|
|
norm_offset_y <= NORM_OFFSET
|
|
&& !checkOnlyOnePixel;
|
|
norm_offset_y += NORM_OFFSET)
|
|
{
|
|
for (float norm_offset_z = -NORM_OFFSET;
|
|
norm_offset_z <= NORM_OFFSET
|
|
&& !checkOnlyOnePixel;
|
|
norm_offset_z += NORM_OFFSET)
|
|
{
|
|
|
|
// If we are not on a GPU, or we are
|
|
// not normalized, then only test
|
|
// with offsets (0.0, 0.0) E.g.,
|
|
// test one pixel.
|
|
if (!imageSampler->normalized_coords
|
|
|| gDeviceType
|
|
!= CL_DEVICE_TYPE_GPU
|
|
|| NORM_OFFSET == 0
|
|
|| NORM_OFFSET == 0
|
|
|| NORM_OFFSET == 0)
|
|
{
|
|
norm_offset_x = 0.0f;
|
|
norm_offset_y = 0.0f;
|
|
norm_offset_z = 0.0f;
|
|
checkOnlyOnePixel = 1;
|
|
}
|
|
|
|
sample_image_pixel_offset<int>(
|
|
imagePtr, &lodInfo,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j], norm_offset_x,
|
|
norm_offset_y, norm_offset_z,
|
|
imageSampler, expected, 0);
|
|
|
|
error = errMax(
|
|
errMax(
|
|
abs_diff_int(expected[0],
|
|
resultPtr[0]),
|
|
abs_diff_int(expected[1],
|
|
resultPtr[1])),
|
|
errMax(
|
|
abs_diff_int(expected[2],
|
|
resultPtr[2]),
|
|
abs_diff_int(
|
|
expected[3],
|
|
resultPtr[3])));
|
|
|
|
if (error > MAX_ERR)
|
|
{
|
|
log_error(
|
|
"FAILED norm_offsets: %g , "
|
|
"%g , %g:\n",
|
|
norm_offset_x,
|
|
norm_offset_y,
|
|
norm_offset_z);
|
|
shouldReturn |=
|
|
determine_validation_error_offset<
|
|
int>(
|
|
imagePtr, &lodInfo,
|
|
imageSampler, resultPtr,
|
|
expected, error,
|
|
xOffsetValues[j],
|
|
yOffsetValues[j],
|
|
zOffsetValues[j],
|
|
norm_offset_x,
|
|
norm_offset_y,
|
|
norm_offset_z, j,
|
|
numTries, numClamped,
|
|
false, 0);
|
|
}
|
|
else
|
|
{
|
|
log_error(
|
|
"Test error: we should "
|
|
"have detected this "
|
|
"passing above.\n");
|
|
}
|
|
} // norm_offset_z
|
|
} // norm_offset_y
|
|
} // norm_offset_x
|
|
if (shouldReturn) return 1;
|
|
} // if (!found_pixel)
|
|
|
|
resultPtr += 4;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
{
|
|
nextLevelOffset +=
|
|
image_lod_size * get_pixel_size(imageInfo->format);
|
|
width_lod = lodInfo.width =
|
|
(lodInfo.width >> 1) ? (lodInfo.width >> 1) : 1;
|
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
|
|
height_lod = lodInfo.height =
|
|
(lodInfo.height >> 1) ? (lodInfo.height >> 1) : 1;
|
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
|
|
depth_lod = lodInfo.depth =
|
|
(lodInfo.depth >> 1) ? (lodInfo.depth >> 1) : 1;
|
|
lodInfo.rowPitch =
|
|
lodInfo.width * get_pixel_size(imageInfo->format);
|
|
if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
|
|
lodInfo.slicePitch = lodInfo.rowPitch;
|
|
else if (imageInfo->type == CL_MEM_OBJECT_IMAGE3D
|
|
|| imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
|
|
lodInfo.slicePitch = lodInfo.rowPitch * lodInfo.height;
|
|
}
|
|
}
|
|
|
|
return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
|
|
}
|
|
|
|
void filter_undefined_bits(image_descriptor *imageInfo, char *resultPtr)
|
|
{
|
|
// mask off the top bit (bit 15) if the image format is (CL_UNORM_SHORT_555,
|
|
// CL_RGB). (Note: OpenCL says: the top bit is undefined meaning it can be
|
|
// either 0 or 1.)
|
|
if (imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555)
|
|
{
|
|
cl_ushort *temp = (cl_ushort *)resultPtr;
|
|
temp[0] &= 0x7fff;
|
|
}
|
|
}
|
|
|
|
int filter_rounding_errors(int forceCorrectlyRoundedWrites,
|
|
image_descriptor *imageInfo, float *errors)
|
|
{
|
|
// We are allowed 0.6 absolute error vs. infinitely precise for some
|
|
// normalized formats
|
|
if (0 == forceCorrectlyRoundedWrites
|
|
&& (imageInfo->format->image_channel_data_type == CL_UNORM_INT8
|
|
|| imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010
|
|
|| imageInfo->format->image_channel_data_type == CL_UNORM_INT16
|
|
|| imageInfo->format->image_channel_data_type == CL_SNORM_INT8
|
|
|| imageInfo->format->image_channel_data_type == CL_SNORM_INT16
|
|
|| imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555
|
|
|| imageInfo->format->image_channel_data_type
|
|
== CL_UNORM_SHORT_565))
|
|
{
|
|
if (!(fabsf(errors[0]) > 0.6f) && !(fabsf(errors[1]) > 0.6f)
|
|
&& !(fabsf(errors[2]) > 0.6f) && !(fabsf(errors[3]) > 0.6f))
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|