mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 14:09:03 +00:00
The maintenance of the conformance tests is moving to Github. This commit contains all the changes that have been done in Gitlab since the first public release of the conformance tests. Signed-off-by: Kevin Petit <kevin.petit@arm.com>
1944 lines
58 KiB
C++
1944 lines
58 KiB
C++
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "utils.h"
|
|
|
|
#include "../../test_common/harness/errorHelpers.h"
|
|
#include "../../test_common/harness/rounding_mode.h"
|
|
|
|
#include <math.h>
|
|
|
|
static RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode;
|
|
|
|
|
|
CResult::CResult():
|
|
_result(TEST_PASS), _resultLast(TEST_NORESULT)
|
|
{
|
|
|
|
}
|
|
|
|
CResult::~CResult()
|
|
{
|
|
|
|
}
|
|
|
|
CResult::TTestResult CResult::ResultLast() const
|
|
{
|
|
return _resultLast;
|
|
}
|
|
|
|
int CResult::Result() const
|
|
{
|
|
switch (_result)
|
|
{
|
|
case TEST_NORESULT:
|
|
case TEST_NOTSUPPORTED:
|
|
case TEST_PASS:
|
|
return 0;
|
|
break;
|
|
case TEST_FAIL:
|
|
return 1;
|
|
break;
|
|
case TEST_ERROR:
|
|
return 2;
|
|
break;
|
|
default:
|
|
return -1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
void CResult::ResultSub( TTestResult result )
|
|
{
|
|
_resultLast = result;
|
|
if (static_cast<int>(result) > static_cast<int>(_result))
|
|
_result = result;
|
|
}
|
|
|
|
bool ExtensionCheck(const std::string &extension, cl_device_id deviceID)
|
|
{
|
|
std::string extensions;
|
|
size_t size = 0;
|
|
cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, 0, &size);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
print_error(error, "clGetDeviceInfo failed\n");
|
|
return false;
|
|
}
|
|
|
|
if (size == 0)
|
|
{
|
|
print_error(error, "Invalid extension string size\n");
|
|
return false;
|
|
}
|
|
|
|
extensions.resize(size);
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, size, &extensions[0], 0);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
print_error(error, "clGetDeviceInfo failed\n");
|
|
return false;
|
|
}
|
|
|
|
if (extensions.find(extension) != std::string::npos)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
void FunctionContextCreateToString(TContextFuncType contextCreateFunction, std::string &contextFunction)
|
|
{
|
|
switch(contextCreateFunction)
|
|
{
|
|
case CONTEXT_CREATE_DEFAULT:
|
|
contextFunction = "CreateContext";
|
|
break;
|
|
case CONTEXT_CREATE_FROM_TYPE:
|
|
contextFunction = "CreateContextFromType";
|
|
break;
|
|
default:
|
|
contextFunction = "Unknown";
|
|
log_error("FunctionContextCreateToString(): Unknown create function enum!");
|
|
break;
|
|
}
|
|
}
|
|
|
|
void AdapterToString(cl_dx9_media_adapter_type_khr adapterType, std::string &adapter)
|
|
{
|
|
switch(adapterType)
|
|
{
|
|
case CL_ADAPTER_D3D9_KHR:
|
|
adapter = "D3D9";
|
|
break;
|
|
case CL_ADAPTER_D3D9EX_KHR:
|
|
adapter = "D3D9EX";
|
|
break;
|
|
case CL_ADAPTER_DXVA_KHR:
|
|
adapter = "DXVA";
|
|
break;
|
|
default:
|
|
adapter = "Unknown";
|
|
log_error("AdapterToString(): Unknown adapter type!");
|
|
break;
|
|
}
|
|
}
|
|
|
|
cl_context_info AdapterTypeToContextInfo( cl_dx9_media_adapter_type_khr adapterType )
|
|
{
|
|
switch (adapterType)
|
|
{
|
|
case CL_ADAPTER_D3D9_KHR:
|
|
return CL_CONTEXT_ADAPTER_D3D9_KHR;
|
|
break;
|
|
case CL_ADAPTER_D3D9EX_KHR:
|
|
return CL_CONTEXT_ADAPTER_D3D9EX_KHR;
|
|
break;
|
|
case CL_ADAPTER_DXVA_KHR:
|
|
return CL_CONTEXT_ADAPTER_DXVA_KHR;
|
|
break;
|
|
default:
|
|
log_error("AdapterTypeToContextInfo(): Unknown adapter type!");
|
|
return 0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
void YUVGenerateNV12( std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height,
|
|
cl_uchar valueMin, cl_uchar valueMax, double valueAdd )
|
|
{
|
|
yuv.clear();
|
|
yuv.resize(width * height * 3 / 2, 0);
|
|
|
|
double min = static_cast<double>(valueMin);
|
|
double max = static_cast<double>(valueMax);
|
|
double range = 255;
|
|
double add = static_cast<double>(valueAdd * range);
|
|
double stepX = (max - min) / static_cast<double>(width);
|
|
double stepY = (max - min) /static_cast<double>(height);
|
|
|
|
//generate Y plane
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
unsigned int offset = i * width;
|
|
double valueYPlane0 = static_cast<double>(stepY * i);
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
double valueXPlane0 = static_cast<double>(stepX * j);
|
|
yuv.at(offset + j) = static_cast<cl_uchar>(min + valueXPlane0 / 2 + valueYPlane0 / 2 + add);
|
|
}
|
|
}
|
|
|
|
//generate UV planes
|
|
for (unsigned int i = 0; i < height / 2; ++i)
|
|
{
|
|
unsigned int offset = width * height + i * width;
|
|
double valueYPlane1 = static_cast<double>(stepY * i);
|
|
double valueYPlane2 = static_cast<double>(stepY * (height / 2 + i));
|
|
for (unsigned int j = 0; j < width / 2; ++j)
|
|
{
|
|
double valueXPlane1 = static_cast<double>(stepX * j);
|
|
double valueXPlane2 = static_cast<double>(stepX * (width / 2 + j));
|
|
|
|
yuv.at(offset + j * 2) = static_cast<cl_uchar>(min + valueXPlane1 / 2 + valueYPlane1 / 2 + add);
|
|
yuv.at(offset + j * 2 + 1) = static_cast<cl_uchar>(min + valueXPlane2 / 2 + valueYPlane2 / 2 + add);
|
|
}
|
|
}
|
|
}
|
|
|
|
void YUVGenerateYV12( std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height, cl_uchar valueMin, cl_uchar valueMax, double valueAdd /*= 0.0*/ )
|
|
{
|
|
yuv.clear();
|
|
yuv.resize(width * height * 3 / 2, 0);
|
|
|
|
double min = static_cast<double>(valueMin);
|
|
double max = static_cast<double>(valueMax);
|
|
double range = 255;
|
|
double add = static_cast<double>(valueAdd * range);
|
|
double stepX = (max - min) / static_cast<double>(width);
|
|
double stepY = (max - min) /static_cast<double>(height);
|
|
|
|
unsigned offset = 0;
|
|
|
|
//generate Y plane
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
unsigned int plane0Offset = offset + i * width;
|
|
double valueYPlane0 = static_cast<double>(stepY * i);
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
double valueXPlane0 = static_cast<double>(stepX * j);
|
|
yuv.at(plane0Offset + j) = static_cast<cl_uchar>(min + valueXPlane0 / 2 + valueYPlane0 / 2 + add);
|
|
}
|
|
}
|
|
|
|
//generate V plane
|
|
offset += width * height;
|
|
for (unsigned int i = 0; i < height / 2; ++i)
|
|
{
|
|
unsigned int plane1Offset = offset + i * width / 2;
|
|
double valueYPlane1 = static_cast<double>(stepY * i);
|
|
for (unsigned int j = 0; j < width / 2; ++j)
|
|
{
|
|
double valueXPlane1 = static_cast<double>(stepX * j);
|
|
yuv.at(plane1Offset + j) = static_cast<cl_uchar>(min + valueXPlane1 / 2 + valueYPlane1 / 2 + add);
|
|
}
|
|
}
|
|
|
|
//generate U plane
|
|
offset += width * height / 4;
|
|
for (unsigned int i = 0; i < height / 2; ++i)
|
|
{
|
|
unsigned int plane2Offset = offset + i * width / 2;
|
|
double valueYPlane2 = static_cast<double>(stepY * (height / 2 + i));
|
|
for (unsigned int j = 0; j < width / 2; ++j)
|
|
{
|
|
double valueXPlane2 = static_cast<double>(stepX * j);
|
|
yuv.at(plane2Offset + j) = static_cast<cl_uchar>(min + valueXPlane2 / 2 + valueYPlane2 / 2 + add);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
bool YUVGenerate( TSurfaceFormat surfaceFormat, std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height, cl_uchar valueMin, cl_uchar valueMax, double valueAdd /*= 0.0*/ )
|
|
{
|
|
switch (surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_NV12:
|
|
YUVGenerateNV12(yuv, width, height, valueMin, valueMax, valueAdd);
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
YUVGenerateYV12(yuv, width, height, valueMin, valueMax, valueAdd);
|
|
break;
|
|
default:
|
|
log_error("YUVGenerate(): Invalid surface type\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool YUVSurfaceSetNV12( std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv,
|
|
unsigned int width, unsigned int height )
|
|
{
|
|
#if defined(_WIN32)
|
|
CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
D3DLOCKED_RECT rect;
|
|
if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
|
|
{
|
|
log_error("YUVSurfaceSetNV12(): Surface lock failed\n");
|
|
return false;
|
|
}
|
|
|
|
size_t pitch = rect.Pitch / sizeof(cl_uchar);
|
|
size_t lineSize = width * sizeof(cl_uchar);
|
|
cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
|
|
for (size_t y = 0; y < height; ++y)
|
|
memcpy(ptr + y * pitch, &yuv.at(y * width), lineSize);
|
|
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
memcpy(ptr + height * pitch + y * pitch, &yuv.at(width * height + y * width), lineSize);
|
|
|
|
(*d3dSurface)->UnlockRect();
|
|
|
|
return true;
|
|
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
bool YUVSurfaceSetYV12( std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv,
|
|
unsigned int width, unsigned int height )
|
|
{
|
|
#if defined(_WIN32)
|
|
CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
D3DLOCKED_RECT rect;
|
|
if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
|
|
{
|
|
log_error("YUVSurfaceSetYV12(): Surface lock failed!\n");
|
|
return false;
|
|
}
|
|
|
|
size_t pitch = rect.Pitch / sizeof(cl_uchar);
|
|
size_t pitchHalf = pitch / 2;
|
|
size_t lineSize = width * sizeof(cl_uchar);
|
|
size_t lineHalfSize = lineSize / 2;
|
|
size_t surfaceOffset = 0;
|
|
size_t yuvOffset = 0;
|
|
cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
|
|
|
|
for (size_t y = 0; y < height; ++y)
|
|
memcpy(ptr + surfaceOffset + y * pitch, &yuv.at(yuvOffset + y * width), lineSize);
|
|
|
|
surfaceOffset += height * pitch;
|
|
yuvOffset += width * height;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
memcpy(ptr + surfaceOffset + y * pitchHalf, &yuv.at(yuvOffset + y * lineHalfSize), lineHalfSize);
|
|
|
|
surfaceOffset += pitchHalf * height / 2;
|
|
yuvOffset += width * height / 4;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
memcpy(ptr + surfaceOffset + y * pitchHalf, &yuv.at(yuvOffset + y * lineHalfSize), lineHalfSize);
|
|
|
|
(*d3dSurface)->UnlockRect();
|
|
|
|
return true;
|
|
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
bool YUVSurfaceSet(TSurfaceFormat surfaceFormat, std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height )
|
|
{
|
|
switch (surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_NV12:
|
|
if(!YUVSurfaceSetNV12(surface, yuv, width, height))
|
|
return false;
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
if(!YUVSurfaceSetYV12(surface, yuv, width, height))
|
|
return false;
|
|
break;
|
|
default:
|
|
log_error("YUVSurfaceSet(): Invalid surface type!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool YUVSurfaceGetNV12( std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv,
|
|
unsigned int width, unsigned int height )
|
|
{
|
|
#if defined(_WIN32)
|
|
CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
D3DLOCKED_RECT rect;
|
|
if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
|
|
{
|
|
log_error("YUVSurfaceGetNV12(): Surface lock failed!\n");
|
|
return false;
|
|
}
|
|
|
|
size_t pitch = rect.Pitch / sizeof(cl_uchar);
|
|
size_t lineSize = width * sizeof(cl_uchar);
|
|
cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
|
|
size_t yuvOffset = 0;
|
|
size_t surfaceOffset = 0;
|
|
for (size_t y = 0; y < height; ++y)
|
|
memcpy(&yuv.at(yuvOffset + y * width), ptr + y * pitch, lineSize);
|
|
|
|
yuvOffset += width * height;
|
|
surfaceOffset += pitch * height;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
memcpy(&yuv.at(yuvOffset + y * width), ptr + surfaceOffset + y * pitch, lineSize);
|
|
|
|
(*d3dSurface)->UnlockRect();
|
|
|
|
return true;
|
|
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
bool YUVSurfaceGetYV12( std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height )
|
|
{
|
|
#if defined(_WIN32)
|
|
CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
D3DLOCKED_RECT rect;
|
|
if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
|
|
{
|
|
log_error("YUVSurfaceGetYV12(): Surface lock failed!\n");
|
|
return false;
|
|
}
|
|
|
|
size_t pitch = rect.Pitch / sizeof(cl_uchar);
|
|
size_t pitchHalf = pitch / 2;
|
|
size_t lineSize = width * sizeof(cl_uchar);
|
|
size_t lineHalfSize = lineSize / 2;
|
|
size_t surfaceOffset = 0;
|
|
size_t yuvOffset = 0;
|
|
cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
|
|
|
|
for (size_t y = 0; y < height; ++y)
|
|
memcpy(&yuv.at(yuvOffset + y * width), ptr + surfaceOffset + y * pitch, lineSize);
|
|
|
|
surfaceOffset += pitch * height;
|
|
yuvOffset += width * height;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
memcpy(&yuv.at(yuvOffset + y * lineHalfSize), ptr + surfaceOffset + y * pitchHalf, lineHalfSize);
|
|
|
|
surfaceOffset += pitchHalf * height / 2;
|
|
yuvOffset += width * height / 4;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
memcpy(&yuv.at(yuvOffset + y * lineHalfSize), ptr + surfaceOffset + y * pitchHalf, lineHalfSize);
|
|
|
|
(*d3dSurface)->UnlockRect();
|
|
|
|
return true;
|
|
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
bool YUVSurfaceGet(TSurfaceFormat surfaceFormat, std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv,
|
|
unsigned int width, unsigned int height )
|
|
{
|
|
switch (surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_NV12:
|
|
if(!YUVSurfaceGetNV12(surface, yuv, width, height))
|
|
return false;
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
if(!YUVSurfaceGetYV12(surface, yuv, width, height))
|
|
return false;
|
|
break;
|
|
default:
|
|
log_error("YUVSurfaceGet(): Invalid surface type!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool YUVCompareNV12( const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef,
|
|
unsigned int width, unsigned int height )
|
|
{
|
|
//plane 0 verification
|
|
size_t offset = 0;
|
|
for (size_t y = 0; y < height; ++y)
|
|
{
|
|
size_t plane0Offset = offset + width * y;
|
|
for (size_t x = 0; x < width; ++x)
|
|
{
|
|
if (yuvTest[plane0Offset + x] != yuvRef[plane0Offset + x])
|
|
{
|
|
log_error("Plane 0 (Y) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
|
|
yuvRef[plane0Offset + x], yuvTest[plane0Offset + x], x, y);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
//plane 1 and 2 verification
|
|
offset += width * height;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
{
|
|
size_t plane12Offset = offset + width * y;
|
|
for (size_t x = 0; x < width / 2; ++x)
|
|
{
|
|
if (yuvTest.at(plane12Offset + 2 * x) != yuvRef.at(plane12Offset + 2 * x))
|
|
{
|
|
log_error("Plane 1 (U) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
|
|
yuvRef[plane12Offset + 2 * x], yuvTest[plane12Offset + 2 * x], x, y);
|
|
return false;
|
|
}
|
|
|
|
if (yuvTest.at(plane12Offset + 2 * x + 1) != yuvRef.at(plane12Offset + 2 * x + 1))
|
|
{
|
|
log_error("Plane 2 (V) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
|
|
yuvRef[plane12Offset + 2 * x + 1], yuvTest[plane12Offset + 2 * x + 1], x, y);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool YUVCompareYV12( const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef,
|
|
unsigned int width, unsigned int height )
|
|
{
|
|
//plane 0 verification
|
|
size_t offset = 0;
|
|
for (size_t y = 0; y < height; ++y)
|
|
{
|
|
size_t plane0Offset = width * y;
|
|
for (size_t x = 0; x < width; ++x)
|
|
{
|
|
if (yuvTest.at(plane0Offset + x) != yuvRef.at(plane0Offset + x))
|
|
{
|
|
log_error("Plane 0 (Y) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
|
|
yuvRef[plane0Offset + x], yuvTest[plane0Offset + x], x ,y);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
//plane 1 verification
|
|
offset += width * height;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
{
|
|
size_t plane1Offset = offset + width * y / 2;
|
|
for (size_t x = 0; x < width / 2; ++x)
|
|
{
|
|
if (yuvTest.at(plane1Offset + x) != yuvRef.at(plane1Offset + x))
|
|
{
|
|
log_error("Plane 1 (V) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
|
|
yuvRef[plane1Offset + x], yuvTest[plane1Offset + x], x, y);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
//plane 2 verification
|
|
offset += width * height / 4;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
{
|
|
size_t plane2Offset = offset + width * y / 2;
|
|
for (size_t x = 0; x < width / 2; ++x)
|
|
{
|
|
if (yuvTest.at(plane2Offset + x) != yuvRef.at(plane2Offset + x))
|
|
{
|
|
log_error("Plane 2 (U) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
|
|
yuvRef[plane2Offset + x], yuvTest[plane2Offset + x], x, y);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool YUVCompare( TSurfaceFormat surfaceFormat, const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef,
|
|
unsigned int width, unsigned int height )
|
|
{
|
|
switch (surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_NV12:
|
|
if (!YUVCompareNV12(yuvTest, yuvRef, width, height))
|
|
{
|
|
log_error("OCL object is different than expected!\n");
|
|
return false;
|
|
}
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
if (!YUVCompareYV12(yuvTest, yuvRef, width, height))
|
|
{
|
|
log_error("OCL object is different than expected!\n");
|
|
return false;
|
|
}
|
|
break;
|
|
default:
|
|
log_error("YUVCompare(): Invalid surface type!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<float> &data, unsigned int width, unsigned int height,
|
|
unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ )
|
|
{
|
|
data.clear();
|
|
data.reserve(width * height * channelNum);
|
|
|
|
double valueMin = static_cast<double>(cmin);
|
|
double valueMax = static_cast<double>(cmax);
|
|
double stepX = (valueMax - valueMin) / static_cast<double>(width);
|
|
double stepY = (valueMax - valueMin) /static_cast<double>(height);
|
|
double valueAdd = static_cast<double>(add);
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
double valueY = static_cast<double>(stepY * i);
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
double valueX = static_cast<double>(stepX * j);
|
|
switch (channelNum)
|
|
{
|
|
case 1:
|
|
data.push_back(static_cast<float>(valueMin + valueX / 2 + valueY / 2 + valueAdd));
|
|
break;
|
|
case 2:
|
|
data.push_back(static_cast<float>(valueMin + valueX + valueAdd));
|
|
data.push_back(static_cast<float>(valueMin + valueY + valueAdd));
|
|
break;
|
|
case 4:
|
|
data.push_back(static_cast<float>(valueMin + valueX + valueAdd));
|
|
data.push_back(static_cast<float>(valueMin + valueY + valueAdd));
|
|
data.push_back(static_cast<float>(valueMin + valueX / 2 + valueAdd));
|
|
data.push_back(static_cast<float>(valueMin + valueY / 2 + valueAdd));
|
|
break;
|
|
default:
|
|
log_error("DataGenerate(): invalid channel number!");
|
|
return;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<cl_half> &data, unsigned int width, unsigned int height,
|
|
unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ )
|
|
{
|
|
data.clear();
|
|
data.reserve(width * height * channelNum);
|
|
|
|
double valueMin = static_cast<double>(cmin);
|
|
double valueMax = static_cast<double>(cmax);
|
|
double stepX = (valueMax - valueMin) / static_cast<double>(width);
|
|
double stepY = (valueMax - valueMin) /static_cast<double>(height);
|
|
|
|
switch(type)
|
|
{
|
|
case CL_HALF_FLOAT:
|
|
{
|
|
double valueAdd = static_cast<double>(add);
|
|
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
double valueY = static_cast<double>(stepY * i);
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
double valueX = static_cast<double>(stepX * j);
|
|
switch (channelNum)
|
|
{
|
|
case 1:
|
|
data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueX / 2 + valueY / 2 + valueAdd)));
|
|
break;
|
|
case 2:
|
|
data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueX + valueAdd)));
|
|
data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueY + valueAdd)));
|
|
break;
|
|
case 4:
|
|
data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueX + valueAdd)));
|
|
data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueY + valueAdd)));
|
|
data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueX / 2 + valueAdd)));
|
|
data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueY / 2 + valueAdd)));
|
|
break;
|
|
default:
|
|
log_error("DataGenerate(): invalid channel number!");
|
|
return;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case CL_UNORM_INT16:
|
|
{
|
|
double range = 65535;
|
|
double valueAdd = static_cast<double>(add * range);
|
|
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
double valueY = static_cast<double>(stepY * i * range);
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
double valueX = static_cast<double>(stepX * j * range);
|
|
switch (channelNum)
|
|
{
|
|
case 1:
|
|
data.push_back(static_cast<cl_ushort>(valueMin + valueX / 2 + valueY / 2 + valueAdd));
|
|
break;
|
|
case 2:
|
|
data.push_back(static_cast<cl_ushort>(valueMin + valueX + valueAdd));
|
|
data.push_back(static_cast<cl_ushort>(valueMin + valueY + valueAdd));
|
|
break;
|
|
case 4:
|
|
data.push_back(static_cast<cl_ushort>(valueMin + valueX + valueAdd));
|
|
data.push_back(static_cast<cl_ushort>(valueMin + valueY + valueAdd));
|
|
data.push_back(static_cast<cl_ushort>(valueMin + valueX / 2 + valueAdd));
|
|
data.push_back(static_cast<cl_ushort>(valueMin + valueY / 2 + valueAdd));
|
|
break;
|
|
default:
|
|
log_error("DataGenerate(): invalid channel number!");
|
|
return;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
log_error("DataGenerate(): unknown data type!");
|
|
return;
|
|
break;
|
|
}
|
|
}
|
|
|
|
void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<cl_uchar> &data, unsigned int width, unsigned int height,
|
|
unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ )
|
|
{
|
|
data.clear();
|
|
data.reserve(width * height * channelNum);
|
|
|
|
double valueMin = static_cast<double>(cmin);
|
|
double valueMax = static_cast<double>(cmax);
|
|
double stepX = (valueMax - valueMin) / static_cast<double>(width);
|
|
double stepY = (valueMax - valueMin) /static_cast<double>(height);
|
|
|
|
double range = 255;
|
|
double valueAdd = static_cast<double>(add * range);
|
|
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
double valueY = static_cast<double>(stepY * i * range);
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
double valueX = static_cast<double>(stepX * j * range);
|
|
switch (channelNum)
|
|
{
|
|
case 1:
|
|
data.push_back(static_cast<cl_uchar>(valueMin + valueX / 2 + valueY / 2 + valueAdd));
|
|
break;
|
|
case 2:
|
|
data.push_back(static_cast<cl_uchar>(valueMin + valueX + valueAdd));
|
|
data.push_back(static_cast<cl_uchar>(valueMin + valueY + valueAdd));
|
|
break;
|
|
case 4:
|
|
data.push_back(static_cast<cl_uchar>(valueMin + valueX + valueAdd));
|
|
data.push_back(static_cast<cl_uchar>(valueMin + valueY + valueAdd));
|
|
data.push_back(static_cast<cl_uchar>(valueMin + valueX / 2 + valueAdd));
|
|
if (surfaceFormat == SURFACE_FORMAT_X8R8G8B8)
|
|
data.push_back(static_cast<cl_uchar>(0xff));
|
|
else
|
|
data.push_back(static_cast<cl_uchar>(valueMin + valueY / 2 + valueAdd));
|
|
break;
|
|
default:
|
|
log_error("DataGenerate(): invalid channel number!");
|
|
return;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<float> &dataTest, const std::vector<float> &dataExp,
|
|
unsigned int width, unsigned int height, unsigned int channelNum)
|
|
{
|
|
float epsilon = 0.000001f;
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
unsigned int offset = i * width * channelNum;
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx)
|
|
{
|
|
if (abs(dataTest.at(offset + j * channelNum + planeIdx) - dataExp.at(offset + j * channelNum + planeIdx)) > epsilon)
|
|
{
|
|
log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %f, expected value = %f\n",
|
|
j, i, planeIdx, dataTest[offset + j * channelNum + planeIdx], dataExp[offset + j * channelNum + planeIdx]);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<cl_half> &dataTest, const std::vector<cl_half> &dataExp,
|
|
unsigned int width, unsigned int height, unsigned int channelNum)
|
|
{
|
|
switch(type)
|
|
{
|
|
case CL_HALF_FLOAT:
|
|
{
|
|
float epsilon = 0.001f;
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
unsigned int offset = i * width * channelNum;
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx)
|
|
{
|
|
float test = convert_half_to_float(dataTest.at(offset + j * channelNum + planeIdx));
|
|
float ref = convert_half_to_float(dataExp.at(offset + j * channelNum + planeIdx));
|
|
if (abs(test - ref) > epsilon)
|
|
{
|
|
log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %f, expected value = %f\n",
|
|
j, i, planeIdx, test, ref);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case CL_UNORM_INT16:
|
|
{
|
|
cl_ushort epsilon = 1;
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
unsigned int offset = i * width * channelNum;
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx)
|
|
{
|
|
cl_ushort test = dataTest.at(offset + j * channelNum + planeIdx);
|
|
cl_ushort ref = dataExp.at(offset + j * channelNum + planeIdx);
|
|
if (abs(test - ref) > epsilon)
|
|
{
|
|
log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %i, expected value = %i\n", j, i, planeIdx, test, ref);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
log_error("DataCompare(): Invalid data format!");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<cl_uchar> &dataTest, const std::vector<cl_uchar> &dataExp,
|
|
unsigned int width, unsigned int height, unsigned int planeNum )
|
|
{
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
unsigned int offset = i * width * planeNum;
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
for(unsigned planeIdx = 0; planeIdx < planeNum; ++planeIdx)
|
|
{
|
|
if (surfaceFormat == SURFACE_FORMAT_X8R8G8B8 && planeIdx == 3)
|
|
continue;
|
|
|
|
cl_uchar test = dataTest.at(offset + j * planeNum + planeIdx);
|
|
cl_uchar ref = dataExp.at(offset + j * planeNum + planeIdx);
|
|
if (test != ref)
|
|
{
|
|
log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %i, expected value = %i\n",
|
|
j, i, planeIdx, test, ref);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool GetImageInfo( cl_mem object, cl_image_format formatExp, size_t elementSizeExp, size_t rowPitchExp,
|
|
size_t slicePitchExp, size_t widthExp, size_t heightExp, size_t depthExp , unsigned int planeExp)
|
|
{
|
|
bool result = true;
|
|
|
|
cl_image_format format;
|
|
if (clGetImageInfo(object, CL_IMAGE_FORMAT, sizeof(cl_image_format), &format, 0) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_FORMAT) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if (formatExp.image_channel_order != format.image_channel_order || formatExp.image_channel_data_type != format.image_channel_data_type)
|
|
{
|
|
log_error("Value of CL_IMAGE_FORMAT is different than expected\n");
|
|
result = false;
|
|
}
|
|
|
|
size_t elementSize = 0;
|
|
if (clGetImageInfo(object, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elementSize, 0) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_ELEMENT_SIZE) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if (elementSizeExp != elementSize)
|
|
{
|
|
log_error("Value of CL_IMAGE_ELEMENT_SIZE is different than expected (size: %i, exp size: %i)\n", elementSize, elementSizeExp);
|
|
result = false;
|
|
}
|
|
|
|
size_t rowPitch = 0;
|
|
if (clGetImageInfo(object, CL_IMAGE_ROW_PITCH, sizeof(size_t), &rowPitch, 0) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_ROW_PITCH) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if ((rowPitchExp == 0 && rowPitchExp != rowPitch) || (rowPitchExp > 0 && rowPitchExp > rowPitch))
|
|
{
|
|
log_error("Value of CL_IMAGE_ROW_PITCH is different than expected (size: %i, exp size: %i)\n", rowPitch, rowPitchExp);
|
|
result = false;
|
|
}
|
|
|
|
size_t slicePitch = 0;
|
|
if (clGetImageInfo(object, CL_IMAGE_SLICE_PITCH, sizeof(size_t), &slicePitch, 0) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_SLICE_PITCH) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if ((slicePitchExp == 0 && slicePitchExp != slicePitch) || (slicePitchExp > 0 && slicePitchExp > slicePitch))
|
|
{
|
|
log_error("Value of CL_IMAGE_SLICE_PITCH is different than expected (size: %i, exp size: %i)\n", slicePitch, slicePitchExp);
|
|
result = false;
|
|
}
|
|
|
|
size_t width = 0;
|
|
if (clGetImageInfo(object, CL_IMAGE_WIDTH, sizeof(size_t), &width, 0) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_WIDTH) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if (widthExp != width)
|
|
{
|
|
log_error("Value of CL_IMAGE_WIDTH is different than expected (size: %i, exp size: %i)\n", width, widthExp);
|
|
result = false;
|
|
}
|
|
|
|
size_t height = 0;
|
|
if (clGetImageInfo(object, CL_IMAGE_HEIGHT, sizeof(size_t), &height, 0) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_HEIGHT) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if (heightExp != height)
|
|
{
|
|
log_error("Value of CL_IMAGE_HEIGHT is different than expected (size: %i, exp size: %i)\n", height, heightExp);
|
|
result = false;
|
|
}
|
|
|
|
size_t depth = 0;
|
|
if (clGetImageInfo(object, CL_IMAGE_DEPTH, sizeof(size_t), &depth, 0) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_DEPTH) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if (depthExp != depth)
|
|
{
|
|
log_error("Value of CL_IMAGE_DEPTH is different than expected (size: %i, exp size: %i)\n", depth, depthExp);
|
|
result = false;
|
|
}
|
|
|
|
unsigned int plane = 99;
|
|
size_t paramSize = 0;
|
|
if (clGetImageInfo(object, CL_IMAGE_DX9_MEDIA_PLANE_KHR, sizeof(unsigned int), &plane, ¶mSize) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_MEDIA_SURFACE_PLANE_KHR) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if (planeExp != plane)
|
|
{
|
|
log_error("Value of CL_IMAGE_MEDIA_SURFACE_PLANE_KHR is different than expected (plane: %i, exp plane: %i)\n", plane, planeExp);
|
|
result = false;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
bool GetMemObjInfo( cl_mem object, cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr<CSurfaceWrapper> &surface, void *shareHandleExp )
|
|
{
|
|
bool result = true;
|
|
switch(adapterType)
|
|
{
|
|
case CL_ADAPTER_D3D9_KHR:
|
|
case CL_ADAPTER_D3D9EX_KHR:
|
|
case CL_ADAPTER_DXVA_KHR:
|
|
{
|
|
#if defined(_WIN32)
|
|
cl_dx9_surface_info_khr surfaceInfo;
|
|
#else
|
|
void *surfaceInfo = 0;
|
|
return false;
|
|
#endif
|
|
size_t paramSize = 0;
|
|
if(clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR, sizeof(surfaceInfo), &surfaceInfo, ¶mSize) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
#if defined(_WIN32)
|
|
CD3D9SurfaceWrapper *d3d9Surface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
if (*d3d9Surface != surfaceInfo.resource)
|
|
{
|
|
log_error("Invalid resource for CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n");
|
|
result = false;
|
|
}
|
|
|
|
if (shareHandleExp != surfaceInfo.shared_handle)
|
|
{
|
|
log_error("Invalid shared handle for CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n");
|
|
result = false;
|
|
}
|
|
#else
|
|
return false;
|
|
#endif
|
|
|
|
if (paramSize != sizeof(surfaceInfo))
|
|
{
|
|
log_error("Invalid CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR parameter size: %i, expected: %i\n", paramSize, sizeof(surfaceInfo));
|
|
result = false;
|
|
}
|
|
|
|
paramSize = 0;
|
|
cl_dx9_media_adapter_type_khr mediaAdapterType;
|
|
if(clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR, sizeof(mediaAdapterType), &mediaAdapterType, ¶mSize) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if (adapterType != mediaAdapterType)
|
|
{
|
|
log_error("Invalid media adapter type for CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR\n");
|
|
result = false;
|
|
}
|
|
|
|
if (paramSize != sizeof(mediaAdapterType))
|
|
{
|
|
log_error("Invalid CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR parameter size: %i, expected: %i\n", paramSize, sizeof(mediaAdapterType));
|
|
result = false;
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
log_error("GetMemObjInfo(): Unknown adapter type!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
bool ImageInfoVerify( cl_dx9_media_adapter_type_khr adapterType, const std::vector<cl_mem> &memObjList, unsigned int width, unsigned int height,
|
|
std::auto_ptr<CSurfaceWrapper> &surface, void *sharedHandle)
|
|
{
|
|
if (memObjList.size() != 2 && memObjList.size() != 3)
|
|
{
|
|
log_error("ImageInfoVerify(): Invalid object list parameter\n");
|
|
return false;
|
|
}
|
|
|
|
cl_image_format formatPlane;
|
|
formatPlane.image_channel_data_type = CL_UNORM_INT8;
|
|
formatPlane.image_channel_order = CL_R;
|
|
|
|
//plane 0 verification
|
|
if (!GetImageInfo(memObjList[0], formatPlane, sizeof(cl_uchar),
|
|
width * sizeof(cl_uchar),
|
|
0,
|
|
width, height, 0, 0))
|
|
{
|
|
log_error("clGetImageInfo failed\n");
|
|
return false;
|
|
}
|
|
|
|
switch (memObjList.size())
|
|
{
|
|
case 2:
|
|
{
|
|
formatPlane.image_channel_data_type = CL_UNORM_INT8;
|
|
formatPlane.image_channel_order = CL_RG;
|
|
if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar) * 2,
|
|
width * sizeof(cl_uchar),
|
|
0,
|
|
width / 2, height / 2, 0, 1))
|
|
{
|
|
log_error("clGetImageInfo failed\n");
|
|
return false;
|
|
}
|
|
}
|
|
break;
|
|
case 3:
|
|
{
|
|
if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar),
|
|
width * sizeof(cl_uchar) / 2,
|
|
0,
|
|
width / 2, height / 2, 0, 1))
|
|
{
|
|
log_error("clGetImageInfo failed\n");
|
|
return false;
|
|
}
|
|
|
|
if (!GetImageInfo(memObjList[2], formatPlane, sizeof(cl_uchar),
|
|
width * sizeof(cl_uchar) / 2,
|
|
0,
|
|
width / 2, height / 2, 0, 2))
|
|
{
|
|
log_error("clGetImageInfo failed\n");
|
|
return false;
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
log_error("ImageInfoVerify(): Invalid object list parameter\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
for (size_t i = 0; i < memObjList.size(); ++i)
|
|
{
|
|
if (!GetMemObjInfo(memObjList[i], adapterType, surface, sharedHandle))
|
|
{
|
|
log_error("clGetMemObjInfo(%i) failed\n", i);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType, const cl_image_format imageFormatCheck)
|
|
{
|
|
cl_uint imageFormatsNum = 0;
|
|
cl_int error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, imageType, 0, 0, &imageFormatsNum);
|
|
if(error != CL_SUCCESS)
|
|
{
|
|
log_error("clGetSupportedImageFormats failed\n");
|
|
return false;
|
|
}
|
|
|
|
if(imageFormatsNum < 1)
|
|
{
|
|
log_error("Invalid image format number returned by clGetSupportedImageFormats\n");
|
|
return false;
|
|
}
|
|
|
|
std::vector<cl_image_format> imageFormats(imageFormatsNum);
|
|
error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, imageType, imageFormatsNum, &imageFormats[0], 0);
|
|
if(error != CL_SUCCESS)
|
|
{
|
|
log_error("clGetSupportedImageFormats failed\n");
|
|
return false;
|
|
}
|
|
|
|
for(cl_uint i = 0; i < imageFormatsNum; ++i)
|
|
{
|
|
if(imageFormats[i].image_channel_data_type == imageFormatCheck.image_channel_data_type
|
|
&& imageFormats[i].image_channel_order == imageFormatCheck.image_channel_order)
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
unsigned int ChannelNum( TSurfaceFormat surfaceFormat )
|
|
{
|
|
switch(surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_R32F:
|
|
case SURFACE_FORMAT_R16F:
|
|
case SURFACE_FORMAT_L16:
|
|
case SURFACE_FORMAT_A8:
|
|
case SURFACE_FORMAT_L8:
|
|
return 1;
|
|
break;
|
|
case SURFACE_FORMAT_G32R32F:
|
|
case SURFACE_FORMAT_G16R16F:
|
|
case SURFACE_FORMAT_G16R16:
|
|
case SURFACE_FORMAT_A8L8:
|
|
return 2;
|
|
break;
|
|
case SURFACE_FORMAT_NV12:
|
|
case SURFACE_FORMAT_YV12:
|
|
return 3;
|
|
break;
|
|
case SURFACE_FORMAT_A32B32G32R32F:
|
|
case SURFACE_FORMAT_A16B16G16R16F:
|
|
case SURFACE_FORMAT_A16B16G16R16:
|
|
case SURFACE_FORMAT_A8B8G8R8:
|
|
case SURFACE_FORMAT_X8B8G8R8:
|
|
case SURFACE_FORMAT_A8R8G8B8:
|
|
case SURFACE_FORMAT_X8R8G8B8:
|
|
return 4;
|
|
break;
|
|
default:
|
|
log_error("ChannelNum(): unknown surface format!\n");
|
|
return 0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
unsigned int PlanesNum( TSurfaceFormat surfaceFormat )
|
|
{
|
|
switch(surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_R32F:
|
|
case SURFACE_FORMAT_R16F:
|
|
case SURFACE_FORMAT_L16:
|
|
case SURFACE_FORMAT_A8:
|
|
case SURFACE_FORMAT_L8:
|
|
case SURFACE_FORMAT_G32R32F:
|
|
case SURFACE_FORMAT_G16R16F:
|
|
case SURFACE_FORMAT_G16R16:
|
|
case SURFACE_FORMAT_A8L8:
|
|
case SURFACE_FORMAT_A32B32G32R32F:
|
|
case SURFACE_FORMAT_A16B16G16R16F:
|
|
case SURFACE_FORMAT_A16B16G16R16:
|
|
case SURFACE_FORMAT_A8B8G8R8:
|
|
case SURFACE_FORMAT_X8B8G8R8:
|
|
case SURFACE_FORMAT_A8R8G8B8:
|
|
case SURFACE_FORMAT_X8R8G8B8:
|
|
return 1;
|
|
break;
|
|
case SURFACE_FORMAT_NV12:
|
|
return 2;
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
return 3;
|
|
break;
|
|
default:
|
|
log_error("PlanesNum(): unknown surface format!\n");
|
|
return 0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
#if defined(_WIN32)
|
|
D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat)
|
|
{
|
|
switch(surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_R32F:
|
|
return D3DFMT_R32F;
|
|
break;
|
|
case SURFACE_FORMAT_R16F:
|
|
return D3DFMT_R16F;
|
|
break;
|
|
case SURFACE_FORMAT_L16:
|
|
return D3DFMT_L16;
|
|
break;
|
|
case SURFACE_FORMAT_A8:
|
|
return D3DFMT_A8;
|
|
break;
|
|
case SURFACE_FORMAT_L8:
|
|
return D3DFMT_L8;
|
|
break;
|
|
case SURFACE_FORMAT_G32R32F:
|
|
return D3DFMT_G32R32F;
|
|
break;
|
|
case SURFACE_FORMAT_G16R16F:
|
|
return D3DFMT_G16R16F;
|
|
break;
|
|
case SURFACE_FORMAT_G16R16:
|
|
return D3DFMT_G16R16;
|
|
break;
|
|
case SURFACE_FORMAT_A8L8:
|
|
return D3DFMT_A8L8;
|
|
break;
|
|
case SURFACE_FORMAT_A32B32G32R32F:
|
|
return D3DFMT_A32B32G32R32F;
|
|
break;
|
|
case SURFACE_FORMAT_A16B16G16R16F:
|
|
return D3DFMT_A16B16G16R16F;
|
|
break;
|
|
case SURFACE_FORMAT_A16B16G16R16:
|
|
return D3DFMT_A16B16G16R16;
|
|
break;
|
|
case SURFACE_FORMAT_A8B8G8R8:
|
|
return D3DFMT_A8B8G8R8;
|
|
break;
|
|
case SURFACE_FORMAT_X8B8G8R8:
|
|
return D3DFMT_X8B8G8R8;
|
|
break;
|
|
case SURFACE_FORMAT_A8R8G8B8:
|
|
return D3DFMT_A8R8G8B8;
|
|
break;
|
|
case SURFACE_FORMAT_X8R8G8B8:
|
|
return D3DFMT_X8R8G8B8;
|
|
break;
|
|
case SURFACE_FORMAT_NV12:
|
|
return static_cast<D3DFORMAT>(MAKEFOURCC('N', 'V', '1', '2'));
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
return static_cast<D3DFORMAT>(MAKEFOURCC('Y', 'V', '1', '2'));
|
|
break;
|
|
default:
|
|
log_error("SurfaceFormatToD3D(): unknown surface format!\n");
|
|
return D3DFMT_R32F;
|
|
break;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
bool DeviceCreate( cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr<CDeviceWrapper> &device )
|
|
{
|
|
switch (adapterType)
|
|
{
|
|
#if defined(_WIN32)
|
|
case CL_ADAPTER_D3D9_KHR:
|
|
device = std::auto_ptr<CDeviceWrapper>(new CD3D9Wrapper());
|
|
break;
|
|
case CL_ADAPTER_D3D9EX_KHR:
|
|
device = std::auto_ptr<CDeviceWrapper>(new CD3D9ExWrapper());
|
|
break;
|
|
case CL_ADAPTER_DXVA_KHR:
|
|
device = std::auto_ptr<CDeviceWrapper>(new CDXVAWrapper());
|
|
break;
|
|
#endif
|
|
default:
|
|
log_error("DeviceCreate(): Unknown adapter type!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return device->Status();
|
|
}
|
|
|
|
bool SurfaceFormatCheck( cl_dx9_media_adapter_type_khr adapterType, const CDeviceWrapper &device, TSurfaceFormat surfaceFormat )
|
|
{
|
|
switch (adapterType)
|
|
{
|
|
#if defined(_WIN32)
|
|
case CL_ADAPTER_D3D9_KHR:
|
|
case CL_ADAPTER_D3D9EX_KHR:
|
|
case CL_ADAPTER_DXVA_KHR:
|
|
{
|
|
D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
|
|
LPDIRECT3D9 d3d9 = static_cast<LPDIRECT3D9>(device.D3D());
|
|
D3DDISPLAYMODE d3ddm;
|
|
d3d9->GetAdapterDisplayMode(device.AdapterIdx(), &d3ddm);
|
|
|
|
if( FAILED(d3d9->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, d3ddm.Format, 0, D3DRTYPE_SURFACE, d3dFormat)) )
|
|
return false;
|
|
}
|
|
break;
|
|
#endif
|
|
default:
|
|
log_error("SurfaceFormatCheck(): Unknown adapter type!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format)
|
|
{
|
|
switch(surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_R32F:
|
|
format.image_channel_order = CL_R;
|
|
format.image_channel_data_type = CL_FLOAT;
|
|
break;
|
|
case SURFACE_FORMAT_R16F:
|
|
format.image_channel_order = CL_R;
|
|
format.image_channel_data_type = CL_HALF_FLOAT;
|
|
break;
|
|
case SURFACE_FORMAT_L16:
|
|
format.image_channel_order = CL_R;
|
|
format.image_channel_data_type = CL_UNORM_INT16;
|
|
break;
|
|
case SURFACE_FORMAT_A8:
|
|
format.image_channel_order = CL_A;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_L8:
|
|
format.image_channel_order = CL_R;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_G32R32F:
|
|
format.image_channel_order = CL_RG;
|
|
format.image_channel_data_type = CL_FLOAT;
|
|
break;
|
|
case SURFACE_FORMAT_G16R16F:
|
|
format.image_channel_order = CL_RG;
|
|
format.image_channel_data_type = CL_HALF_FLOAT;
|
|
break;
|
|
case SURFACE_FORMAT_G16R16:
|
|
format.image_channel_order = CL_RG;
|
|
format.image_channel_data_type = CL_UNORM_INT16;
|
|
break;
|
|
case SURFACE_FORMAT_A8L8:
|
|
format.image_channel_order = CL_RG;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_A32B32G32R32F:
|
|
format.image_channel_order = CL_RGBA;
|
|
format.image_channel_data_type = CL_FLOAT;
|
|
break;
|
|
case SURFACE_FORMAT_A16B16G16R16F:
|
|
format.image_channel_order = CL_RGBA;
|
|
format.image_channel_data_type = CL_HALF_FLOAT;
|
|
break;
|
|
case SURFACE_FORMAT_A16B16G16R16:
|
|
format.image_channel_order = CL_RGBA;
|
|
format.image_channel_data_type = CL_UNORM_INT16;
|
|
break;
|
|
case SURFACE_FORMAT_A8B8G8R8:
|
|
format.image_channel_order = CL_RGBA;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_X8B8G8R8:
|
|
format.image_channel_order = CL_RGBA;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_A8R8G8B8:
|
|
format.image_channel_order = CL_BGRA;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_X8R8G8B8:
|
|
format.image_channel_order = CL_BGRA;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_NV12:
|
|
format.image_channel_order = CL_R;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
format.image_channel_order = CL_R;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
default:
|
|
log_error("SurfaceFormatToOCL(): Unknown surface format!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void SurfaceFormatToString( TSurfaceFormat surfaceFormat, std::string &str )
|
|
{
|
|
switch(surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_R32F:
|
|
str = "R32F";
|
|
break;
|
|
case SURFACE_FORMAT_R16F:
|
|
str = "R16F";
|
|
break;
|
|
case SURFACE_FORMAT_L16:
|
|
str = "L16";
|
|
break;
|
|
case SURFACE_FORMAT_A8:
|
|
str = "A8";
|
|
break;
|
|
case SURFACE_FORMAT_L8:
|
|
str = "L8";
|
|
break;
|
|
case SURFACE_FORMAT_G32R32F:
|
|
str = "G32R32F";
|
|
break;
|
|
case SURFACE_FORMAT_G16R16F:
|
|
str = "G16R16F";
|
|
break;
|
|
case SURFACE_FORMAT_G16R16:
|
|
str = "G16R16";
|
|
break;
|
|
case SURFACE_FORMAT_A8L8:
|
|
str = "A8L8";
|
|
break;
|
|
case SURFACE_FORMAT_A32B32G32R32F:
|
|
str = "A32B32G32R32F";
|
|
break;
|
|
case SURFACE_FORMAT_A16B16G16R16F:
|
|
str = "A16B16G16R16F";
|
|
break;
|
|
case SURFACE_FORMAT_A16B16G16R16:
|
|
str = "A16B16G16R16";
|
|
break;
|
|
case SURFACE_FORMAT_A8B8G8R8:
|
|
str = "A8B8G8R8";
|
|
break;
|
|
case SURFACE_FORMAT_X8B8G8R8:
|
|
str = "X8B8G8R8";
|
|
break;
|
|
case SURFACE_FORMAT_A8R8G8B8:
|
|
str = "A8R8G8B8";
|
|
break;
|
|
case SURFACE_FORMAT_X8R8G8B8:
|
|
str = "X8R8G8B8";
|
|
break;
|
|
case SURFACE_FORMAT_NV12:
|
|
str = "NV12";
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
str = "YV12";
|
|
break;
|
|
default:
|
|
log_error("SurfaceFormatToString(): unknown surface format!\n");
|
|
str = "unknown";
|
|
break;
|
|
}
|
|
}
|
|
|
|
bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType, unsigned int width, unsigned int height, TSurfaceFormat surfaceFormat,
|
|
CDeviceWrapper &device, std::auto_ptr<CSurfaceWrapper> &surface, bool sharedHandle, void **objectSharedHandle)
|
|
{
|
|
switch (adapterType)
|
|
{
|
|
#if defined(_WIN32)
|
|
case CL_ADAPTER_D3D9_KHR:
|
|
{
|
|
surface = std::auto_ptr<CD3D9SurfaceWrapper>(new CD3D9SurfaceWrapper);
|
|
CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
HRESULT hr = 0;
|
|
D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
|
|
LPDIRECT3DDEVICE9 d3d9Device = (LPDIRECT3DDEVICE9)device.Device();
|
|
hr = d3d9Device->CreateOffscreenPlainSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface),
|
|
sharedHandle ? objectSharedHandle: 0);
|
|
|
|
if ( FAILED(hr))
|
|
{
|
|
log_error("CreateOffscreenPlainSurface failed\n");
|
|
return false;
|
|
}
|
|
}
|
|
break;
|
|
case CL_ADAPTER_D3D9EX_KHR:
|
|
{
|
|
surface = std::auto_ptr<CD3D9SurfaceWrapper>(new CD3D9SurfaceWrapper);
|
|
CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
HRESULT hr = 0;
|
|
D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
|
|
LPDIRECT3DDEVICE9EX d3d9ExDevice = (LPDIRECT3DDEVICE9EX)device.Device();
|
|
hr = d3d9ExDevice->CreateOffscreenPlainSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface),
|
|
sharedHandle ? objectSharedHandle: 0);
|
|
|
|
if ( FAILED(hr))
|
|
{
|
|
log_error("CreateOffscreenPlainSurface failed\n");
|
|
return false;
|
|
}
|
|
}
|
|
break;
|
|
case CL_ADAPTER_DXVA_KHR:
|
|
{
|
|
surface = std::auto_ptr<CD3D9SurfaceWrapper>(new CD3D9SurfaceWrapper);
|
|
CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
HRESULT hr = 0;
|
|
D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
|
|
IDXVAHD_Device *dxvaDevice = (IDXVAHD_Device *)device.Device();
|
|
hr = dxvaDevice->CreateVideoSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, 0,
|
|
DXVAHD_SURFACE_TYPE_VIDEO_INPUT, 1, &(*d3dSurface), sharedHandle ? objectSharedHandle: 0);
|
|
|
|
if ( FAILED(hr))
|
|
{
|
|
log_error("CreateVideoSurface failed\n");
|
|
return false;
|
|
}
|
|
}
|
|
break;
|
|
#endif
|
|
default:
|
|
log_error("MediaSurfaceCreate(): Unknown adapter type!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
int DetectFloatToHalfRoundingMode( cl_command_queue q ) // Returns CL_SUCCESS on success
|
|
{
|
|
cl_int err = CL_SUCCESS;
|
|
|
|
if( gFloatToHalfRoundingMode == kDefaultRoundingMode )
|
|
{
|
|
// Some numbers near 0.5f, that we look at to see how the values are rounded.
|
|
static const cl_uint inData[4*4] = { 0x3f000fffU, 0x3f001000U, 0x3f001001U, 0U, 0x3f001fffU, 0x3f002000U, 0x3f002001U, 0U,
|
|
0x3f002fffU, 0x3f003000U, 0x3f003001U, 0U, 0x3f003fffU, 0x3f004000U, 0x3f004001U, 0U };
|
|
static const size_t count = sizeof( inData ) / (4*sizeof( inData[0] ));
|
|
const float *inp = (const float*) inData;
|
|
cl_context context = NULL;
|
|
|
|
// Create an input buffer
|
|
err = clGetCommandQueueInfo( q, CL_QUEUE_CONTEXT, sizeof(context), &context, NULL );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not get context from command queue in DetectFloatToHalfRoundingMode (%d)", err );
|
|
return err;
|
|
}
|
|
|
|
cl_mem inBuf = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR, sizeof( inData ), (void*) inData, &err );
|
|
if( NULL == inBuf || err )
|
|
{
|
|
log_error( "Error: could not create input buffer in DetectFloatToHalfRoundingMode (err: %d)", err );
|
|
return err;
|
|
}
|
|
|
|
// Create a small output image
|
|
cl_image_format fmt = { CL_RGBA, CL_HALF_FLOAT };
|
|
cl_image_desc imageDesc;
|
|
imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
|
imageDesc.image_width = count;
|
|
imageDesc.image_height = 1;
|
|
imageDesc.image_depth = 0;
|
|
imageDesc.image_array_size = 0;
|
|
imageDesc.image_row_pitch = 0;
|
|
imageDesc.image_slice_pitch = 0;
|
|
imageDesc.num_mip_levels = 0;
|
|
imageDesc.num_samples = 0;
|
|
imageDesc.buffer = 0;
|
|
|
|
cl_mem outImage = clCreateImage(context, CL_MEM_READ_WRITE, &fmt, &imageDesc, 0, &err);
|
|
if( NULL == outImage || err )
|
|
{
|
|
log_error( "Error: could not create half float out image in DetectFloatToHalfRoundingMode (err: %d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
return err;
|
|
}
|
|
|
|
// Create our program, and a kernel
|
|
const char *kernel[1] = {
|
|
"kernel void detect_round( global float4 *in, write_only image2d_t out )\n"
|
|
"{\n"
|
|
" write_imagef( out, (int2)(get_global_id(0),0), in[get_global_id(0)] );\n"
|
|
"}\n" };
|
|
cl_program program = clCreateProgramWithSource( context, 1, kernel, NULL, &err );
|
|
if( NULL == program || err )
|
|
{
|
|
log_error( "Error: could not create program in DetectFloatToHalfRoundingMode (err: %d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
return err;
|
|
}
|
|
|
|
cl_device_id device = NULL;
|
|
err = clGetCommandQueueInfo( q, CL_QUEUE_DEVICE, sizeof(device), &device, NULL );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not get device from command queue in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
return err;
|
|
}
|
|
|
|
err = clBuildProgram( program, 1, &device, "", NULL, NULL );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not build program in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
return err;
|
|
}
|
|
|
|
cl_kernel k = clCreateKernel( program, "detect_round", &err );
|
|
if( NULL == k || err )
|
|
{
|
|
log_error( "Error: could not create kernel in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
return err;
|
|
}
|
|
|
|
err = clSetKernelArg( k, 0, sizeof( cl_mem ), &inBuf );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not set argument 0 of kernel in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
clReleaseKernel( k );
|
|
return err;
|
|
}
|
|
|
|
err = clSetKernelArg( k, 1, sizeof( cl_mem ), &outImage );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not set argument 1 of kernel in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
clReleaseKernel( k );
|
|
return err;
|
|
}
|
|
|
|
// Run the kernel
|
|
size_t global_work_size = count;
|
|
err = clEnqueueNDRangeKernel( q, k, 1, NULL, &global_work_size, NULL, 0, NULL, NULL );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not enqueue kernel in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
clReleaseKernel( k );
|
|
return err;
|
|
}
|
|
|
|
// read the results
|
|
cl_ushort outBuf[count*4];
|
|
memset( outBuf, -1, sizeof( outBuf ) );
|
|
size_t origin[3] = {0,0,0};
|
|
size_t region[3] = {count,1,1};
|
|
err = clEnqueueReadImage( q, outImage, CL_TRUE, origin, region, 0, 0, outBuf, 0, NULL, NULL );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not read output image in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
clReleaseKernel( k );
|
|
return err;
|
|
}
|
|
|
|
// Generate our list of reference results
|
|
cl_ushort rte_ref[count*4];
|
|
cl_ushort rtz_ref[count*4];
|
|
for( size_t i = 0; i < 4 * count; i++ )
|
|
{
|
|
rte_ref[i] = float2half_rte( inp[i] );
|
|
rtz_ref[i] = float2half_rtz( inp[i] );
|
|
}
|
|
|
|
// Verify that we got something in either rtz or rte mode
|
|
if( 0 == memcmp( rte_ref, outBuf, sizeof( rte_ref )) )
|
|
{
|
|
log_info( "Autodetected float->half rounding mode to be rte\n" );
|
|
gFloatToHalfRoundingMode = kRoundToNearestEven;
|
|
}
|
|
else if ( 0 == memcmp( rtz_ref, outBuf, sizeof( rtz_ref )) )
|
|
{
|
|
log_info( "Autodetected float->half rounding mode to be rtz\n" );
|
|
gFloatToHalfRoundingMode = kRoundTowardZero;
|
|
}
|
|
else
|
|
{
|
|
log_error( "ERROR: float to half conversions proceed with invalid rounding mode!\n" );
|
|
log_info( "\nfor:" );
|
|
for( size_t i = 0; i < count; i++ )
|
|
log_info( " {%a, %a, %a, %a},", inp[4*i], inp[4*i+1], inp[4*i+2], inp[4*i+3] );
|
|
log_info( "\ngot:" );
|
|
for( size_t i = 0; i < count; i++ )
|
|
log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", outBuf[4*i], outBuf[4*i+1], outBuf[4*i+2], outBuf[4*i+3] );
|
|
log_info( "\nrte:" );
|
|
for( size_t i = 0; i < count; i++ )
|
|
log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rte_ref[4*i], rte_ref[4*i+1], rte_ref[4*i+2], rte_ref[4*i+3] );
|
|
log_info( "\nrtz:" );
|
|
for( size_t i = 0; i < count; i++ )
|
|
log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rtz_ref[4*i], rtz_ref[4*i+1], rtz_ref[4*i+2], rtz_ref[4*i+3] );
|
|
log_info( "\n" );
|
|
err = -1;
|
|
gFloatToHalfRoundingMode = kRoundingModeCount; // illegal value
|
|
}
|
|
|
|
// clean up
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
clReleaseKernel( k );
|
|
return err;
|
|
}
|
|
|
|
// Make sure that the rounding mode was successfully detected, if we checked earlier
|
|
if( gFloatToHalfRoundingMode != kRoundToNearestEven && gFloatToHalfRoundingMode != kRoundTowardZero)
|
|
return -2;
|
|
|
|
return err;
|
|
}
|
|
|
|
cl_ushort convert_float_to_half( float f )
|
|
{
|
|
switch( gFloatToHalfRoundingMode )
|
|
{
|
|
case kRoundToNearestEven:
|
|
return float2half_rte( f );
|
|
case kRoundTowardZero:
|
|
return float2half_rtz( f );
|
|
default:
|
|
log_error( "ERROR: Test internal error -- unhandled or unknown float->half rounding mode.\n" );
|
|
exit(-1);
|
|
return 0xffff;
|
|
}
|
|
|
|
}
|
|
|
|
cl_ushort float2half_rte( float f )
|
|
{
|
|
union{ float f; cl_uint u; } u = {f};
|
|
cl_uint sign = (u.u >> 16) & 0x8000;
|
|
float x = fabsf(f);
|
|
|
|
//Nan
|
|
if( x != x )
|
|
{
|
|
u.u >>= (24-11);
|
|
u.u &= 0x7fff;
|
|
u.u |= 0x0200; //silence the NaN
|
|
return u.u | sign;
|
|
}
|
|
|
|
// overflow
|
|
if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) )
|
|
return 0x7c00 | sign;
|
|
|
|
// underflow
|
|
if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) )
|
|
return sign; // The halfway case can return 0x0001 or 0. 0 is even.
|
|
|
|
// very small
|
|
if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
|
|
return sign | 1;
|
|
|
|
// half denormal
|
|
if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
|
|
{
|
|
u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125);
|
|
return sign | u.u;
|
|
}
|
|
|
|
u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13);
|
|
u.u &= 0x7f800000;
|
|
x += u.f;
|
|
u.f = x - u.f;
|
|
u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);
|
|
|
|
return (u.u >> (24-11)) | sign;
|
|
}
|
|
|
|
cl_ushort float2half_rtz( float f )
|
|
{
|
|
union{ float f; cl_uint u; } u = {f};
|
|
cl_uint sign = (u.u >> 16) & 0x8000;
|
|
float x = fabsf(f);
|
|
|
|
//Nan
|
|
if( x != x )
|
|
{
|
|
u.u >>= (24-11);
|
|
u.u &= 0x7fff;
|
|
u.u |= 0x0200; //silence the NaN
|
|
return u.u | sign;
|
|
}
|
|
|
|
// overflow
|
|
if( x >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) )
|
|
{
|
|
if( x == INFINITY )
|
|
return 0x7c00 | sign;
|
|
|
|
return 0x7bff | sign;
|
|
}
|
|
|
|
// underflow
|
|
if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
|
|
return sign; // The halfway case can return 0x0001 or 0. 0 is even.
|
|
|
|
// half denormal
|
|
if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
|
|
{
|
|
x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
|
|
return (cl_ushort)((int) x | sign);
|
|
}
|
|
|
|
u.u &= 0xFFFFE000U;
|
|
u.u -= 0x38000000U;
|
|
|
|
return (u.u >> (24-11)) | sign;
|
|
}
|
|
|
|
float convert_half_to_float( unsigned short halfValue )
|
|
{
|
|
// We have to take care of a few special cases, but in general, we just extract
|
|
// the same components from the half that exist in the float and re-stuff them
|
|
// For a description of the actual half format, see http://en.wikipedia.org/wiki/Half_precision
|
|
// Note: we store these in 32-bit ints to make the bit manipulations easier later
|
|
int sign = ( halfValue >> 15 ) & 0x0001;
|
|
int exponent = ( halfValue >> 10 ) & 0x001f;
|
|
int mantissa = ( halfValue ) & 0x03ff;
|
|
|
|
// Note: we use a union here to be able to access the bits of a float directly
|
|
union
|
|
{
|
|
unsigned int bits;
|
|
float floatValue;
|
|
} outFloat;
|
|
|
|
// Special cases first
|
|
if( exponent == 0 )
|
|
{
|
|
if( mantissa == 0 )
|
|
{
|
|
// If both exponent and mantissa are 0, the number is +/- 0
|
|
outFloat.bits = sign << 31;
|
|
return outFloat.floatValue; // Already done!
|
|
}
|
|
|
|
// If exponent is 0, it's a denormalized number, so we renormalize it
|
|
// Note: this is not terribly efficient, but oh well
|
|
while( ( mantissa & 0x00000400 ) == 0 )
|
|
{
|
|
mantissa <<= 1;
|
|
exponent--;
|
|
}
|
|
|
|
// The first bit is implicit, so we take it off and inc the exponent accordingly
|
|
exponent++;
|
|
mantissa &= ~(0x00000400);
|
|
}
|
|
else if( exponent == 31 ) // Special-case "numbers"
|
|
{
|
|
// If the exponent is 31, it's a special case number (+/- infinity or NAN).
|
|
// If the mantissa is 0, it's infinity, else it's NAN, but in either case, the packing
|
|
// method is the same
|
|
outFloat.bits = ( sign << 31 ) | 0x7f800000 | ( mantissa << 13 );
|
|
return outFloat.floatValue;
|
|
}
|
|
|
|
// Plain ol' normalized number, so adjust to the ranges a 32-bit float expects and repack
|
|
exponent += ( 127 - 15 );
|
|
mantissa <<= 13;
|
|
|
|
outFloat.bits = ( sign << 31 ) | ( exponent << 23 ) | mantissa;
|
|
return outFloat.floatValue;
|
|
} |