mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 22:19:02 +00:00
1938 lines
58 KiB
C++
1938 lines
58 KiB
C++
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "utils.h"
|
|
|
|
#include "../../test_common/harness/errorHelpers.h"
|
|
#include "../../test_common/harness/rounding_mode.h"
|
|
|
|
#include <math.h>
|
|
|
|
static RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode;
|
|
|
|
|
|
CResult::CResult():
|
|
_result(TEST_PASS), _resultLast(TEST_NORESULT)
|
|
{
|
|
|
|
}
|
|
|
|
CResult::~CResult()
|
|
{
|
|
|
|
}
|
|
|
|
CResult::TTestResult CResult::ResultLast() const
|
|
{
|
|
return _resultLast;
|
|
}
|
|
|
|
int CResult::Result() const
|
|
{
|
|
switch (_result)
|
|
{
|
|
case TEST_NORESULT:
|
|
case TEST_NOTSUPPORTED:
|
|
case TEST_PASS:
|
|
return 0;
|
|
break;
|
|
case TEST_FAIL:
|
|
return 1;
|
|
break;
|
|
case TEST_ERROR:
|
|
return 2;
|
|
break;
|
|
default:
|
|
return -1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
void CResult::ResultSub( TTestResult result )
|
|
{
|
|
_resultLast = result;
|
|
if (static_cast<int>(result) > static_cast<int>(_result))
|
|
_result = result;
|
|
}
|
|
|
|
bool ExtensionCheck(const std::string &extension, cl_device_id deviceID)
|
|
{
|
|
std::string extensions;
|
|
size_t size = 0;
|
|
cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, 0, &size);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
print_error(error, "clGetDeviceInfo failed\n");
|
|
return false;
|
|
}
|
|
|
|
if (size == 0)
|
|
{
|
|
print_error(error, "Invalid extension string size\n");
|
|
return false;
|
|
}
|
|
|
|
extensions.resize(size);
|
|
error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, size, &extensions[0], 0);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
print_error(error, "clGetDeviceInfo failed\n");
|
|
return false;
|
|
}
|
|
|
|
if (extensions.find(extension) != std::string::npos)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
void FunctionContextCreateToString(TContextFuncType contextCreateFunction, std::string &contextFunction)
|
|
{
|
|
switch(contextCreateFunction)
|
|
{
|
|
case CONTEXT_CREATE_DEFAULT:
|
|
contextFunction = "CreateContext";
|
|
break;
|
|
case CONTEXT_CREATE_FROM_TYPE:
|
|
contextFunction = "CreateContextFromType";
|
|
break;
|
|
default:
|
|
contextFunction = "Unknown";
|
|
log_error("FunctionContextCreateToString(): Unknown create function enum!");
|
|
break;
|
|
}
|
|
}
|
|
|
|
void AdapterToString(cl_dx9_media_adapter_type_khr adapterType, std::string &adapter)
|
|
{
|
|
switch(adapterType)
|
|
{
|
|
case CL_ADAPTER_D3D9_KHR:
|
|
adapter = "D3D9";
|
|
break;
|
|
case CL_ADAPTER_D3D9EX_KHR:
|
|
adapter = "D3D9EX";
|
|
break;
|
|
case CL_ADAPTER_DXVA_KHR:
|
|
adapter = "DXVA";
|
|
break;
|
|
default:
|
|
adapter = "Unknown";
|
|
log_error("AdapterToString(): Unknown adapter type!");
|
|
break;
|
|
}
|
|
}
|
|
|
|
cl_context_info AdapterTypeToContextInfo( cl_dx9_media_adapter_type_khr adapterType )
|
|
{
|
|
switch (adapterType)
|
|
{
|
|
case CL_ADAPTER_D3D9_KHR:
|
|
return CL_CONTEXT_ADAPTER_D3D9_KHR;
|
|
break;
|
|
case CL_ADAPTER_D3D9EX_KHR:
|
|
return CL_CONTEXT_ADAPTER_D3D9EX_KHR;
|
|
break;
|
|
case CL_ADAPTER_DXVA_KHR:
|
|
return CL_CONTEXT_ADAPTER_DXVA_KHR;
|
|
break;
|
|
default:
|
|
log_error("AdapterTypeToContextInfo(): Unknown adapter type!");
|
|
return 0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
void YUVGenerateNV12( std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height,
|
|
cl_uchar valueMin, cl_uchar valueMax, double valueAdd )
|
|
{
|
|
yuv.clear();
|
|
yuv.resize(width * height * 3 / 2, 0);
|
|
|
|
double min = static_cast<double>(valueMin);
|
|
double max = static_cast<double>(valueMax);
|
|
double range = 255;
|
|
double add = static_cast<double>(valueAdd * range);
|
|
double stepX = (max - min) / static_cast<double>(width);
|
|
double stepY = (max - min) /static_cast<double>(height);
|
|
|
|
//generate Y plane
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
unsigned int offset = i * width;
|
|
double valueYPlane0 = static_cast<double>(stepY * i);
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
double valueXPlane0 = static_cast<double>(stepX * j);
|
|
yuv.at(offset + j) = static_cast<cl_uchar>(min + valueXPlane0 / 2 + valueYPlane0 / 2 + add);
|
|
}
|
|
}
|
|
|
|
//generate UV planes
|
|
for (unsigned int i = 0; i < height / 2; ++i)
|
|
{
|
|
unsigned int offset = width * height + i * width;
|
|
double valueYPlane1 = static_cast<double>(stepY * i);
|
|
double valueYPlane2 = static_cast<double>(stepY * (height / 2 + i));
|
|
for (unsigned int j = 0; j < width / 2; ++j)
|
|
{
|
|
double valueXPlane1 = static_cast<double>(stepX * j);
|
|
double valueXPlane2 = static_cast<double>(stepX * (width / 2 + j));
|
|
|
|
yuv.at(offset + j * 2) = static_cast<cl_uchar>(min + valueXPlane1 / 2 + valueYPlane1 / 2 + add);
|
|
yuv.at(offset + j * 2 + 1) = static_cast<cl_uchar>(min + valueXPlane2 / 2 + valueYPlane2 / 2 + add);
|
|
}
|
|
}
|
|
}
|
|
|
|
void YUVGenerateYV12( std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height, cl_uchar valueMin, cl_uchar valueMax, double valueAdd /*= 0.0*/ )
|
|
{
|
|
yuv.clear();
|
|
yuv.resize(width * height * 3 / 2, 0);
|
|
|
|
double min = static_cast<double>(valueMin);
|
|
double max = static_cast<double>(valueMax);
|
|
double range = 255;
|
|
double add = static_cast<double>(valueAdd * range);
|
|
double stepX = (max - min) / static_cast<double>(width);
|
|
double stepY = (max - min) /static_cast<double>(height);
|
|
|
|
unsigned offset = 0;
|
|
|
|
//generate Y plane
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
unsigned int plane0Offset = offset + i * width;
|
|
double valueYPlane0 = static_cast<double>(stepY * i);
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
double valueXPlane0 = static_cast<double>(stepX * j);
|
|
yuv.at(plane0Offset + j) = static_cast<cl_uchar>(min + valueXPlane0 / 2 + valueYPlane0 / 2 + add);
|
|
}
|
|
}
|
|
|
|
//generate V plane
|
|
offset += width * height;
|
|
for (unsigned int i = 0; i < height / 2; ++i)
|
|
{
|
|
unsigned int plane1Offset = offset + i * width / 2;
|
|
double valueYPlane1 = static_cast<double>(stepY * i);
|
|
for (unsigned int j = 0; j < width / 2; ++j)
|
|
{
|
|
double valueXPlane1 = static_cast<double>(stepX * j);
|
|
yuv.at(plane1Offset + j) = static_cast<cl_uchar>(min + valueXPlane1 / 2 + valueYPlane1 / 2 + add);
|
|
}
|
|
}
|
|
|
|
//generate U plane
|
|
offset += width * height / 4;
|
|
for (unsigned int i = 0; i < height / 2; ++i)
|
|
{
|
|
unsigned int plane2Offset = offset + i * width / 2;
|
|
double valueYPlane2 = static_cast<double>(stepY * (height / 2 + i));
|
|
for (unsigned int j = 0; j < width / 2; ++j)
|
|
{
|
|
double valueXPlane2 = static_cast<double>(stepX * j);
|
|
yuv.at(plane2Offset + j) = static_cast<cl_uchar>(min + valueXPlane2 / 2 + valueYPlane2 / 2 + add);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
bool YUVGenerate( TSurfaceFormat surfaceFormat, std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height, cl_uchar valueMin, cl_uchar valueMax, double valueAdd /*= 0.0*/ )
|
|
{
|
|
switch (surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_NV12:
|
|
YUVGenerateNV12(yuv, width, height, valueMin, valueMax, valueAdd);
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
YUVGenerateYV12(yuv, width, height, valueMin, valueMax, valueAdd);
|
|
break;
|
|
default:
|
|
log_error("YUVGenerate(): Invalid surface type\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool YUVSurfaceSetNV12( std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv,
|
|
unsigned int width, unsigned int height )
|
|
{
|
|
#if defined(_WIN32)
|
|
CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
D3DLOCKED_RECT rect;
|
|
if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
|
|
{
|
|
log_error("YUVSurfaceSetNV12(): Surface lock failed\n");
|
|
return false;
|
|
}
|
|
|
|
size_t pitch = rect.Pitch / sizeof(cl_uchar);
|
|
size_t lineSize = width * sizeof(cl_uchar);
|
|
cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
|
|
for (size_t y = 0; y < height; ++y)
|
|
memcpy(ptr + y * pitch, &yuv.at(y * width), lineSize);
|
|
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
memcpy(ptr + height * pitch + y * pitch, &yuv.at(width * height + y * width), lineSize);
|
|
|
|
(*d3dSurface)->UnlockRect();
|
|
|
|
return true;
|
|
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
bool YUVSurfaceSetYV12( std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv,
|
|
unsigned int width, unsigned int height )
|
|
{
|
|
#if defined(_WIN32)
|
|
CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
D3DLOCKED_RECT rect;
|
|
if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
|
|
{
|
|
log_error("YUVSurfaceSetYV12(): Surface lock failed!\n");
|
|
return false;
|
|
}
|
|
|
|
size_t pitch = rect.Pitch / sizeof(cl_uchar);
|
|
size_t pitchHalf = pitch / 2;
|
|
size_t lineSize = width * sizeof(cl_uchar);
|
|
size_t lineHalfSize = lineSize / 2;
|
|
size_t surfaceOffset = 0;
|
|
size_t yuvOffset = 0;
|
|
cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
|
|
|
|
for (size_t y = 0; y < height; ++y)
|
|
memcpy(ptr + surfaceOffset + y * pitch, &yuv.at(yuvOffset + y * width), lineSize);
|
|
|
|
surfaceOffset += height * pitch;
|
|
yuvOffset += width * height;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
memcpy(ptr + surfaceOffset + y * pitchHalf, &yuv.at(yuvOffset + y * lineHalfSize), lineHalfSize);
|
|
|
|
surfaceOffset += pitchHalf * height / 2;
|
|
yuvOffset += width * height / 4;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
memcpy(ptr + surfaceOffset + y * pitchHalf, &yuv.at(yuvOffset + y * lineHalfSize), lineHalfSize);
|
|
|
|
(*d3dSurface)->UnlockRect();
|
|
|
|
return true;
|
|
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
bool YUVSurfaceSet(TSurfaceFormat surfaceFormat, std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height )
|
|
{
|
|
switch (surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_NV12:
|
|
if(!YUVSurfaceSetNV12(surface, yuv, width, height))
|
|
return false;
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
if(!YUVSurfaceSetYV12(surface, yuv, width, height))
|
|
return false;
|
|
break;
|
|
default:
|
|
log_error("YUVSurfaceSet(): Invalid surface type!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool YUVSurfaceGetNV12( std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv,
|
|
unsigned int width, unsigned int height )
|
|
{
|
|
#if defined(_WIN32)
|
|
CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
D3DLOCKED_RECT rect;
|
|
if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
|
|
{
|
|
log_error("YUVSurfaceGetNV12(): Surface lock failed!\n");
|
|
return false;
|
|
}
|
|
|
|
size_t pitch = rect.Pitch / sizeof(cl_uchar);
|
|
size_t lineSize = width * sizeof(cl_uchar);
|
|
cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
|
|
size_t yuvOffset = 0;
|
|
size_t surfaceOffset = 0;
|
|
for (size_t y = 0; y < height; ++y)
|
|
memcpy(&yuv.at(yuvOffset + y * width), ptr + y * pitch, lineSize);
|
|
|
|
yuvOffset += width * height;
|
|
surfaceOffset += pitch * height;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
memcpy(&yuv.at(yuvOffset + y * width), ptr + surfaceOffset + y * pitch, lineSize);
|
|
|
|
(*d3dSurface)->UnlockRect();
|
|
|
|
return true;
|
|
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
bool YUVSurfaceGetYV12( std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height )
|
|
{
|
|
#if defined(_WIN32)
|
|
CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
D3DLOCKED_RECT rect;
|
|
if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
|
|
{
|
|
log_error("YUVSurfaceGetYV12(): Surface lock failed!\n");
|
|
return false;
|
|
}
|
|
|
|
size_t pitch = rect.Pitch / sizeof(cl_uchar);
|
|
size_t pitchHalf = pitch / 2;
|
|
size_t lineSize = width * sizeof(cl_uchar);
|
|
size_t lineHalfSize = lineSize / 2;
|
|
size_t surfaceOffset = 0;
|
|
size_t yuvOffset = 0;
|
|
cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
|
|
|
|
for (size_t y = 0; y < height; ++y)
|
|
memcpy(&yuv.at(yuvOffset + y * width), ptr + surfaceOffset + y * pitch, lineSize);
|
|
|
|
surfaceOffset += pitch * height;
|
|
yuvOffset += width * height;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
memcpy(&yuv.at(yuvOffset + y * lineHalfSize), ptr + surfaceOffset + y * pitchHalf, lineHalfSize);
|
|
|
|
surfaceOffset += pitchHalf * height / 2;
|
|
yuvOffset += width * height / 4;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
memcpy(&yuv.at(yuvOffset + y * lineHalfSize), ptr + surfaceOffset + y * pitchHalf, lineHalfSize);
|
|
|
|
(*d3dSurface)->UnlockRect();
|
|
|
|
return true;
|
|
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
bool YUVSurfaceGet(TSurfaceFormat surfaceFormat, std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv,
|
|
unsigned int width, unsigned int height )
|
|
{
|
|
switch (surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_NV12:
|
|
if(!YUVSurfaceGetNV12(surface, yuv, width, height))
|
|
return false;
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
if(!YUVSurfaceGetYV12(surface, yuv, width, height))
|
|
return false;
|
|
break;
|
|
default:
|
|
log_error("YUVSurfaceGet(): Invalid surface type!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool YUVCompareNV12( const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef,
|
|
unsigned int width, unsigned int height )
|
|
{
|
|
//plane 0 verification
|
|
size_t offset = 0;
|
|
for (size_t y = 0; y < height; ++y)
|
|
{
|
|
size_t plane0Offset = offset + width * y;
|
|
for (size_t x = 0; x < width; ++x)
|
|
{
|
|
if (yuvTest[plane0Offset + x] != yuvRef[plane0Offset + x])
|
|
{
|
|
log_error("Plane 0 (Y) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
|
|
yuvRef[plane0Offset + x], yuvTest[plane0Offset + x], x, y);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
//plane 1 and 2 verification
|
|
offset += width * height;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
{
|
|
size_t plane12Offset = offset + width * y;
|
|
for (size_t x = 0; x < width / 2; ++x)
|
|
{
|
|
if (yuvTest.at(plane12Offset + 2 * x) != yuvRef.at(plane12Offset + 2 * x))
|
|
{
|
|
log_error("Plane 1 (U) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
|
|
yuvRef[plane12Offset + 2 * x], yuvTest[plane12Offset + 2 * x], x, y);
|
|
return false;
|
|
}
|
|
|
|
if (yuvTest.at(plane12Offset + 2 * x + 1) != yuvRef.at(plane12Offset + 2 * x + 1))
|
|
{
|
|
log_error("Plane 2 (V) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
|
|
yuvRef[plane12Offset + 2 * x + 1], yuvTest[plane12Offset + 2 * x + 1], x, y);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool YUVCompareYV12( const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef,
|
|
unsigned int width, unsigned int height )
|
|
{
|
|
//plane 0 verification
|
|
size_t offset = 0;
|
|
for (size_t y = 0; y < height; ++y)
|
|
{
|
|
size_t plane0Offset = width * y;
|
|
for (size_t x = 0; x < width; ++x)
|
|
{
|
|
if (yuvTest.at(plane0Offset + x) != yuvRef.at(plane0Offset + x))
|
|
{
|
|
log_error("Plane 0 (Y) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
|
|
yuvRef[plane0Offset + x], yuvTest[plane0Offset + x], x ,y);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
//plane 1 verification
|
|
offset += width * height;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
{
|
|
size_t plane1Offset = offset + width * y / 2;
|
|
for (size_t x = 0; x < width / 2; ++x)
|
|
{
|
|
if (yuvTest.at(plane1Offset + x) != yuvRef.at(plane1Offset + x))
|
|
{
|
|
log_error("Plane 1 (V) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
|
|
yuvRef[plane1Offset + x], yuvTest[plane1Offset + x], x, y);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
//plane 2 verification
|
|
offset += width * height / 4;
|
|
for (size_t y = 0; y < height / 2; ++y)
|
|
{
|
|
size_t plane2Offset = offset + width * y / 2;
|
|
for (size_t x = 0; x < width / 2; ++x)
|
|
{
|
|
if (yuvTest.at(plane2Offset + x) != yuvRef.at(plane2Offset + x))
|
|
{
|
|
log_error("Plane 2 (U) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
|
|
yuvRef[plane2Offset + x], yuvTest[plane2Offset + x], x, y);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool YUVCompare( TSurfaceFormat surfaceFormat, const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef,
|
|
unsigned int width, unsigned int height )
|
|
{
|
|
switch (surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_NV12:
|
|
if (!YUVCompareNV12(yuvTest, yuvRef, width, height))
|
|
{
|
|
log_error("OCL object is different than expected!\n");
|
|
return false;
|
|
}
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
if (!YUVCompareYV12(yuvTest, yuvRef, width, height))
|
|
{
|
|
log_error("OCL object is different than expected!\n");
|
|
return false;
|
|
}
|
|
break;
|
|
default:
|
|
log_error("YUVCompare(): Invalid surface type!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<float> &data, unsigned int width, unsigned int height,
|
|
unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ )
|
|
{
|
|
data.clear();
|
|
data.reserve(width * height * channelNum);
|
|
|
|
double valueMin = static_cast<double>(cmin);
|
|
double valueMax = static_cast<double>(cmax);
|
|
double stepX = (valueMax - valueMin) / static_cast<double>(width);
|
|
double stepY = (valueMax - valueMin) /static_cast<double>(height);
|
|
double valueAdd = static_cast<double>(add);
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
double valueY = static_cast<double>(stepY * i);
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
double valueX = static_cast<double>(stepX * j);
|
|
switch (channelNum)
|
|
{
|
|
case 1:
|
|
data.push_back(static_cast<float>(valueMin + valueX / 2 + valueY / 2 + valueAdd));
|
|
break;
|
|
case 2:
|
|
data.push_back(static_cast<float>(valueMin + valueX + valueAdd));
|
|
data.push_back(static_cast<float>(valueMin + valueY + valueAdd));
|
|
break;
|
|
case 4:
|
|
data.push_back(static_cast<float>(valueMin + valueX + valueAdd));
|
|
data.push_back(static_cast<float>(valueMin + valueY + valueAdd));
|
|
data.push_back(static_cast<float>(valueMin + valueX / 2 + valueAdd));
|
|
data.push_back(static_cast<float>(valueMin + valueY / 2 + valueAdd));
|
|
break;
|
|
default:
|
|
log_error("DataGenerate(): invalid channel number!");
|
|
return;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<cl_half> &data, unsigned int width, unsigned int height,
|
|
unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ )
|
|
{
|
|
data.clear();
|
|
data.reserve(width * height * channelNum);
|
|
|
|
double valueMin = static_cast<double>(cmin);
|
|
double valueMax = static_cast<double>(cmax);
|
|
double stepX = (valueMax - valueMin) / static_cast<double>(width);
|
|
double stepY = (valueMax - valueMin) /static_cast<double>(height);
|
|
|
|
switch(type)
|
|
{
|
|
case CL_HALF_FLOAT:
|
|
{
|
|
double valueAdd = static_cast<double>(add);
|
|
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
double valueY = static_cast<double>(stepY * i);
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
double valueX = static_cast<double>(stepX * j);
|
|
switch (channelNum)
|
|
{
|
|
case 1:
|
|
data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueX / 2 + valueY / 2 + valueAdd)));
|
|
break;
|
|
case 2:
|
|
data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueX + valueAdd)));
|
|
data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueY + valueAdd)));
|
|
break;
|
|
case 4:
|
|
data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueX + valueAdd)));
|
|
data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueY + valueAdd)));
|
|
data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueX / 2 + valueAdd)));
|
|
data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueY / 2 + valueAdd)));
|
|
break;
|
|
default:
|
|
log_error("DataGenerate(): invalid channel number!");
|
|
return;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case CL_UNORM_INT16:
|
|
{
|
|
double range = 65535;
|
|
double valueAdd = static_cast<double>(add * range);
|
|
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
double valueY = static_cast<double>(stepY * i * range);
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
double valueX = static_cast<double>(stepX * j * range);
|
|
switch (channelNum)
|
|
{
|
|
case 1:
|
|
data.push_back(static_cast<cl_ushort>(valueMin + valueX / 2 + valueY / 2 + valueAdd));
|
|
break;
|
|
case 2:
|
|
data.push_back(static_cast<cl_ushort>(valueMin + valueX + valueAdd));
|
|
data.push_back(static_cast<cl_ushort>(valueMin + valueY + valueAdd));
|
|
break;
|
|
case 4:
|
|
data.push_back(static_cast<cl_ushort>(valueMin + valueX + valueAdd));
|
|
data.push_back(static_cast<cl_ushort>(valueMin + valueY + valueAdd));
|
|
data.push_back(static_cast<cl_ushort>(valueMin + valueX / 2 + valueAdd));
|
|
data.push_back(static_cast<cl_ushort>(valueMin + valueY / 2 + valueAdd));
|
|
break;
|
|
default:
|
|
log_error("DataGenerate(): invalid channel number!");
|
|
return;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
log_error("DataGenerate(): unknown data type!");
|
|
return;
|
|
break;
|
|
}
|
|
}
|
|
|
|
void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<cl_uchar> &data, unsigned int width, unsigned int height,
|
|
unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ )
|
|
{
|
|
data.clear();
|
|
data.reserve(width * height * channelNum);
|
|
|
|
double valueMin = static_cast<double>(cmin);
|
|
double valueMax = static_cast<double>(cmax);
|
|
double stepX = (valueMax - valueMin) / static_cast<double>(width);
|
|
double stepY = (valueMax - valueMin) /static_cast<double>(height);
|
|
|
|
double range = 255;
|
|
double valueAdd = static_cast<double>(add * range);
|
|
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
double valueY = static_cast<double>(stepY * i * range);
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
double valueX = static_cast<double>(stepX * j * range);
|
|
switch (channelNum)
|
|
{
|
|
case 1:
|
|
data.push_back(static_cast<cl_uchar>(valueMin + valueX / 2 + valueY / 2 + valueAdd));
|
|
break;
|
|
case 2:
|
|
data.push_back(static_cast<cl_uchar>(valueMin + valueX + valueAdd));
|
|
data.push_back(static_cast<cl_uchar>(valueMin + valueY + valueAdd));
|
|
break;
|
|
case 4:
|
|
data.push_back(static_cast<cl_uchar>(valueMin + valueX + valueAdd));
|
|
data.push_back(static_cast<cl_uchar>(valueMin + valueY + valueAdd));
|
|
data.push_back(static_cast<cl_uchar>(valueMin + valueX / 2 + valueAdd));
|
|
if (surfaceFormat == SURFACE_FORMAT_X8R8G8B8)
|
|
data.push_back(static_cast<cl_uchar>(0xff));
|
|
else
|
|
data.push_back(static_cast<cl_uchar>(valueMin + valueY / 2 + valueAdd));
|
|
break;
|
|
default:
|
|
log_error("DataGenerate(): invalid channel number!");
|
|
return;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<float> &dataTest, const std::vector<float> &dataExp,
|
|
unsigned int width, unsigned int height, unsigned int channelNum)
|
|
{
|
|
float epsilon = 0.000001f;
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
unsigned int offset = i * width * channelNum;
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx)
|
|
{
|
|
if (abs(dataTest.at(offset + j * channelNum + planeIdx) - dataExp.at(offset + j * channelNum + planeIdx)) > epsilon)
|
|
{
|
|
log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %f, expected value = %f\n",
|
|
j, i, planeIdx, dataTest[offset + j * channelNum + planeIdx], dataExp[offset + j * channelNum + planeIdx]);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<cl_half> &dataTest, const std::vector<cl_half> &dataExp,
|
|
unsigned int width, unsigned int height, unsigned int channelNum)
|
|
{
|
|
switch(type)
|
|
{
|
|
case CL_HALF_FLOAT:
|
|
{
|
|
float epsilon = 0.001f;
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
unsigned int offset = i * width * channelNum;
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx)
|
|
{
|
|
float test = convert_half_to_float(dataTest.at(offset + j * channelNum + planeIdx));
|
|
float ref = convert_half_to_float(dataExp.at(offset + j * channelNum + planeIdx));
|
|
if (abs(test - ref) > epsilon)
|
|
{
|
|
log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %f, expected value = %f\n",
|
|
j, i, planeIdx, test, ref);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case CL_UNORM_INT16:
|
|
{
|
|
cl_ushort epsilon = 1;
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
unsigned int offset = i * width * channelNum;
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx)
|
|
{
|
|
cl_ushort test = dataTest.at(offset + j * channelNum + planeIdx);
|
|
cl_ushort ref = dataExp.at(offset + j * channelNum + planeIdx);
|
|
if (abs(test - ref) > epsilon)
|
|
{
|
|
log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %i, expected value = %i\n", j, i, planeIdx, test, ref);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
log_error("DataCompare(): Invalid data format!");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<cl_uchar> &dataTest, const std::vector<cl_uchar> &dataExp,
|
|
unsigned int width, unsigned int height, unsigned int planeNum )
|
|
{
|
|
for (unsigned int i = 0; i < height; ++i)
|
|
{
|
|
unsigned int offset = i * width * planeNum;
|
|
for (unsigned int j = 0; j < width; ++j)
|
|
{
|
|
for(unsigned planeIdx = 0; planeIdx < planeNum; ++planeIdx)
|
|
{
|
|
if (surfaceFormat == SURFACE_FORMAT_X8R8G8B8 && planeIdx == 3)
|
|
continue;
|
|
|
|
cl_uchar test = dataTest.at(offset + j * planeNum + planeIdx);
|
|
cl_uchar ref = dataExp.at(offset + j * planeNum + planeIdx);
|
|
if (test != ref)
|
|
{
|
|
log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %i, expected value = %i\n",
|
|
j, i, planeIdx, test, ref);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool GetImageInfo( cl_mem object, cl_image_format formatExp, size_t elementSizeExp, size_t rowPitchExp,
|
|
size_t slicePitchExp, size_t widthExp, size_t heightExp, size_t depthExp , unsigned int planeExp)
|
|
{
|
|
bool result = true;
|
|
|
|
cl_image_format format;
|
|
if (clGetImageInfo(object, CL_IMAGE_FORMAT, sizeof(cl_image_format), &format, 0) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_FORMAT) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if (formatExp.image_channel_order != format.image_channel_order || formatExp.image_channel_data_type != format.image_channel_data_type)
|
|
{
|
|
log_error("Value of CL_IMAGE_FORMAT is different than expected\n");
|
|
result = false;
|
|
}
|
|
|
|
size_t elementSize = 0;
|
|
if (clGetImageInfo(object, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elementSize, 0) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_ELEMENT_SIZE) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if (elementSizeExp != elementSize)
|
|
{
|
|
log_error("Value of CL_IMAGE_ELEMENT_SIZE is different than expected (size: %i, exp size: %i)\n", elementSize, elementSizeExp);
|
|
result = false;
|
|
}
|
|
|
|
size_t rowPitch = 0;
|
|
if (clGetImageInfo(object, CL_IMAGE_ROW_PITCH, sizeof(size_t), &rowPitch, 0) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_ROW_PITCH) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if ((rowPitchExp == 0 && rowPitchExp != rowPitch) || (rowPitchExp > 0 && rowPitchExp > rowPitch))
|
|
{
|
|
log_error("Value of CL_IMAGE_ROW_PITCH is different than expected (size: %i, exp size: %i)\n", rowPitch, rowPitchExp);
|
|
result = false;
|
|
}
|
|
|
|
size_t slicePitch = 0;
|
|
if (clGetImageInfo(object, CL_IMAGE_SLICE_PITCH, sizeof(size_t), &slicePitch, 0) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_SLICE_PITCH) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if ((slicePitchExp == 0 && slicePitchExp != slicePitch) || (slicePitchExp > 0 && slicePitchExp > slicePitch))
|
|
{
|
|
log_error("Value of CL_IMAGE_SLICE_PITCH is different than expected (size: %i, exp size: %i)\n", slicePitch, slicePitchExp);
|
|
result = false;
|
|
}
|
|
|
|
size_t width = 0;
|
|
if (clGetImageInfo(object, CL_IMAGE_WIDTH, sizeof(size_t), &width, 0) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_WIDTH) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if (widthExp != width)
|
|
{
|
|
log_error("Value of CL_IMAGE_WIDTH is different than expected (size: %i, exp size: %i)\n", width, widthExp);
|
|
result = false;
|
|
}
|
|
|
|
size_t height = 0;
|
|
if (clGetImageInfo(object, CL_IMAGE_HEIGHT, sizeof(size_t), &height, 0) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_HEIGHT) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if (heightExp != height)
|
|
{
|
|
log_error("Value of CL_IMAGE_HEIGHT is different than expected (size: %i, exp size: %i)\n", height, heightExp);
|
|
result = false;
|
|
}
|
|
|
|
size_t depth = 0;
|
|
if (clGetImageInfo(object, CL_IMAGE_DEPTH, sizeof(size_t), &depth, 0) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_DEPTH) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if (depthExp != depth)
|
|
{
|
|
log_error("Value of CL_IMAGE_DEPTH is different than expected (size: %i, exp size: %i)\n", depth, depthExp);
|
|
result = false;
|
|
}
|
|
|
|
unsigned int plane = 99;
|
|
size_t paramSize = 0;
|
|
if (clGetImageInfo(object, CL_IMAGE_DX9_MEDIA_PLANE_KHR, sizeof(unsigned int), &plane, ¶mSize) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_IMAGE_MEDIA_SURFACE_PLANE_KHR) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if (planeExp != plane)
|
|
{
|
|
log_error("Value of CL_IMAGE_MEDIA_SURFACE_PLANE_KHR is different than expected (plane: %i, exp plane: %i)\n", plane, planeExp);
|
|
result = false;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
bool GetMemObjInfo( cl_mem object, cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr<CSurfaceWrapper> &surface, void *shareHandleExp )
|
|
{
|
|
bool result = true;
|
|
switch(adapterType)
|
|
{
|
|
case CL_ADAPTER_D3D9_KHR:
|
|
case CL_ADAPTER_D3D9EX_KHR:
|
|
case CL_ADAPTER_DXVA_KHR:
|
|
{
|
|
#if defined(_WIN32)
|
|
cl_dx9_surface_info_khr surfaceInfo;
|
|
#else
|
|
void *surfaceInfo = 0;
|
|
return false;
|
|
#endif
|
|
size_t paramSize = 0;
|
|
if(clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR, sizeof(surfaceInfo), &surfaceInfo, ¶mSize) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
#if defined(_WIN32)
|
|
CD3D9SurfaceWrapper *d3d9Surface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
if (*d3d9Surface != surfaceInfo.resource)
|
|
{
|
|
log_error("Invalid resource for CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n");
|
|
result = false;
|
|
}
|
|
|
|
if (shareHandleExp != surfaceInfo.shared_handle)
|
|
{
|
|
log_error("Invalid shared handle for CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n");
|
|
result = false;
|
|
}
|
|
#else
|
|
return false;
|
|
#endif
|
|
|
|
if (paramSize != sizeof(surfaceInfo))
|
|
{
|
|
log_error("Invalid CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR parameter size: %i, expected: %i\n", paramSize, sizeof(surfaceInfo));
|
|
result = false;
|
|
}
|
|
|
|
paramSize = 0;
|
|
cl_dx9_media_adapter_type_khr mediaAdapterType;
|
|
if(clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR, sizeof(mediaAdapterType), &mediaAdapterType, ¶mSize) != CL_SUCCESS)
|
|
{
|
|
log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR) failed\n");
|
|
result = false;
|
|
}
|
|
|
|
if (adapterType != mediaAdapterType)
|
|
{
|
|
log_error("Invalid media adapter type for CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR\n");
|
|
result = false;
|
|
}
|
|
|
|
if (paramSize != sizeof(mediaAdapterType))
|
|
{
|
|
log_error("Invalid CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR parameter size: %i, expected: %i\n", paramSize, sizeof(mediaAdapterType));
|
|
result = false;
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
log_error("GetMemObjInfo(): Unknown adapter type!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
bool ImageInfoVerify( cl_dx9_media_adapter_type_khr adapterType, const std::vector<cl_mem> &memObjList, unsigned int width, unsigned int height,
|
|
std::auto_ptr<CSurfaceWrapper> &surface, void *sharedHandle)
|
|
{
|
|
if (memObjList.size() != 2 && memObjList.size() != 3)
|
|
{
|
|
log_error("ImageInfoVerify(): Invalid object list parameter\n");
|
|
return false;
|
|
}
|
|
|
|
cl_image_format formatPlane;
|
|
formatPlane.image_channel_data_type = CL_UNORM_INT8;
|
|
formatPlane.image_channel_order = CL_R;
|
|
|
|
//plane 0 verification
|
|
if (!GetImageInfo(memObjList[0], formatPlane, sizeof(cl_uchar),
|
|
width * sizeof(cl_uchar),
|
|
0,
|
|
width, height, 0, 0))
|
|
{
|
|
log_error("clGetImageInfo failed\n");
|
|
return false;
|
|
}
|
|
|
|
switch (memObjList.size())
|
|
{
|
|
case 2:
|
|
{
|
|
formatPlane.image_channel_data_type = CL_UNORM_INT8;
|
|
formatPlane.image_channel_order = CL_RG;
|
|
if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar) * 2,
|
|
width * sizeof(cl_uchar),
|
|
0,
|
|
width / 2, height / 2, 0, 1))
|
|
{
|
|
log_error("clGetImageInfo failed\n");
|
|
return false;
|
|
}
|
|
}
|
|
break;
|
|
case 3:
|
|
{
|
|
if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar),
|
|
width * sizeof(cl_uchar) / 2,
|
|
0,
|
|
width / 2, height / 2, 0, 1))
|
|
{
|
|
log_error("clGetImageInfo failed\n");
|
|
return false;
|
|
}
|
|
|
|
if (!GetImageInfo(memObjList[2], formatPlane, sizeof(cl_uchar),
|
|
width * sizeof(cl_uchar) / 2,
|
|
0,
|
|
width / 2, height / 2, 0, 2))
|
|
{
|
|
log_error("clGetImageInfo failed\n");
|
|
return false;
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
log_error("ImageInfoVerify(): Invalid object list parameter\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
for (size_t i = 0; i < memObjList.size(); ++i)
|
|
{
|
|
if (!GetMemObjInfo(memObjList[i], adapterType, surface, sharedHandle))
|
|
{
|
|
log_error("clGetMemObjInfo(%i) failed\n", i);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType, const cl_image_format imageFormatCheck)
|
|
{
|
|
cl_uint imageFormatsNum = 0;
|
|
cl_int error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, imageType, 0, 0, &imageFormatsNum);
|
|
if(error != CL_SUCCESS)
|
|
{
|
|
log_error("clGetSupportedImageFormats failed\n");
|
|
return false;
|
|
}
|
|
|
|
if(imageFormatsNum < 1)
|
|
{
|
|
log_error("Invalid image format number returned by clGetSupportedImageFormats\n");
|
|
return false;
|
|
}
|
|
|
|
std::vector<cl_image_format> imageFormats(imageFormatsNum);
|
|
error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, imageType, imageFormatsNum, &imageFormats[0], 0);
|
|
if(error != CL_SUCCESS)
|
|
{
|
|
log_error("clGetSupportedImageFormats failed\n");
|
|
return false;
|
|
}
|
|
|
|
for(cl_uint i = 0; i < imageFormatsNum; ++i)
|
|
{
|
|
if(imageFormats[i].image_channel_data_type == imageFormatCheck.image_channel_data_type
|
|
&& imageFormats[i].image_channel_order == imageFormatCheck.image_channel_order)
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
unsigned int ChannelNum( TSurfaceFormat surfaceFormat )
|
|
{
|
|
switch(surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_R32F:
|
|
case SURFACE_FORMAT_R16F:
|
|
case SURFACE_FORMAT_L16:
|
|
case SURFACE_FORMAT_A8:
|
|
case SURFACE_FORMAT_L8:
|
|
return 1;
|
|
break;
|
|
case SURFACE_FORMAT_G32R32F:
|
|
case SURFACE_FORMAT_G16R16F:
|
|
case SURFACE_FORMAT_G16R16:
|
|
case SURFACE_FORMAT_A8L8:
|
|
return 2;
|
|
break;
|
|
case SURFACE_FORMAT_NV12:
|
|
case SURFACE_FORMAT_YV12:
|
|
return 3;
|
|
break;
|
|
case SURFACE_FORMAT_A32B32G32R32F:
|
|
case SURFACE_FORMAT_A16B16G16R16F:
|
|
case SURFACE_FORMAT_A16B16G16R16:
|
|
case SURFACE_FORMAT_A8B8G8R8:
|
|
case SURFACE_FORMAT_X8B8G8R8:
|
|
case SURFACE_FORMAT_A8R8G8B8:
|
|
case SURFACE_FORMAT_X8R8G8B8:
|
|
return 4;
|
|
break;
|
|
default:
|
|
log_error("ChannelNum(): unknown surface format!\n");
|
|
return 0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
unsigned int PlanesNum( TSurfaceFormat surfaceFormat )
|
|
{
|
|
switch(surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_R32F:
|
|
case SURFACE_FORMAT_R16F:
|
|
case SURFACE_FORMAT_L16:
|
|
case SURFACE_FORMAT_A8:
|
|
case SURFACE_FORMAT_L8:
|
|
case SURFACE_FORMAT_G32R32F:
|
|
case SURFACE_FORMAT_G16R16F:
|
|
case SURFACE_FORMAT_G16R16:
|
|
case SURFACE_FORMAT_A8L8:
|
|
case SURFACE_FORMAT_A32B32G32R32F:
|
|
case SURFACE_FORMAT_A16B16G16R16F:
|
|
case SURFACE_FORMAT_A16B16G16R16:
|
|
case SURFACE_FORMAT_A8B8G8R8:
|
|
case SURFACE_FORMAT_X8B8G8R8:
|
|
case SURFACE_FORMAT_A8R8G8B8:
|
|
case SURFACE_FORMAT_X8R8G8B8:
|
|
return 1;
|
|
break;
|
|
case SURFACE_FORMAT_NV12:
|
|
return 2;
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
return 3;
|
|
break;
|
|
default:
|
|
log_error("PlanesNum(): unknown surface format!\n");
|
|
return 0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
#if defined(_WIN32)
|
|
D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat)
|
|
{
|
|
switch(surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_R32F:
|
|
return D3DFMT_R32F;
|
|
break;
|
|
case SURFACE_FORMAT_R16F:
|
|
return D3DFMT_R16F;
|
|
break;
|
|
case SURFACE_FORMAT_L16:
|
|
return D3DFMT_L16;
|
|
break;
|
|
case SURFACE_FORMAT_A8:
|
|
return D3DFMT_A8;
|
|
break;
|
|
case SURFACE_FORMAT_L8:
|
|
return D3DFMT_L8;
|
|
break;
|
|
case SURFACE_FORMAT_G32R32F:
|
|
return D3DFMT_G32R32F;
|
|
break;
|
|
case SURFACE_FORMAT_G16R16F:
|
|
return D3DFMT_G16R16F;
|
|
break;
|
|
case SURFACE_FORMAT_G16R16:
|
|
return D3DFMT_G16R16;
|
|
break;
|
|
case SURFACE_FORMAT_A8L8:
|
|
return D3DFMT_A8L8;
|
|
break;
|
|
case SURFACE_FORMAT_A32B32G32R32F:
|
|
return D3DFMT_A32B32G32R32F;
|
|
break;
|
|
case SURFACE_FORMAT_A16B16G16R16F:
|
|
return D3DFMT_A16B16G16R16F;
|
|
break;
|
|
case SURFACE_FORMAT_A16B16G16R16:
|
|
return D3DFMT_A16B16G16R16;
|
|
break;
|
|
case SURFACE_FORMAT_A8B8G8R8:
|
|
return D3DFMT_A8B8G8R8;
|
|
break;
|
|
case SURFACE_FORMAT_X8B8G8R8:
|
|
return D3DFMT_X8B8G8R8;
|
|
break;
|
|
case SURFACE_FORMAT_A8R8G8B8:
|
|
return D3DFMT_A8R8G8B8;
|
|
break;
|
|
case SURFACE_FORMAT_X8R8G8B8:
|
|
return D3DFMT_X8R8G8B8;
|
|
break;
|
|
case SURFACE_FORMAT_NV12:
|
|
return static_cast<D3DFORMAT>(MAKEFOURCC('N', 'V', '1', '2'));
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
return static_cast<D3DFORMAT>(MAKEFOURCC('Y', 'V', '1', '2'));
|
|
break;
|
|
default:
|
|
log_error("SurfaceFormatToD3D(): unknown surface format!\n");
|
|
return D3DFMT_R32F;
|
|
break;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
bool DeviceCreate( cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr<CDeviceWrapper> &device )
|
|
{
|
|
switch (adapterType)
|
|
{
|
|
#if defined(_WIN32)
|
|
case CL_ADAPTER_D3D9_KHR:
|
|
device = std::auto_ptr<CDeviceWrapper>(new CD3D9Wrapper());
|
|
break;
|
|
case CL_ADAPTER_D3D9EX_KHR:
|
|
device = std::auto_ptr<CDeviceWrapper>(new CD3D9ExWrapper());
|
|
break;
|
|
case CL_ADAPTER_DXVA_KHR:
|
|
device = std::auto_ptr<CDeviceWrapper>(new CDXVAWrapper());
|
|
break;
|
|
#endif
|
|
default:
|
|
log_error("DeviceCreate(): Unknown adapter type!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return device->Status();
|
|
}
|
|
|
|
bool SurfaceFormatCheck( cl_dx9_media_adapter_type_khr adapterType, const CDeviceWrapper &device, TSurfaceFormat surfaceFormat )
|
|
{
|
|
switch (adapterType)
|
|
{
|
|
#if defined(_WIN32)
|
|
case CL_ADAPTER_D3D9_KHR:
|
|
case CL_ADAPTER_D3D9EX_KHR:
|
|
case CL_ADAPTER_DXVA_KHR:
|
|
{
|
|
D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
|
|
LPDIRECT3D9 d3d9 = static_cast<LPDIRECT3D9>(device.D3D());
|
|
D3DDISPLAYMODE d3ddm;
|
|
d3d9->GetAdapterDisplayMode(device.AdapterIdx(), &d3ddm);
|
|
|
|
if( FAILED(d3d9->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, d3ddm.Format, 0, D3DRTYPE_SURFACE, d3dFormat)) )
|
|
return false;
|
|
}
|
|
break;
|
|
#endif
|
|
default:
|
|
log_error("SurfaceFormatCheck(): Unknown adapter type!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format)
|
|
{
|
|
switch(surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_R32F:
|
|
format.image_channel_order = CL_R;
|
|
format.image_channel_data_type = CL_FLOAT;
|
|
break;
|
|
case SURFACE_FORMAT_R16F:
|
|
format.image_channel_order = CL_R;
|
|
format.image_channel_data_type = CL_HALF_FLOAT;
|
|
break;
|
|
case SURFACE_FORMAT_L16:
|
|
format.image_channel_order = CL_R;
|
|
format.image_channel_data_type = CL_UNORM_INT16;
|
|
break;
|
|
case SURFACE_FORMAT_A8:
|
|
format.image_channel_order = CL_A;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_L8:
|
|
format.image_channel_order = CL_R;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_G32R32F:
|
|
format.image_channel_order = CL_RG;
|
|
format.image_channel_data_type = CL_FLOAT;
|
|
break;
|
|
case SURFACE_FORMAT_G16R16F:
|
|
format.image_channel_order = CL_RG;
|
|
format.image_channel_data_type = CL_HALF_FLOAT;
|
|
break;
|
|
case SURFACE_FORMAT_G16R16:
|
|
format.image_channel_order = CL_RG;
|
|
format.image_channel_data_type = CL_UNORM_INT16;
|
|
break;
|
|
case SURFACE_FORMAT_A8L8:
|
|
format.image_channel_order = CL_RG;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_A32B32G32R32F:
|
|
format.image_channel_order = CL_RGBA;
|
|
format.image_channel_data_type = CL_FLOAT;
|
|
break;
|
|
case SURFACE_FORMAT_A16B16G16R16F:
|
|
format.image_channel_order = CL_RGBA;
|
|
format.image_channel_data_type = CL_HALF_FLOAT;
|
|
break;
|
|
case SURFACE_FORMAT_A16B16G16R16:
|
|
format.image_channel_order = CL_RGBA;
|
|
format.image_channel_data_type = CL_UNORM_INT16;
|
|
break;
|
|
case SURFACE_FORMAT_A8B8G8R8:
|
|
format.image_channel_order = CL_RGBA;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_X8B8G8R8:
|
|
format.image_channel_order = CL_RGBA;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_A8R8G8B8:
|
|
format.image_channel_order = CL_BGRA;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_X8R8G8B8:
|
|
format.image_channel_order = CL_BGRA;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_NV12:
|
|
format.image_channel_order = CL_R;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
format.image_channel_order = CL_R;
|
|
format.image_channel_data_type = CL_UNORM_INT8;
|
|
break;
|
|
default:
|
|
log_error("SurfaceFormatToOCL(): Unknown surface format!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void SurfaceFormatToString( TSurfaceFormat surfaceFormat, std::string &str )
|
|
{
|
|
switch(surfaceFormat)
|
|
{
|
|
case SURFACE_FORMAT_R32F:
|
|
str = "R32F";
|
|
break;
|
|
case SURFACE_FORMAT_R16F:
|
|
str = "R16F";
|
|
break;
|
|
case SURFACE_FORMAT_L16:
|
|
str = "L16";
|
|
break;
|
|
case SURFACE_FORMAT_A8:
|
|
str = "A8";
|
|
break;
|
|
case SURFACE_FORMAT_L8:
|
|
str = "L8";
|
|
break;
|
|
case SURFACE_FORMAT_G32R32F:
|
|
str = "G32R32F";
|
|
break;
|
|
case SURFACE_FORMAT_G16R16F:
|
|
str = "G16R16F";
|
|
break;
|
|
case SURFACE_FORMAT_G16R16:
|
|
str = "G16R16";
|
|
break;
|
|
case SURFACE_FORMAT_A8L8:
|
|
str = "A8L8";
|
|
break;
|
|
case SURFACE_FORMAT_A32B32G32R32F:
|
|
str = "A32B32G32R32F";
|
|
break;
|
|
case SURFACE_FORMAT_A16B16G16R16F:
|
|
str = "A16B16G16R16F";
|
|
break;
|
|
case SURFACE_FORMAT_A16B16G16R16:
|
|
str = "A16B16G16R16";
|
|
break;
|
|
case SURFACE_FORMAT_A8B8G8R8:
|
|
str = "A8B8G8R8";
|
|
break;
|
|
case SURFACE_FORMAT_X8B8G8R8:
|
|
str = "X8B8G8R8";
|
|
break;
|
|
case SURFACE_FORMAT_A8R8G8B8:
|
|
str = "A8R8G8B8";
|
|
break;
|
|
case SURFACE_FORMAT_X8R8G8B8:
|
|
str = "X8R8G8B8";
|
|
break;
|
|
case SURFACE_FORMAT_NV12:
|
|
str = "NV12";
|
|
break;
|
|
case SURFACE_FORMAT_YV12:
|
|
str = "YV12";
|
|
break;
|
|
default:
|
|
log_error("SurfaceFormatToString(): unknown surface format!\n");
|
|
str = "unknown";
|
|
break;
|
|
}
|
|
}
|
|
|
|
bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType, unsigned int width, unsigned int height, TSurfaceFormat surfaceFormat,
|
|
CDeviceWrapper &device, std::auto_ptr<CSurfaceWrapper> &surface, bool sharedHandle, void **objectSharedHandle)
|
|
{
|
|
switch (adapterType)
|
|
{
|
|
#if defined(_WIN32)
|
|
case CL_ADAPTER_D3D9_KHR:
|
|
{
|
|
surface = std::auto_ptr<CD3D9SurfaceWrapper>(new CD3D9SurfaceWrapper);
|
|
CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
HRESULT hr = 0;
|
|
D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
|
|
LPDIRECT3DDEVICE9 d3d9Device = (LPDIRECT3DDEVICE9)device.Device();
|
|
hr = d3d9Device->CreateOffscreenPlainSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface),
|
|
sharedHandle ? objectSharedHandle: 0);
|
|
|
|
if ( FAILED(hr))
|
|
{
|
|
log_error("CreateOffscreenPlainSurface failed\n");
|
|
return false;
|
|
}
|
|
}
|
|
break;
|
|
case CL_ADAPTER_D3D9EX_KHR:
|
|
{
|
|
surface = std::auto_ptr<CD3D9SurfaceWrapper>(new CD3D9SurfaceWrapper);
|
|
CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
HRESULT hr = 0;
|
|
D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
|
|
LPDIRECT3DDEVICE9EX d3d9ExDevice = (LPDIRECT3DDEVICE9EX)device.Device();
|
|
hr = d3d9ExDevice->CreateOffscreenPlainSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface),
|
|
sharedHandle ? objectSharedHandle: 0);
|
|
|
|
if ( FAILED(hr))
|
|
{
|
|
log_error("CreateOffscreenPlainSurface failed\n");
|
|
return false;
|
|
}
|
|
}
|
|
break;
|
|
case CL_ADAPTER_DXVA_KHR:
|
|
{
|
|
surface = std::auto_ptr<CD3D9SurfaceWrapper>(new CD3D9SurfaceWrapper);
|
|
CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
|
|
HRESULT hr = 0;
|
|
D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
|
|
IDXVAHD_Device *dxvaDevice = (IDXVAHD_Device *)device.Device();
|
|
hr = dxvaDevice->CreateVideoSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, 0,
|
|
DXVAHD_SURFACE_TYPE_VIDEO_INPUT, 1, &(*d3dSurface), sharedHandle ? objectSharedHandle: 0);
|
|
|
|
if ( FAILED(hr))
|
|
{
|
|
log_error("CreateVideoSurface failed\n");
|
|
return false;
|
|
}
|
|
}
|
|
break;
|
|
#endif
|
|
default:
|
|
log_error("MediaSurfaceCreate(): Unknown adapter type!\n");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
int DetectFloatToHalfRoundingMode( cl_command_queue q ) // Returns CL_SUCCESS on success
|
|
{
|
|
cl_int err = CL_SUCCESS;
|
|
|
|
if( gFloatToHalfRoundingMode == kDefaultRoundingMode )
|
|
{
|
|
// Some numbers near 0.5f, that we look at to see how the values are rounded.
|
|
static const cl_uint inData[4*4] = { 0x3f000fffU, 0x3f001000U, 0x3f001001U, 0U, 0x3f001fffU, 0x3f002000U, 0x3f002001U, 0U,
|
|
0x3f002fffU, 0x3f003000U, 0x3f003001U, 0U, 0x3f003fffU, 0x3f004000U, 0x3f004001U, 0U };
|
|
static const size_t count = sizeof( inData ) / (4*sizeof( inData[0] ));
|
|
const float *inp = (const float*) inData;
|
|
cl_context context = NULL;
|
|
|
|
// Create an input buffer
|
|
err = clGetCommandQueueInfo( q, CL_QUEUE_CONTEXT, sizeof(context), &context, NULL );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not get context from command queue in DetectFloatToHalfRoundingMode (%d)", err );
|
|
return err;
|
|
}
|
|
|
|
cl_mem inBuf = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR, sizeof( inData ), (void*) inData, &err );
|
|
if( NULL == inBuf || err )
|
|
{
|
|
log_error( "Error: could not create input buffer in DetectFloatToHalfRoundingMode (err: %d)", err );
|
|
return err;
|
|
}
|
|
|
|
// Create a small output image
|
|
cl_image_format fmt = { CL_RGBA, CL_HALF_FLOAT };
|
|
cl_image_desc imageDesc = { 0 };
|
|
imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
|
imageDesc.image_width = count;
|
|
imageDesc.image_height = 1;
|
|
|
|
cl_mem outImage = clCreateImage(context, CL_MEM_READ_WRITE, &fmt, &imageDesc, 0, &err);
|
|
if( NULL == outImage || err )
|
|
{
|
|
log_error( "Error: could not create half float out image in DetectFloatToHalfRoundingMode (err: %d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
return err;
|
|
}
|
|
|
|
// Create our program, and a kernel
|
|
const char *kernel[1] = {
|
|
"kernel void detect_round( global float4 *in, write_only image2d_t out )\n"
|
|
"{\n"
|
|
" write_imagef( out, (int2)(get_global_id(0),0), in[get_global_id(0)] );\n"
|
|
"}\n" };
|
|
cl_program program = clCreateProgramWithSource( context, 1, kernel, NULL, &err );
|
|
if( NULL == program || err )
|
|
{
|
|
log_error( "Error: could not create program in DetectFloatToHalfRoundingMode (err: %d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
return err;
|
|
}
|
|
|
|
cl_device_id device = NULL;
|
|
err = clGetCommandQueueInfo( q, CL_QUEUE_DEVICE, sizeof(device), &device, NULL );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not get device from command queue in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
return err;
|
|
}
|
|
|
|
err = clBuildProgram( program, 1, &device, "", NULL, NULL );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not build program in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
return err;
|
|
}
|
|
|
|
cl_kernel k = clCreateKernel( program, "detect_round", &err );
|
|
if( NULL == k || err )
|
|
{
|
|
log_error( "Error: could not create kernel in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
return err;
|
|
}
|
|
|
|
err = clSetKernelArg( k, 0, sizeof( cl_mem ), &inBuf );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not set argument 0 of kernel in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
clReleaseKernel( k );
|
|
return err;
|
|
}
|
|
|
|
err = clSetKernelArg( k, 1, sizeof( cl_mem ), &outImage );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not set argument 1 of kernel in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
clReleaseKernel( k );
|
|
return err;
|
|
}
|
|
|
|
// Run the kernel
|
|
size_t global_work_size = count;
|
|
err = clEnqueueNDRangeKernel( q, k, 1, NULL, &global_work_size, NULL, 0, NULL, NULL );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not enqueue kernel in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
clReleaseKernel( k );
|
|
return err;
|
|
}
|
|
|
|
// read the results
|
|
cl_ushort outBuf[count*4];
|
|
memset( outBuf, -1, sizeof( outBuf ) );
|
|
size_t origin[3] = {0,0,0};
|
|
size_t region[3] = {count,1,1};
|
|
err = clEnqueueReadImage( q, outImage, CL_TRUE, origin, region, 0, 0, outBuf, 0, NULL, NULL );
|
|
if( err )
|
|
{
|
|
log_error( "Error: could not read output image in DetectFloatToHalfRoundingMode (%d)", err );
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
clReleaseKernel( k );
|
|
return err;
|
|
}
|
|
|
|
// Generate our list of reference results
|
|
cl_ushort rte_ref[count*4];
|
|
cl_ushort rtz_ref[count*4];
|
|
for( size_t i = 0; i < 4 * count; i++ )
|
|
{
|
|
rte_ref[i] = float2half_rte( inp[i] );
|
|
rtz_ref[i] = float2half_rtz( inp[i] );
|
|
}
|
|
|
|
// Verify that we got something in either rtz or rte mode
|
|
if( 0 == memcmp( rte_ref, outBuf, sizeof( rte_ref )) )
|
|
{
|
|
log_info( "Autodetected float->half rounding mode to be rte\n" );
|
|
gFloatToHalfRoundingMode = kRoundToNearestEven;
|
|
}
|
|
else if ( 0 == memcmp( rtz_ref, outBuf, sizeof( rtz_ref )) )
|
|
{
|
|
log_info( "Autodetected float->half rounding mode to be rtz\n" );
|
|
gFloatToHalfRoundingMode = kRoundTowardZero;
|
|
}
|
|
else
|
|
{
|
|
log_error( "ERROR: float to half conversions proceed with invalid rounding mode!\n" );
|
|
log_info( "\nfor:" );
|
|
for( size_t i = 0; i < count; i++ )
|
|
log_info( " {%a, %a, %a, %a},", inp[4*i], inp[4*i+1], inp[4*i+2], inp[4*i+3] );
|
|
log_info( "\ngot:" );
|
|
for( size_t i = 0; i < count; i++ )
|
|
log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", outBuf[4*i], outBuf[4*i+1], outBuf[4*i+2], outBuf[4*i+3] );
|
|
log_info( "\nrte:" );
|
|
for( size_t i = 0; i < count; i++ )
|
|
log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rte_ref[4*i], rte_ref[4*i+1], rte_ref[4*i+2], rte_ref[4*i+3] );
|
|
log_info( "\nrtz:" );
|
|
for( size_t i = 0; i < count; i++ )
|
|
log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rtz_ref[4*i], rtz_ref[4*i+1], rtz_ref[4*i+2], rtz_ref[4*i+3] );
|
|
log_info( "\n" );
|
|
err = -1;
|
|
gFloatToHalfRoundingMode = kRoundingModeCount; // illegal value
|
|
}
|
|
|
|
// clean up
|
|
clReleaseMemObject( inBuf );
|
|
clReleaseMemObject( outImage );
|
|
clReleaseProgram( program );
|
|
clReleaseKernel( k );
|
|
return err;
|
|
}
|
|
|
|
// Make sure that the rounding mode was successfully detected, if we checked earlier
|
|
if( gFloatToHalfRoundingMode != kRoundToNearestEven && gFloatToHalfRoundingMode != kRoundTowardZero)
|
|
return -2;
|
|
|
|
return err;
|
|
}
|
|
|
|
cl_ushort convert_float_to_half( float f )
|
|
{
|
|
switch( gFloatToHalfRoundingMode )
|
|
{
|
|
case kRoundToNearestEven:
|
|
return float2half_rte( f );
|
|
case kRoundTowardZero:
|
|
return float2half_rtz( f );
|
|
default:
|
|
log_error( "ERROR: Test internal error -- unhandled or unknown float->half rounding mode.\n" );
|
|
exit(-1);
|
|
return 0xffff;
|
|
}
|
|
|
|
}
|
|
|
|
cl_ushort float2half_rte( float f )
|
|
{
|
|
union{ float f; cl_uint u; } u = {f};
|
|
cl_uint sign = (u.u >> 16) & 0x8000;
|
|
float x = fabsf(f);
|
|
|
|
//Nan
|
|
if( x != x )
|
|
{
|
|
u.u >>= (24-11);
|
|
u.u &= 0x7fff;
|
|
u.u |= 0x0200; //silence the NaN
|
|
return u.u | sign;
|
|
}
|
|
|
|
// overflow
|
|
if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) )
|
|
return 0x7c00 | sign;
|
|
|
|
// underflow
|
|
if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) )
|
|
return sign; // The halfway case can return 0x0001 or 0. 0 is even.
|
|
|
|
// very small
|
|
if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
|
|
return sign | 1;
|
|
|
|
// half denormal
|
|
if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
|
|
{
|
|
u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125);
|
|
return sign | u.u;
|
|
}
|
|
|
|
u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13);
|
|
u.u &= 0x7f800000;
|
|
x += u.f;
|
|
u.f = x - u.f;
|
|
u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);
|
|
|
|
return (u.u >> (24-11)) | sign;
|
|
}
|
|
|
|
cl_ushort float2half_rtz( float f )
|
|
{
|
|
union{ float f; cl_uint u; } u = {f};
|
|
cl_uint sign = (u.u >> 16) & 0x8000;
|
|
float x = fabsf(f);
|
|
|
|
//Nan
|
|
if( x != x )
|
|
{
|
|
u.u >>= (24-11);
|
|
u.u &= 0x7fff;
|
|
u.u |= 0x0200; //silence the NaN
|
|
return u.u | sign;
|
|
}
|
|
|
|
// overflow
|
|
if( x >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) )
|
|
{
|
|
if( x == INFINITY )
|
|
return 0x7c00 | sign;
|
|
|
|
return 0x7bff | sign;
|
|
}
|
|
|
|
// underflow
|
|
if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
|
|
return sign; // The halfway case can return 0x0001 or 0. 0 is even.
|
|
|
|
// half denormal
|
|
if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
|
|
{
|
|
x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
|
|
return (cl_ushort)((int) x | sign);
|
|
}
|
|
|
|
u.u &= 0xFFFFE000U;
|
|
u.u -= 0x38000000U;
|
|
|
|
return (u.u >> (24-11)) | sign;
|
|
}
|
|
|
|
float convert_half_to_float( unsigned short halfValue )
|
|
{
|
|
// We have to take care of a few special cases, but in general, we just extract
|
|
// the same components from the half that exist in the float and re-stuff them
|
|
// For a description of the actual half format, see http://en.wikipedia.org/wiki/Half_precision
|
|
// Note: we store these in 32-bit ints to make the bit manipulations easier later
|
|
int sign = ( halfValue >> 15 ) & 0x0001;
|
|
int exponent = ( halfValue >> 10 ) & 0x001f;
|
|
int mantissa = ( halfValue ) & 0x03ff;
|
|
|
|
// Note: we use a union here to be able to access the bits of a float directly
|
|
union
|
|
{
|
|
unsigned int bits;
|
|
float floatValue;
|
|
} outFloat;
|
|
|
|
// Special cases first
|
|
if( exponent == 0 )
|
|
{
|
|
if( mantissa == 0 )
|
|
{
|
|
// If both exponent and mantissa are 0, the number is +/- 0
|
|
outFloat.bits = sign << 31;
|
|
return outFloat.floatValue; // Already done!
|
|
}
|
|
|
|
// If exponent is 0, it's a denormalized number, so we renormalize it
|
|
// Note: this is not terribly efficient, but oh well
|
|
while( ( mantissa & 0x00000400 ) == 0 )
|
|
{
|
|
mantissa <<= 1;
|
|
exponent--;
|
|
}
|
|
|
|
// The first bit is implicit, so we take it off and inc the exponent accordingly
|
|
exponent++;
|
|
mantissa &= ~(0x00000400);
|
|
}
|
|
else if( exponent == 31 ) // Special-case "numbers"
|
|
{
|
|
// If the exponent is 31, it's a special case number (+/- infinity or NAN).
|
|
// If the mantissa is 0, it's infinity, else it's NAN, but in either case, the packing
|
|
// method is the same
|
|
outFloat.bits = ( sign << 31 ) | 0x7f800000 | ( mantissa << 13 );
|
|
return outFloat.floatValue;
|
|
}
|
|
|
|
// Plain ol' normalized number, so adjust to the ranges a 32-bit float expects and repack
|
|
exponent += ( 127 - 15 );
|
|
mantissa <<= 13;
|
|
|
|
outFloat.bits = ( sign << 31 ) | ( exponent << 23 ) | mantissa;
|
|
return outFloat.floatValue;
|
|
}
|