mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Update cl_khr_extended_async_copies tests to the latest extension version (#1426)
* Update cl_khr_extended_async_copies tests to the latest version of the extension Update the 2D and 3D extended async copies tests. Previously they were based on an older provisional version of the extension. Also update the variable names to only use 'stride' to refer to the actual stride values. Previously the tests used 'stride' to refer to the end of one line or plane and the start of the next. This is not the commonly understood meaning. * Address cl_khr_extended_async_copies PR feedback * Remove unnecessary parenthesis in kernel code * Make variables `const` and rearrange so that we can reuse variables, rather than repeating expressions. * Add in missing vector size of 3 for 2D tests * Use C++ String literals for kernel code Rather than C strings use C++11 string literals to define the kernel code in the extended async-copy tests. Doing this makes the kernel code more readable. Co-authored-by: Ewan Crawford <ewan@codeplay.com>
This commit is contained in:
@@ -25,96 +25,95 @@
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "procs.h"
|
||||
|
||||
static const char *async_global_to_local_kernel3D =
|
||||
"#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
|
||||
"%s\n" // optional pragma string
|
||||
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
|
||||
"%s *localBuffer, int numElementsPerLine, int numLines, int "
|
||||
"planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, "
|
||||
"int dstLineStride, int srcPlaneStride, int dstPlaneStride )\n"
|
||||
"{\n"
|
||||
" int i, j, k;\n"
|
||||
// Zero the local storage first
|
||||
" for(i=0; i<planesCopiesPerWorkItem; i++)\n"
|
||||
" for(j=0; j<numLines; j++)\n"
|
||||
" for(k=0; k<numElementsPerLine; k++)\n"
|
||||
" localBuffer[ (get_local_id( 0 "
|
||||
")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
|
||||
"numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
|
||||
"dstLineStride) + k ] = (%s)(%s)0;\n"
|
||||
// Do this to verify all kernels are done zeroing the local buffer before we
|
||||
// try the copy
|
||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
||||
" event_t event;\n"
|
||||
" event = async_work_group_copy_3D3D( (__local %s*)localBuffer, "
|
||||
"(__global const "
|
||||
"%s*)(src+planesCopiesPerWorkgroup*get_group_id(0)*(numLines*"
|
||||
"numElementsPerLine + numLines*srcLineStride + srcPlaneStride)), "
|
||||
"(size_t)numElementsPerLine, (size_t)numLines, srcLineStride, "
|
||||
"dstLineStride, planesCopiesPerWorkgroup, srcPlaneStride, dstPlaneStride, "
|
||||
"0 );\n"
|
||||
// Wait for the copy to complete, then verify by manually copying to the
|
||||
// dest
|
||||
" wait_group_events( 1, &event );\n"
|
||||
" for(i=0; i<planesCopiesPerWorkItem; i++)\n"
|
||||
" for(j=0; j<numLines; j++)\n"
|
||||
" for(k=0; k<numElementsPerLine; k++)\n"
|
||||
" dst[ (get_global_id( 0 "
|
||||
")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
|
||||
"numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
|
||||
"dstLineStride) + k ] = localBuffer[ (get_local_id( 0 "
|
||||
")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
|
||||
"numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
|
||||
"dstLineStride) + k ];\n"
|
||||
"}\n";
|
||||
static const char *async_global_to_local_kernel3D = R"OpenCLC(
|
||||
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
|
||||
%s // optional pragma string
|
||||
|
||||
static const char *async_local_to_global_kernel3D =
|
||||
"#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
|
||||
"%s\n" // optional pragma string
|
||||
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
|
||||
"%s *localBuffer, int numElementsPerLine, int numLines, int "
|
||||
"planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, "
|
||||
"int dstLineStride, int srcPlaneStride, int dstPlaneStride )\n"
|
||||
"{\n"
|
||||
" int i, j, k;\n"
|
||||
// Zero the local storage first
|
||||
" for(i=0; i<planesCopiesPerWorkItem; i++)\n"
|
||||
" for(j=0; j<numLines; j++)\n"
|
||||
" for(k=0; k<numElementsPerLine; k++)\n"
|
||||
" localBuffer[ (get_local_id( 0 "
|
||||
")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
|
||||
"numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
|
||||
"srcLineStride) + k ] = (%s)(%s)0;\n"
|
||||
// Do this to verify all kernels are done zeroing the local buffer before we
|
||||
// try the copy
|
||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
||||
" for(i=0; i<planesCopiesPerWorkItem; i++)\n"
|
||||
" for(j=0; j<numLines; j++)\n"
|
||||
" for(k=0; k<numElementsPerLine; k++)\n"
|
||||
" localBuffer[ (get_local_id( 0 "
|
||||
")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
|
||||
"numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
|
||||
"srcLineStride) + k ] = src[ (get_global_id( 0 "
|
||||
")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
|
||||
"numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
|
||||
"srcLineStride) + k ];\n"
|
||||
// Do this to verify all kernels are done copying to the local buffer before
|
||||
// we try the copy
|
||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
||||
" event_t event;\n"
|
||||
" event = async_work_group_copy_3D3D((__global "
|
||||
"%s*)(dst+planesCopiesPerWorkgroup*get_group_id(0)*(numLines*"
|
||||
"numElementsPerLine + numLines*dstLineStride + dstPlaneStride)), (__local "
|
||||
"const %s*)localBuffer, (size_t)numElementsPerLine, (size_t)numLines, "
|
||||
"srcLineStride, dstLineStride, planesCopiesPerWorkgroup, srcPlaneStride, "
|
||||
"dstPlaneStride, 0 );\n"
|
||||
" wait_group_events( 1, &event );\n"
|
||||
"}\n";
|
||||
__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer,
|
||||
int numElementsPerLine, int numLines, int planesCopiesPerWorkgroup,
|
||||
int planesCopiesPerWorkItem, int srcLineStride,
|
||||
int dstLineStride, int srcPlaneStride, int dstPlaneStride ) {
|
||||
// Zero the local storage first
|
||||
for (int i = 0; i < planesCopiesPerWorkItem; i++) {
|
||||
for (int j = 0; j < numLines; j++) {
|
||||
for (int k = 0; k < numElementsPerLine; k++) {
|
||||
const int index = (get_local_id(0) * planesCopiesPerWorkItem + i) * dstPlaneStride + j * dstLineStride + k;
|
||||
localBuffer[index] = (%s)(%s)0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
event_t event = async_work_group_copy_3D3D(localBuffer, 0, src,
|
||||
planesCopiesPerWorkgroup * get_group_id(0) * srcPlaneStride,
|
||||
sizeof(%s), (size_t)numElementsPerLine, (size_t)numLines,
|
||||
planesCopiesPerWorkgroup, srcLineStride, srcPlaneStride, dstLineStride,
|
||||
dstPlaneStride, 0);
|
||||
|
||||
// Wait for the copy to complete, then verify by manually copying to the dest
|
||||
wait_group_events(1, &event);
|
||||
|
||||
for (int i = 0; i < planesCopiesPerWorkItem; i++) {
|
||||
for (int j = 0; j < numLines; j++) {
|
||||
for(int k = 0; k < numElementsPerLine; k++) {
|
||||
const int local_index = (get_local_id(0) * planesCopiesPerWorkItem + i) * dstPlaneStride + j * dstLineStride + k;
|
||||
const int global_index = (get_global_id(0) * planesCopiesPerWorkItem + i) * dstPlaneStride + j * dstLineStride + k;
|
||||
dst[global_index] = localBuffer[local_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
)OpenCLC";
|
||||
|
||||
static const char *async_local_to_global_kernel3D = R"OpenCLC(
|
||||
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
|
||||
%s // optional pragma string
|
||||
|
||||
__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer,
|
||||
int numElementsPerLine, int numLines, int planesCopiesPerWorkgroup,
|
||||
int planesCopiesPerWorkItem, int srcLineStride,
|
||||
int dstLineStride, int srcPlaneStride, int dstPlaneStride) {
|
||||
// Zero the local storage first
|
||||
for (int i = 0; i < planesCopiesPerWorkItem; i++) {
|
||||
for (int j = 0; j < numLines; j++) {
|
||||
for (int k = 0; k < numElementsPerLine; k++) {
|
||||
const int index = (get_local_id(0) * planesCopiesPerWorkItem + i) * srcPlaneStride + j * srcLineStride + k;
|
||||
localBuffer[index] = (%s)(%s)0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for (int i=0; i < planesCopiesPerWorkItem; i++) {
|
||||
for (int j=0; j < numLines; j++) {
|
||||
for (int k=0; k < numElementsPerLine; k++) {
|
||||
const int local_index = (get_local_id(0) * planesCopiesPerWorkItem + i) * srcPlaneStride + j * srcLineStride + k;
|
||||
const int global_index = (get_global_id(0) * planesCopiesPerWorkItem + i) * srcPlaneStride + j*srcLineStride + k;
|
||||
localBuffer[local_index] = src[global_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Do this to verify all kernels are done copying to the local buffer before we try the copy
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
event_t event = async_work_group_copy_3D3D(dst,
|
||||
planesCopiesPerWorkgroup * get_group_id(0) * dstPlaneStride, localBuffer, 0,
|
||||
sizeof(%s), (size_t)numElementsPerLine, (size_t)numLines, planesCopiesPerWorkgroup,
|
||||
srcLineStride, srcPlaneStride, dstLineStride, dstPlaneStride, 0);
|
||||
|
||||
wait_group_events(1, &event);
|
||||
}
|
||||
)OpenCLC";
|
||||
|
||||
int test_copy3D(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue, const char *kernelCode,
|
||||
ExplicitType vecType, int vecSize, int srcLineStride,
|
||||
int dstLineStride, int srcPlaneStride, int dstPlaneStride,
|
||||
ExplicitType vecType, int vecSize, int srcLineMargin,
|
||||
int dstLineMargin, int srcPlaneMargin, int dstPlaneMargin,
|
||||
bool localIsDst)
|
||||
{
|
||||
int error;
|
||||
@@ -133,10 +132,10 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
|
||||
vecSize);
|
||||
|
||||
size_t elementSize = get_explicit_type_size(vecType) * vecSize;
|
||||
log_info("Testing %s with srcLineStride = %d, dstLineStride = %d, "
|
||||
"srcPlaneStride = %d, dstPlaneStride = %d\n",
|
||||
vecNameString, srcLineStride, dstLineStride, srcPlaneStride,
|
||||
dstPlaneStride);
|
||||
log_info("Testing %s with srcLineMargin = %d, dstLineMargin = %d, "
|
||||
"srcPlaneMargin = %d, dstPlaneMargin = %d\n",
|
||||
vecNameString, srcLineMargin, dstLineMargin, srcPlaneMargin,
|
||||
dstPlaneMargin);
|
||||
|
||||
cl_long max_local_mem_size;
|
||||
error =
|
||||
@@ -201,16 +200,20 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
|
||||
if (max_workgroup_size > max_local_workgroup_size[0])
|
||||
max_workgroup_size = max_local_workgroup_size[0];
|
||||
|
||||
size_t numElementsPerLine = 10;
|
||||
size_t numLines = 13;
|
||||
size_t planesCopiesPerWorkItem = 2;
|
||||
const size_t numElementsPerLine = 10;
|
||||
const cl_int dstLineStride = numElementsPerLine + dstLineMargin;
|
||||
const cl_int srcLineStride = numElementsPerLine + srcLineMargin;
|
||||
|
||||
const size_t numLines = 13;
|
||||
const cl_int dstPlaneStride = (numLines * dstLineStride) + dstPlaneMargin;
|
||||
const cl_int srcPlaneStride = (numLines * srcLineStride) + srcPlaneMargin;
|
||||
|
||||
elementSize =
|
||||
get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
|
||||
size_t localStorageSpacePerWorkitem = elementSize
|
||||
* (planesCopiesPerWorkItem
|
||||
* (numLines * numElementsPerLine
|
||||
+ numLines * (localIsDst ? dstLineStride : srcLineStride)
|
||||
+ (localIsDst ? dstPlaneStride : srcPlaneStride)));
|
||||
const size_t planesCopiesPerWorkItem = 2;
|
||||
const size_t localStorageSpacePerWorkitem = elementSize
|
||||
* planesCopiesPerWorkItem
|
||||
* (localIsDst ? dstPlaneStride : srcPlaneStride);
|
||||
size_t maxLocalWorkgroupSize =
|
||||
(((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem);
|
||||
|
||||
@@ -224,42 +227,41 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
|
||||
if (maxLocalWorkgroupSize > max_workgroup_size)
|
||||
localWorkgroupSize = max_workgroup_size;
|
||||
|
||||
size_t maxTotalPlanesIn = ((max_alloc_size / elementSize) + srcPlaneStride)
|
||||
/ ((numLines * numElementsPerLine + numLines * srcLineStride)
|
||||
+ srcPlaneStride);
|
||||
size_t maxTotalPlanesOut = ((max_alloc_size / elementSize) + dstPlaneStride)
|
||||
/ ((numLines * numElementsPerLine + numLines * dstLineStride)
|
||||
+ dstPlaneStride);
|
||||
size_t maxTotalPlanes = std::min(maxTotalPlanesIn, maxTotalPlanesOut);
|
||||
size_t maxLocalWorkgroups =
|
||||
const size_t maxTotalPlanesIn =
|
||||
((max_alloc_size / elementSize) + srcPlaneMargin) / srcPlaneStride;
|
||||
const size_t maxTotalPlanesOut =
|
||||
((max_alloc_size / elementSize) + dstPlaneMargin) / dstPlaneStride;
|
||||
const size_t maxTotalPlanes = std::min(maxTotalPlanesIn, maxTotalPlanesOut);
|
||||
const size_t maxLocalWorkgroups =
|
||||
maxTotalPlanes / (localWorkgroupSize * planesCopiesPerWorkItem);
|
||||
|
||||
size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem
|
||||
- (localIsDst ? dstPlaneStride : srcPlaneStride);
|
||||
size_t numberOfLocalWorkgroups = std::min(1111, (int)maxLocalWorkgroups);
|
||||
size_t totalPlanes =
|
||||
const size_t localBufferSize =
|
||||
localWorkgroupSize * localStorageSpacePerWorkitem
|
||||
- (localIsDst ? dstPlaneMargin : srcPlaneMargin);
|
||||
const size_t numberOfLocalWorkgroups =
|
||||
std::min(1111, (int)maxLocalWorkgroups);
|
||||
const size_t totalPlanes =
|
||||
numberOfLocalWorkgroups * localWorkgroupSize * planesCopiesPerWorkItem;
|
||||
size_t inBufferSize = elementSize
|
||||
* (totalPlanes
|
||||
* (numLines * numElementsPerLine + numLines * srcLineStride)
|
||||
+ (totalPlanes - 1) * srcPlaneStride);
|
||||
size_t outBufferSize = elementSize
|
||||
* (totalPlanes
|
||||
* (numLines * numElementsPerLine + numLines * dstLineStride)
|
||||
+ (totalPlanes - 1) * dstPlaneStride);
|
||||
size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize;
|
||||
const size_t inBufferSize = elementSize
|
||||
* (totalPlanes * numLines * srcLineStride
|
||||
+ (totalPlanes - 1) * srcPlaneMargin);
|
||||
const size_t outBufferSize = elementSize
|
||||
* (totalPlanes * numLines * dstLineStride
|
||||
+ (totalPlanes - 1) * dstPlaneMargin);
|
||||
const size_t globalWorkgroupSize =
|
||||
numberOfLocalWorkgroups * localWorkgroupSize;
|
||||
|
||||
inBuffer = (void *)malloc(inBufferSize);
|
||||
outBuffer = (void *)malloc(outBufferSize);
|
||||
outBufferCopy = (void *)malloc(outBufferSize);
|
||||
|
||||
cl_int planesCopiesPerWorkItemInt, numElementsPerLineInt, numLinesInt,
|
||||
planesCopiesPerWorkgroup;
|
||||
planesCopiesPerWorkItemInt = (int)planesCopiesPerWorkItem;
|
||||
numElementsPerLineInt = (int)numElementsPerLine;
|
||||
numLinesInt = (int)numLines;
|
||||
planesCopiesPerWorkgroup =
|
||||
(int)(planesCopiesPerWorkItem * localWorkgroupSize);
|
||||
const cl_int planesCopiesPerWorkItemInt =
|
||||
static_cast<cl_int>(planesCopiesPerWorkItem);
|
||||
const cl_int numElementsPerLineInt =
|
||||
static_cast<cl_int>(numElementsPerLine);
|
||||
const cl_int numLinesInt = static_cast<cl_int>(numLines);
|
||||
const cl_int planesCopiesPerWorkgroup =
|
||||
static_cast<cl_int>(planesCopiesPerWorkItem * localWorkgroupSize);
|
||||
|
||||
log_info("Global: %d, local %d, local buffer %db, global in buffer %db, "
|
||||
"global out buffer %db, each work group will copy %d planes and "
|
||||
@@ -336,14 +338,8 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
|
||||
for (int k = 0; k < (int)numElementsPerLine * elementSize;
|
||||
k += elementSize)
|
||||
{
|
||||
int inIdx = i
|
||||
* (numLines * numElementsPerLine
|
||||
+ numLines * srcLineStride + srcPlaneStride)
|
||||
+ j * (numElementsPerLine + srcLineStride) + k;
|
||||
int outIdx = i
|
||||
* (numLines * numElementsPerLine
|
||||
+ numLines * dstLineStride + dstPlaneStride)
|
||||
+ j * (numElementsPerLine + dstLineStride) + k;
|
||||
int inIdx = i * srcPlaneStride + j * srcLineStride + k;
|
||||
int outIdx = i * dstPlaneStride + j * dstLineStride + k;
|
||||
if (memcmp(((char *)inBuffer) + inIdx,
|
||||
((char *)outBuffer) + outIdx, typeSize)
|
||||
!= 0)
|
||||
@@ -378,14 +374,11 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
|
||||
}
|
||||
if (j < (int)numLines * elementSize)
|
||||
{
|
||||
int outIdx = i
|
||||
* (numLines * numElementsPerLine
|
||||
+ numLines * dstLineStride + dstPlaneStride)
|
||||
+ j * (numElementsPerLine + dstLineStride)
|
||||
int outIdx = i * dstPlaneStride + j * dstLineStride
|
||||
+ numElementsPerLine * elementSize;
|
||||
if (memcmp(((char *)outBuffer) + outIdx,
|
||||
((char *)outBufferCopy) + outIdx,
|
||||
dstLineStride * elementSize)
|
||||
dstLineMargin * elementSize)
|
||||
!= 0)
|
||||
{
|
||||
if (failuresPrinted == 0)
|
||||
@@ -409,14 +402,11 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
|
||||
if (i < (int)(globalWorkgroupSize * planesCopiesPerWorkItem - 1)
|
||||
* elementSize)
|
||||
{
|
||||
int outIdx = i
|
||||
* (numLines * numElementsPerLine + numLines * dstLineStride
|
||||
+ dstPlaneStride)
|
||||
+ (numLines * elementSize) * (numElementsPerLine)
|
||||
+ (numLines * elementSize) * (dstLineStride);
|
||||
int outIdx =
|
||||
i * dstPlaneStride + numLines * dstLineStride * elementSize;
|
||||
if (memcmp(((char *)outBuffer) + outIdx,
|
||||
((char *)outBufferCopy) + outIdx,
|
||||
dstPlaneStride * elementSize)
|
||||
dstPlaneMargin * elementSize)
|
||||
!= 0)
|
||||
{
|
||||
if (failuresPrinted == 0)
|
||||
@@ -453,10 +443,13 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context,
|
||||
kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong,
|
||||
kULong, kFloat, kDouble, kNumExplicitTypes
|
||||
};
|
||||
// The margins below represent the number of elements between the end of
|
||||
// one line or plane and the start of the next. The strides are equivalent
|
||||
// to the size of the line or plane plus the chosen margin.
|
||||
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
|
||||
unsigned int smallTypesStrideSizes[] = { 0, 10, 100 };
|
||||
unsigned int size, typeIndex, srcLineStride, dstLineStride, srcPlaneStride,
|
||||
dstPlaneStride;
|
||||
unsigned int smallTypesMarginSizes[] = { 0, 10, 100 };
|
||||
unsigned int size, typeIndex, srcLineMargin, dstLineMargin, srcPlaneMargin,
|
||||
dstPlaneMargin;
|
||||
|
||||
int errors = 0;
|
||||
|
||||
@@ -482,33 +475,33 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context,
|
||||
if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
|
||||
<= 2) // small type
|
||||
{
|
||||
for (srcLineStride = 0;
|
||||
srcLineStride < sizeof(smallTypesStrideSizes)
|
||||
/ sizeof(smallTypesStrideSizes[0]);
|
||||
srcLineStride++)
|
||||
for (srcLineMargin = 0;
|
||||
srcLineMargin < sizeof(smallTypesMarginSizes)
|
||||
/ sizeof(smallTypesMarginSizes[0]);
|
||||
srcLineMargin++)
|
||||
{
|
||||
for (dstLineStride = 0;
|
||||
dstLineStride < sizeof(smallTypesStrideSizes)
|
||||
/ sizeof(smallTypesStrideSizes[0]);
|
||||
dstLineStride++)
|
||||
for (dstLineMargin = 0;
|
||||
dstLineMargin < sizeof(smallTypesMarginSizes)
|
||||
/ sizeof(smallTypesMarginSizes[0]);
|
||||
dstLineMargin++)
|
||||
{
|
||||
for (srcPlaneStride = 0;
|
||||
srcPlaneStride < sizeof(smallTypesStrideSizes)
|
||||
/ sizeof(smallTypesStrideSizes[0]);
|
||||
srcPlaneStride++)
|
||||
for (srcPlaneMargin = 0;
|
||||
srcPlaneMargin < sizeof(smallTypesMarginSizes)
|
||||
/ sizeof(smallTypesMarginSizes[0]);
|
||||
srcPlaneMargin++)
|
||||
{
|
||||
for (dstPlaneStride = 0;
|
||||
dstPlaneStride < sizeof(smallTypesStrideSizes)
|
||||
/ sizeof(smallTypesStrideSizes[0]);
|
||||
dstPlaneStride++)
|
||||
for (dstPlaneMargin = 0;
|
||||
dstPlaneMargin < sizeof(smallTypesMarginSizes)
|
||||
/ sizeof(smallTypesMarginSizes[0]);
|
||||
dstPlaneMargin++)
|
||||
{
|
||||
if (test_copy3D(
|
||||
deviceID, context, queue, kernelCode,
|
||||
vecType[typeIndex], vecSizes[size],
|
||||
smallTypesStrideSizes[srcLineStride],
|
||||
smallTypesStrideSizes[dstLineStride],
|
||||
smallTypesStrideSizes[srcPlaneStride],
|
||||
smallTypesStrideSizes[dstPlaneStride],
|
||||
smallTypesMarginSizes[srcLineMargin],
|
||||
smallTypesMarginSizes[dstLineMargin],
|
||||
smallTypesMarginSizes[srcPlaneMargin],
|
||||
smallTypesMarginSizes[dstPlaneMargin],
|
||||
localIsDst))
|
||||
{
|
||||
errors++;
|
||||
|
||||
Reference in New Issue
Block a user