mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-26 08:49:02 +00:00
Update cl_khr_extended_async_copies tests to the latest extension version (#1426)
* Update cl_khr_extended_async_copies tests to the latest version of the extension. Update the 2D and 3D extended async copies tests. Previously they were based on an older provisional version of the extension. Also update the variable names to only use 'stride' to refer to the actual stride values. Previously the tests used 'stride' to refer to the gap between the end of one line or plane and the start of the next. This is not the commonly understood meaning. * Address cl_khr_extended_async_copies PR feedback * Remove unnecessary parentheses in kernel code * Make variables `const` and rearrange so that we can reuse variables, rather than repeating expressions. * Add in missing vector size of 3 for 2D tests * Use C++ raw string literals for kernel code. Rather than concatenated C strings, use C++11 raw string literals to define the kernel code in the extended async-copy tests. Doing this makes the kernel code more readable. Co-authored-by: Ewan Crawford <ewan@codeplay.com>
This commit is contained in:
@@ -25,77 +25,81 @@
|
|||||||
#include "../../test_common/harness/conversions.h"
|
#include "../../test_common/harness/conversions.h"
|
||||||
#include "procs.h"
|
#include "procs.h"
|
||||||
|
|
||||||
static const char *async_global_to_local_kernel2D =
|
static const char *async_global_to_local_kernel2D = R"OpenCLC(
|
||||||
"#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
|
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
|
||||||
"%s\n" // optional pragma string
|
%s // optional pragma string
|
||||||
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
|
|
||||||
"%s *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int "
|
|
||||||
"lineCopiesPerWorkItem, int srcStride, int dstStride )\n"
|
|
||||||
"{\n"
|
|
||||||
" int i, j;\n"
|
|
||||||
// Zero the local storage first
|
|
||||||
" for(i=0; i<lineCopiesPerWorkItem; i++)\n"
|
|
||||||
" for(j=0; j<numElementsPerLine; j++)\n"
|
|
||||||
" localBuffer[ (get_local_id( 0 "
|
|
||||||
")*lineCopiesPerWorkItem+i)*(numElementsPerLine + dstStride)+j ] = "
|
|
||||||
"(%s)(%s)0;\n"
|
|
||||||
// Do this to verify all kernels are done zeroing the local buffer before we
|
|
||||||
// try the copy
|
|
||||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
|
||||||
" event_t event;\n"
|
|
||||||
" event = async_work_group_copy_2D2D( (__local %s*)localBuffer, "
|
|
||||||
"(__global const "
|
|
||||||
"%s*)(src+lineCopiesPerWorkgroup*get_group_id(0)*(numElementsPerLine + "
|
|
||||||
"srcStride)), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, "
|
|
||||||
"srcStride, dstStride, 0 );\n"
|
|
||||||
// Wait for the copy to complete, then verify by manually copying to the
|
|
||||||
// dest
|
|
||||||
" wait_group_events( 1, &event );\n"
|
|
||||||
" for(i=0; i<lineCopiesPerWorkItem; i++)\n"
|
|
||||||
" for(j=0; j<numElementsPerLine; j++)\n"
|
|
||||||
" dst[ (get_global_id( 0 "
|
|
||||||
")*lineCopiesPerWorkItem+i)*(numElementsPerLine + dstStride)+j ] = "
|
|
||||||
"localBuffer[ (get_local_id( 0 "
|
|
||||||
")*lineCopiesPerWorkItem+i)*(numElementsPerLine + dstStride)+j ];\n"
|
|
||||||
"}\n";
|
|
||||||
|
|
||||||
static const char *async_local_to_global_kernel2D =
|
__kernel void test_fn(const __global %s *src, __global %s *dst,
|
||||||
"#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
|
__local %s *localBuffer, int numElementsPerLine,
|
||||||
"%s\n" // optional pragma string
|
int lineCopiesPerWorkgroup, int lineCopiesPerWorkItem,
|
||||||
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
|
int srcStride, int dstStride) {
|
||||||
"%s *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int "
|
|
||||||
"lineCopiesPerWorkItem, int srcStride, int dstStride )\n"
|
|
||||||
"{\n"
|
|
||||||
" int i, j;\n"
|
|
||||||
// Zero the local storage first
|
// Zero the local storage first
|
||||||
" for(i=0; i<lineCopiesPerWorkItem; i++)\n"
|
for (int i = 0; i < lineCopiesPerWorkItem; i++) {
|
||||||
" for(j=0; j<numElementsPerLine; j++)\n"
|
for (int j = 0; j < numElementsPerLine; j++) {
|
||||||
" localBuffer[ (get_local_id( 0 "
|
const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j;
|
||||||
")*lineCopiesPerWorkItem+i)*(numElementsPerLine + srcStride)+j ] = "
|
localBuffer[index] = (%s)(%s)0;
|
||||||
"(%s)(%s)0;\n"
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Do this to verify all kernels are done zeroing the local buffer before we
|
// Do this to verify all kernels are done zeroing the local buffer before we
|
||||||
// try the copy
|
// try the copy
|
||||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
barrier( CLK_LOCAL_MEM_FENCE );
|
||||||
" for(i=0; i<lineCopiesPerWorkItem; i++)\n"
|
event_t event = async_work_group_copy_2D2D(localBuffer, 0, src,
|
||||||
" for(j=0; j<numElementsPerLine; j++)\n"
|
lineCopiesPerWorkgroup * get_group_id(0) * srcStride, sizeof(%s),
|
||||||
" localBuffer[ (get_local_id( 0 "
|
(size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride, dstStride, 0);
|
||||||
")*lineCopiesPerWorkItem+i)*(numElementsPerLine + srcStride)+j ] = src[ "
|
|
||||||
"(get_global_id( 0 )*lineCopiesPerWorkItem+i)*(numElementsPerLine + "
|
// Wait for the copy to complete, then verify by manually copying to the dest
|
||||||
"srcStride)+j ];\n"
|
wait_group_events(1, &event);
|
||||||
// Do this to verify all kernels are done copying to the local buffer before
|
|
||||||
// we try the copy
|
for (int i = 0; i < lineCopiesPerWorkItem; i++) {
|
||||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
for (int j = 0; j < numElementsPerLine; j++) {
|
||||||
" event_t event;\n"
|
// Offset of element j on this work-item's line i within the local buffer
const int local_index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j;
|
||||||
" event = async_work_group_copy_2D2D((__global "
|
const int global_index = (get_global_id(0) * lineCopiesPerWorkItem + i) * dstStride + j;
|
||||||
"%s*)(dst+lineCopiesPerWorkgroup*get_group_id(0)*(numElementsPerLine + "
|
dst[global_index] = localBuffer[local_index];
|
||||||
"dstStride)), (__local const %s*)localBuffer, (size_t)numElementsPerLine, "
|
}
|
||||||
"(size_t)lineCopiesPerWorkgroup, srcStride, dstStride, 0 );\n"
|
}
|
||||||
" wait_group_events( 1, &event );\n"
|
}
|
||||||
"}\n";
|
)OpenCLC";
|
||||||
|
|
||||||
|
static const char *async_local_to_global_kernel2D = R"OpenCLC(
|
||||||
|
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
|
||||||
|
%s // optional pragma string
|
||||||
|
|
||||||
|
__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer,
|
||||||
|
int numElementsPerLine, int lineCopiesPerWorkgroup,
|
||||||
|
int lineCopiesPerWorkItem, int srcStride, int dstStride) {
|
||||||
|
// Zero the local storage first
|
||||||
|
for (int i = 0; i < lineCopiesPerWorkItem; i++) {
|
||||||
|
for (int j = 0; j < numElementsPerLine; j++) {
|
||||||
|
const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * srcStride + j;
|
||||||
|
localBuffer[index] = (%s)(%s)0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
for (int i = 0; i < lineCopiesPerWorkItem; i++) {
|
||||||
|
for (int j = 0; j < numElementsPerLine; j++) {
|
||||||
|
const int local_index = (get_local_id(0) * lineCopiesPerWorkItem + i) * srcStride + j;
|
||||||
|
const int global_index = (get_global_id(0)*lineCopiesPerWorkItem + i) * srcStride + j;
|
||||||
|
localBuffer[local_index] = src[global_index];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do this to verify all kernels are done copying to the local buffer before we try the copy
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
event_t event = async_work_group_copy_2D2D(dst, lineCopiesPerWorkgroup * get_group_id(0) * dstStride,
|
||||||
|
localBuffer, 0, sizeof(%s), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride,
|
||||||
|
dstStride, 0 );
|
||||||
|
|
||||||
|
wait_group_events(1, &event);
|
||||||
|
}
|
||||||
|
)OpenCLC";
|
||||||
|
|
||||||
int test_copy2D(cl_device_id deviceID, cl_context context,
|
int test_copy2D(cl_device_id deviceID, cl_context context,
|
||||||
cl_command_queue queue, const char *kernelCode,
|
cl_command_queue queue, const char *kernelCode,
|
||||||
ExplicitType vecType, int vecSize, int srcStride, int dstStride,
|
ExplicitType vecType, int vecSize, int srcMargin, int dstMargin,
|
||||||
bool localIsDst)
|
bool localIsDst)
|
||||||
{
|
{
|
||||||
int error;
|
int error;
|
||||||
@@ -114,8 +118,8 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
|
|||||||
vecSize);
|
vecSize);
|
||||||
|
|
||||||
size_t elementSize = get_explicit_type_size(vecType) * vecSize;
|
size_t elementSize = get_explicit_type_size(vecType) * vecSize;
|
||||||
log_info("Testing %s with srcStride = %d, dstStride = %d\n", vecNameString,
|
log_info("Testing %s with srcMargin = %d, dstMargin = %d\n", vecNameString,
|
||||||
srcStride, dstStride);
|
srcMargin, dstMargin);
|
||||||
|
|
||||||
cl_long max_local_mem_size;
|
cl_long max_local_mem_size;
|
||||||
error =
|
error =
|
||||||
@@ -153,7 +157,7 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
|
|||||||
vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
|
vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
|
||||||
: "",
|
: "",
|
||||||
vecNameString, vecNameString, vecNameString, vecNameString,
|
vecNameString, vecNameString, vecNameString, vecNameString,
|
||||||
get_explicit_type_name(vecType), vecNameString, vecNameString);
|
get_explicit_type_name(vecType), vecNameString);
|
||||||
// log_info("program: %s\n", programSource);
|
// log_info("program: %s\n", programSource);
|
||||||
programPtr = programSource;
|
programPtr = programSource;
|
||||||
|
|
||||||
@@ -180,12 +184,17 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
|
|||||||
if (max_workgroup_size > max_local_workgroup_size[0])
|
if (max_workgroup_size > max_local_workgroup_size[0])
|
||||||
max_workgroup_size = max_local_workgroup_size[0];
|
max_workgroup_size = max_local_workgroup_size[0];
|
||||||
|
|
||||||
size_t numElementsPerLine = 10;
|
const size_t numElementsPerLine = 10;
|
||||||
size_t lineCopiesPerWorkItem = 13;
|
const cl_int dstStride = numElementsPerLine + dstMargin;
|
||||||
|
const cl_int srcStride = numElementsPerLine + srcMargin;
|
||||||
|
|
||||||
elementSize =
|
elementSize =
|
||||||
get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
|
get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
|
||||||
size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem * elementSize
|
|
||||||
* (numElementsPerLine + (localIsDst ? dstStride : srcStride));
|
const size_t lineCopiesPerWorkItem = 13;
|
||||||
|
const size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem
|
||||||
|
* elementSize * (localIsDst ? dstStride : srcStride);
|
||||||
|
|
||||||
size_t maxLocalWorkgroupSize =
|
size_t maxLocalWorkgroupSize =
|
||||||
(((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem);
|
(((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem);
|
||||||
|
|
||||||
@@ -199,34 +208,39 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
|
|||||||
if (maxLocalWorkgroupSize > max_workgroup_size)
|
if (maxLocalWorkgroupSize > max_workgroup_size)
|
||||||
localWorkgroupSize = max_workgroup_size;
|
localWorkgroupSize = max_workgroup_size;
|
||||||
|
|
||||||
size_t maxTotalLinesIn = (max_alloc_size / elementSize + srcStride)
|
|
||||||
/ (numElementsPerLine + srcStride);
|
const size_t maxTotalLinesIn =
|
||||||
size_t maxTotalLinesOut = (max_alloc_size / elementSize + dstStride)
|
(max_alloc_size / elementSize + srcMargin) / srcStride;
|
||||||
/ (numElementsPerLine + dstStride);
|
const size_t maxTotalLinesOut =
|
||||||
size_t maxTotalLines = std::min(maxTotalLinesIn, maxTotalLinesOut);
|
(max_alloc_size / elementSize + dstMargin) / dstStride;
|
||||||
size_t maxLocalWorkgroups =
|
const size_t maxTotalLines = std::min(maxTotalLinesIn, maxTotalLinesOut);
|
||||||
|
const size_t maxLocalWorkgroups =
|
||||||
maxTotalLines / (localWorkgroupSize * lineCopiesPerWorkItem);
|
maxTotalLines / (localWorkgroupSize * lineCopiesPerWorkItem);
|
||||||
|
|
||||||
size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem
|
const size_t localBufferSize =
|
||||||
- (localIsDst ? dstStride : srcStride);
|
localWorkgroupSize * localStorageSpacePerWorkitem
|
||||||
size_t numberOfLocalWorkgroups = std::min(1111, (int)maxLocalWorkgroups);
|
- (localIsDst ? dstMargin : srcMargin);
|
||||||
size_t totalLines =
|
const size_t numberOfLocalWorkgroups =
|
||||||
|
std::min(1111, (int)maxLocalWorkgroups);
|
||||||
|
const size_t totalLines =
|
||||||
numberOfLocalWorkgroups * localWorkgroupSize * lineCopiesPerWorkItem;
|
numberOfLocalWorkgroups * localWorkgroupSize * lineCopiesPerWorkItem;
|
||||||
size_t inBufferSize = elementSize
|
const size_t inBufferSize = elementSize
|
||||||
* (totalLines * numElementsPerLine + (totalLines - 1) * srcStride);
|
* (totalLines * numElementsPerLine + (totalLines - 1) * srcMargin);
|
||||||
size_t outBufferSize = elementSize
|
const size_t outBufferSize = elementSize
|
||||||
* (totalLines * numElementsPerLine + (totalLines - 1) * dstStride);
|
* (totalLines * numElementsPerLine + (totalLines - 1) * dstMargin);
|
||||||
size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize;
|
const size_t globalWorkgroupSize =
|
||||||
|
numberOfLocalWorkgroups * localWorkgroupSize;
|
||||||
|
|
||||||
inBuffer = (void *)malloc(inBufferSize);
|
inBuffer = (void *)malloc(inBufferSize);
|
||||||
outBuffer = (void *)malloc(outBufferSize);
|
outBuffer = (void *)malloc(outBufferSize);
|
||||||
outBufferCopy = (void *)malloc(outBufferSize);
|
outBufferCopy = (void *)malloc(outBufferSize);
|
||||||
|
|
||||||
cl_int lineCopiesPerWorkItemInt, numElementsPerLineInt,
|
const cl_int lineCopiesPerWorkItemInt =
|
||||||
lineCopiesPerWorkgroup;
|
static_cast<cl_int>(lineCopiesPerWorkItem);
|
||||||
lineCopiesPerWorkItemInt = (int)lineCopiesPerWorkItem;
|
const cl_int numElementsPerLineInt =
|
||||||
numElementsPerLineInt = (int)numElementsPerLine;
|
static_cast<cl_int>(numElementsPerLine);
|
||||||
lineCopiesPerWorkgroup = (int)(lineCopiesPerWorkItem * localWorkgroupSize);
|
const cl_int lineCopiesPerWorkgroup =
|
||||||
|
static_cast<cl_int>(lineCopiesPerWorkItem * localWorkgroupSize);
|
||||||
|
|
||||||
log_info(
|
log_info(
|
||||||
"Global: %d, local %d, local buffer %db, global in buffer %db, "
|
"Global: %d, local %d, local buffer %db, global in buffer %db, "
|
||||||
@@ -296,8 +310,8 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
|
|||||||
for (int j = 0; j < (int)numElementsPerLine * elementSize;
|
for (int j = 0; j < (int)numElementsPerLine * elementSize;
|
||||||
j += elementSize)
|
j += elementSize)
|
||||||
{
|
{
|
||||||
int inIdx = i * (numElementsPerLine + srcStride) + j;
|
int inIdx = i * srcStride + j;
|
||||||
int outIdx = i * (numElementsPerLine + dstStride) + j;
|
int outIdx = i * dstStride + j;
|
||||||
if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx,
|
if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx,
|
||||||
typeSize)
|
typeSize)
|
||||||
!= 0)
|
!= 0)
|
||||||
@@ -332,11 +346,10 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
|
|||||||
if (i < (int)(globalWorkgroupSize * lineCopiesPerWorkItem - 1)
|
if (i < (int)(globalWorkgroupSize * lineCopiesPerWorkItem - 1)
|
||||||
* elementSize)
|
* elementSize)
|
||||||
{
|
{
|
||||||
int outIdx = i * (numElementsPerLine + dstStride)
|
int outIdx = i * dstStride + numElementsPerLine * elementSize;
|
||||||
+ numElementsPerLine * elementSize;
|
|
||||||
if (memcmp(((char *)outBuffer) + outIdx,
|
if (memcmp(((char *)outBuffer) + outIdx,
|
||||||
((char *)outBufferCopy) + outIdx,
|
((char *)outBufferCopy) + outIdx,
|
||||||
dstStride * elementSize)
|
dstMargin * elementSize)
|
||||||
!= 0)
|
!= 0)
|
||||||
{
|
{
|
||||||
if (failuresPrinted == 0)
|
if (failuresPrinted == 0)
|
||||||
@@ -373,9 +386,12 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context,
|
|||||||
kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong,
|
kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong,
|
||||||
kULong, kFloat, kDouble, kNumExplicitTypes
|
kULong, kFloat, kDouble, kNumExplicitTypes
|
||||||
};
|
};
|
||||||
|
// The margins below represent the number of elements between the end of
|
||||||
|
// one line and the start of the next. The strides are equivalent to the
|
||||||
|
// length of the line plus the chosen margin.
|
||||||
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
|
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
|
||||||
unsigned int smallTypesStrideSizes[] = { 0, 10, 100 };
|
unsigned int smallTypesMarginSizes[] = { 0, 10, 100 };
|
||||||
unsigned int size, typeIndex, srcStride, dstStride;
|
unsigned int size, typeIndex, srcMargin, dstMargin;
|
||||||
|
|
||||||
int errors = 0;
|
int errors = 0;
|
||||||
|
|
||||||
@@ -401,19 +417,19 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context,
|
|||||||
if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
|
if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
|
||||||
<= 2) // small type
|
<= 2) // small type
|
||||||
{
|
{
|
||||||
for (srcStride = 0; srcStride < sizeof(smallTypesStrideSizes)
|
for (srcMargin = 0; srcMargin < sizeof(smallTypesMarginSizes)
|
||||||
/ sizeof(smallTypesStrideSizes[0]);
|
/ sizeof(smallTypesMarginSizes[0]);
|
||||||
srcStride++)
|
srcMargin++)
|
||||||
{
|
{
|
||||||
for (dstStride = 0;
|
for (dstMargin = 0;
|
||||||
dstStride < sizeof(smallTypesStrideSizes)
|
dstMargin < sizeof(smallTypesMarginSizes)
|
||||||
/ sizeof(smallTypesStrideSizes[0]);
|
/ sizeof(smallTypesMarginSizes[0]);
|
||||||
dstStride++)
|
dstMargin++)
|
||||||
{
|
{
|
||||||
if (test_copy2D(deviceID, context, queue, kernelCode,
|
if (test_copy2D(deviceID, context, queue, kernelCode,
|
||||||
vecType[typeIndex], vecSizes[size],
|
vecType[typeIndex], vecSizes[size],
|
||||||
smallTypesStrideSizes[srcStride],
|
smallTypesMarginSizes[srcMargin],
|
||||||
smallTypesStrideSizes[dstStride],
|
smallTypesMarginSizes[dstMargin],
|
||||||
localIsDst))
|
localIsDst))
|
||||||
{
|
{
|
||||||
errors++;
|
errors++;
|
||||||
|
|||||||
@@ -25,96 +25,95 @@
|
|||||||
#include "../../test_common/harness/conversions.h"
|
#include "../../test_common/harness/conversions.h"
|
||||||
#include "procs.h"
|
#include "procs.h"
|
||||||
|
|
||||||
static const char *async_global_to_local_kernel3D =
|
static const char *async_global_to_local_kernel3D = R"OpenCLC(
|
||||||
"#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
|
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
|
||||||
"%s\n" // optional pragma string
|
%s // optional pragma string
|
||||||
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
|
|
||||||
"%s *localBuffer, int numElementsPerLine, int numLines, int "
|
|
||||||
"planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, "
|
|
||||||
"int dstLineStride, int srcPlaneStride, int dstPlaneStride )\n"
|
|
||||||
"{\n"
|
|
||||||
" int i, j, k;\n"
|
|
||||||
// Zero the local storage first
|
|
||||||
" for(i=0; i<planesCopiesPerWorkItem; i++)\n"
|
|
||||||
" for(j=0; j<numLines; j++)\n"
|
|
||||||
" for(k=0; k<numElementsPerLine; k++)\n"
|
|
||||||
" localBuffer[ (get_local_id( 0 "
|
|
||||||
")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
|
|
||||||
"numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
|
|
||||||
"dstLineStride) + k ] = (%s)(%s)0;\n"
|
|
||||||
// Do this to verify all kernels are done zeroing the local buffer before we
|
|
||||||
// try the copy
|
|
||||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
|
||||||
" event_t event;\n"
|
|
||||||
" event = async_work_group_copy_3D3D( (__local %s*)localBuffer, "
|
|
||||||
"(__global const "
|
|
||||||
"%s*)(src+planesCopiesPerWorkgroup*get_group_id(0)*(numLines*"
|
|
||||||
"numElementsPerLine + numLines*srcLineStride + srcPlaneStride)), "
|
|
||||||
"(size_t)numElementsPerLine, (size_t)numLines, srcLineStride, "
|
|
||||||
"dstLineStride, planesCopiesPerWorkgroup, srcPlaneStride, dstPlaneStride, "
|
|
||||||
"0 );\n"
|
|
||||||
// Wait for the copy to complete, then verify by manually copying to the
|
|
||||||
// dest
|
|
||||||
" wait_group_events( 1, &event );\n"
|
|
||||||
" for(i=0; i<planesCopiesPerWorkItem; i++)\n"
|
|
||||||
" for(j=0; j<numLines; j++)\n"
|
|
||||||
" for(k=0; k<numElementsPerLine; k++)\n"
|
|
||||||
" dst[ (get_global_id( 0 "
|
|
||||||
")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
|
|
||||||
"numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
|
|
||||||
"dstLineStride) + k ] = localBuffer[ (get_local_id( 0 "
|
|
||||||
")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
|
|
||||||
"numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
|
|
||||||
"dstLineStride) + k ];\n"
|
|
||||||
"}\n";
|
|
||||||
|
|
||||||
static const char *async_local_to_global_kernel3D =
|
__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer,
|
||||||
"#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
|
int numElementsPerLine, int numLines, int planesCopiesPerWorkgroup,
|
||||||
"%s\n" // optional pragma string
|
int planesCopiesPerWorkItem, int srcLineStride,
|
||||||
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
|
int dstLineStride, int srcPlaneStride, int dstPlaneStride ) {
|
||||||
"%s *localBuffer, int numElementsPerLine, int numLines, int "
|
|
||||||
"planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, "
|
|
||||||
"int dstLineStride, int srcPlaneStride, int dstPlaneStride )\n"
|
|
||||||
"{\n"
|
|
||||||
" int i, j, k;\n"
|
|
||||||
// Zero the local storage first
|
// Zero the local storage first
|
||||||
" for(i=0; i<planesCopiesPerWorkItem; i++)\n"
|
for (int i = 0; i < planesCopiesPerWorkItem; i++) {
|
||||||
" for(j=0; j<numLines; j++)\n"
|
for (int j = 0; j < numLines; j++) {
|
||||||
" for(k=0; k<numElementsPerLine; k++)\n"
|
for (int k = 0; k < numElementsPerLine; k++) {
|
||||||
" localBuffer[ (get_local_id( 0 "
|
const int index = (get_local_id(0) * planesCopiesPerWorkItem + i) * dstPlaneStride + j * dstLineStride + k;
|
||||||
")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
|
localBuffer[index] = (%s)(%s)0;
|
||||||
"numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
|
}
|
||||||
"srcLineStride) + k ] = (%s)(%s)0;\n"
|
}
|
||||||
// Do this to verify all kernels are done zeroing the local buffer before we
|
}
|
||||||
// try the copy
|
|
||||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
|
||||||
" for(i=0; i<planesCopiesPerWorkItem; i++)\n"
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
" for(j=0; j<numLines; j++)\n"
|
|
||||||
" for(k=0; k<numElementsPerLine; k++)\n"
|
event_t event = async_work_group_copy_3D3D(localBuffer, 0, src,
|
||||||
" localBuffer[ (get_local_id( 0 "
|
planesCopiesPerWorkgroup * get_group_id(0) * srcPlaneStride,
|
||||||
")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
|
sizeof(%s), (size_t)numElementsPerLine, (size_t)numLines,
|
||||||
"numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
|
planesCopiesPerWorkgroup, srcLineStride, srcPlaneStride, dstLineStride,
|
||||||
"srcLineStride) + k ] = src[ (get_global_id( 0 "
|
dstPlaneStride, 0);
|
||||||
")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
|
|
||||||
"numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
|
// Wait for the copy to complete, then verify by manually copying to the dest
|
||||||
"srcLineStride) + k ];\n"
|
wait_group_events(1, &event);
|
||||||
// Do this to verify all kernels are done copying to the local buffer before
|
|
||||||
// we try the copy
|
for (int i = 0; i < planesCopiesPerWorkItem; i++) {
|
||||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
for (int j = 0; j < numLines; j++) {
|
||||||
" event_t event;\n"
|
for(int k = 0; k < numElementsPerLine; k++) {
|
||||||
" event = async_work_group_copy_3D3D((__global "
|
const int local_index = (get_local_id(0) * planesCopiesPerWorkItem + i) * dstPlaneStride + j * dstLineStride + k;
|
||||||
"%s*)(dst+planesCopiesPerWorkgroup*get_group_id(0)*(numLines*"
|
const int global_index = (get_global_id(0) * planesCopiesPerWorkItem + i) * dstPlaneStride + j * dstLineStride + k;
|
||||||
"numElementsPerLine + numLines*dstLineStride + dstPlaneStride)), (__local "
|
dst[global_index] = localBuffer[local_index];
|
||||||
"const %s*)localBuffer, (size_t)numElementsPerLine, (size_t)numLines, "
|
}
|
||||||
"srcLineStride, dstLineStride, planesCopiesPerWorkgroup, srcPlaneStride, "
|
}
|
||||||
"dstPlaneStride, 0 );\n"
|
}
|
||||||
" wait_group_events( 1, &event );\n"
|
}
|
||||||
"}\n";
|
)OpenCLC";
|
||||||
|
|
||||||
|
static const char *async_local_to_global_kernel3D = R"OpenCLC(
|
||||||
|
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
|
||||||
|
%s // optional pragma string
|
||||||
|
|
||||||
|
__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer,
|
||||||
|
int numElementsPerLine, int numLines, int planesCopiesPerWorkgroup,
|
||||||
|
int planesCopiesPerWorkItem, int srcLineStride,
|
||||||
|
int dstLineStride, int srcPlaneStride, int dstPlaneStride) {
|
||||||
|
// Zero the local storage first
|
||||||
|
for (int i = 0; i < planesCopiesPerWorkItem; i++) {
|
||||||
|
for (int j = 0; j < numLines; j++) {
|
||||||
|
for (int k = 0; k < numElementsPerLine; k++) {
|
||||||
|
const int index = (get_local_id(0) * planesCopiesPerWorkItem + i) * srcPlaneStride + j * srcLineStride + k;
|
||||||
|
localBuffer[index] = (%s)(%s)0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
for (int i=0; i < planesCopiesPerWorkItem; i++) {
|
||||||
|
for (int j=0; j < numLines; j++) {
|
||||||
|
for (int k=0; k < numElementsPerLine; k++) {
|
||||||
|
const int local_index = (get_local_id(0) * planesCopiesPerWorkItem + i) * srcPlaneStride + j * srcLineStride + k;
|
||||||
|
const int global_index = (get_global_id(0) * planesCopiesPerWorkItem + i) * srcPlaneStride + j*srcLineStride + k;
|
||||||
|
localBuffer[local_index] = src[global_index];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do this to verify all kernels are done copying to the local buffer before we try the copy
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
event_t event = async_work_group_copy_3D3D(dst,
|
||||||
|
planesCopiesPerWorkgroup * get_group_id(0) * dstPlaneStride, localBuffer, 0,
|
||||||
|
sizeof(%s), (size_t)numElementsPerLine, (size_t)numLines, planesCopiesPerWorkgroup,
|
||||||
|
srcLineStride, srcPlaneStride, dstLineStride, dstPlaneStride, 0);
|
||||||
|
|
||||||
|
wait_group_events(1, &event);
|
||||||
|
}
|
||||||
|
)OpenCLC";
|
||||||
|
|
||||||
int test_copy3D(cl_device_id deviceID, cl_context context,
|
int test_copy3D(cl_device_id deviceID, cl_context context,
|
||||||
cl_command_queue queue, const char *kernelCode,
|
cl_command_queue queue, const char *kernelCode,
|
||||||
ExplicitType vecType, int vecSize, int srcLineStride,
|
ExplicitType vecType, int vecSize, int srcLineMargin,
|
||||||
int dstLineStride, int srcPlaneStride, int dstPlaneStride,
|
int dstLineMargin, int srcPlaneMargin, int dstPlaneMargin,
|
||||||
bool localIsDst)
|
bool localIsDst)
|
||||||
{
|
{
|
||||||
int error;
|
int error;
|
||||||
@@ -133,10 +132,10 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
|
|||||||
vecSize);
|
vecSize);
|
||||||
|
|
||||||
size_t elementSize = get_explicit_type_size(vecType) * vecSize;
|
size_t elementSize = get_explicit_type_size(vecType) * vecSize;
|
||||||
log_info("Testing %s with srcLineStride = %d, dstLineStride = %d, "
|
log_info("Testing %s with srcLineMargin = %d, dstLineMargin = %d, "
|
||||||
"srcPlaneStride = %d, dstPlaneStride = %d\n",
|
"srcPlaneMargin = %d, dstPlaneMargin = %d\n",
|
||||||
vecNameString, srcLineStride, dstLineStride, srcPlaneStride,
|
vecNameString, srcLineMargin, dstLineMargin, srcPlaneMargin,
|
||||||
dstPlaneStride);
|
dstPlaneMargin);
|
||||||
|
|
||||||
cl_long max_local_mem_size;
|
cl_long max_local_mem_size;
|
||||||
error =
|
error =
|
||||||
@@ -201,16 +200,20 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
|
|||||||
if (max_workgroup_size > max_local_workgroup_size[0])
|
if (max_workgroup_size > max_local_workgroup_size[0])
|
||||||
max_workgroup_size = max_local_workgroup_size[0];
|
max_workgroup_size = max_local_workgroup_size[0];
|
||||||
|
|
||||||
size_t numElementsPerLine = 10;
|
const size_t numElementsPerLine = 10;
|
||||||
size_t numLines = 13;
|
const cl_int dstLineStride = numElementsPerLine + dstLineMargin;
|
||||||
size_t planesCopiesPerWorkItem = 2;
|
const cl_int srcLineStride = numElementsPerLine + srcLineMargin;
|
||||||
|
|
||||||
|
const size_t numLines = 13;
|
||||||
|
const cl_int dstPlaneStride = (numLines * dstLineStride) + dstPlaneMargin;
|
||||||
|
const cl_int srcPlaneStride = (numLines * srcLineStride) + srcPlaneMargin;
|
||||||
|
|
||||||
elementSize =
|
elementSize =
|
||||||
get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
|
get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
|
||||||
size_t localStorageSpacePerWorkitem = elementSize
|
const size_t planesCopiesPerWorkItem = 2;
|
||||||
* (planesCopiesPerWorkItem
|
const size_t localStorageSpacePerWorkitem = elementSize
|
||||||
* (numLines * numElementsPerLine
|
* planesCopiesPerWorkItem
|
||||||
+ numLines * (localIsDst ? dstLineStride : srcLineStride)
|
* (localIsDst ? dstPlaneStride : srcPlaneStride);
|
||||||
+ (localIsDst ? dstPlaneStride : srcPlaneStride)));
|
|
||||||
size_t maxLocalWorkgroupSize =
|
size_t maxLocalWorkgroupSize =
|
||||||
(((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem);
|
(((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem);
|
||||||
|
|
||||||
@@ -224,42 +227,41 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
|
|||||||
if (maxLocalWorkgroupSize > max_workgroup_size)
|
if (maxLocalWorkgroupSize > max_workgroup_size)
|
||||||
localWorkgroupSize = max_workgroup_size;
|
localWorkgroupSize = max_workgroup_size;
|
||||||
|
|
||||||
size_t maxTotalPlanesIn = ((max_alloc_size / elementSize) + srcPlaneStride)
|
const size_t maxTotalPlanesIn =
|
||||||
/ ((numLines * numElementsPerLine + numLines * srcLineStride)
|
((max_alloc_size / elementSize) + srcPlaneMargin) / srcPlaneStride;
|
||||||
+ srcPlaneStride);
|
const size_t maxTotalPlanesOut =
|
||||||
size_t maxTotalPlanesOut = ((max_alloc_size / elementSize) + dstPlaneStride)
|
((max_alloc_size / elementSize) + dstPlaneMargin) / dstPlaneStride;
|
||||||
/ ((numLines * numElementsPerLine + numLines * dstLineStride)
|
const size_t maxTotalPlanes = std::min(maxTotalPlanesIn, maxTotalPlanesOut);
|
||||||
+ dstPlaneStride);
|
const size_t maxLocalWorkgroups =
|
||||||
size_t maxTotalPlanes = std::min(maxTotalPlanesIn, maxTotalPlanesOut);
|
|
||||||
size_t maxLocalWorkgroups =
|
|
||||||
maxTotalPlanes / (localWorkgroupSize * planesCopiesPerWorkItem);
|
maxTotalPlanes / (localWorkgroupSize * planesCopiesPerWorkItem);
|
||||||
|
|
||||||
size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem
|
const size_t localBufferSize =
|
||||||
- (localIsDst ? dstPlaneStride : srcPlaneStride);
|
localWorkgroupSize * localStorageSpacePerWorkitem
|
||||||
size_t numberOfLocalWorkgroups = std::min(1111, (int)maxLocalWorkgroups);
|
- (localIsDst ? dstPlaneMargin : srcPlaneMargin);
|
||||||
size_t totalPlanes =
|
const size_t numberOfLocalWorkgroups =
|
||||||
|
std::min(1111, (int)maxLocalWorkgroups);
|
||||||
|
const size_t totalPlanes =
|
||||||
numberOfLocalWorkgroups * localWorkgroupSize * planesCopiesPerWorkItem;
|
numberOfLocalWorkgroups * localWorkgroupSize * planesCopiesPerWorkItem;
|
||||||
size_t inBufferSize = elementSize
|
const size_t inBufferSize = elementSize
|
||||||
* (totalPlanes
|
* (totalPlanes * numLines * srcLineStride
|
||||||
* (numLines * numElementsPerLine + numLines * srcLineStride)
|
+ (totalPlanes - 1) * srcPlaneMargin);
|
||||||
+ (totalPlanes - 1) * srcPlaneStride);
|
const size_t outBufferSize = elementSize
|
||||||
size_t outBufferSize = elementSize
|
* (totalPlanes * numLines * dstLineStride
|
||||||
* (totalPlanes
|
+ (totalPlanes - 1) * dstPlaneMargin);
|
||||||
* (numLines * numElementsPerLine + numLines * dstLineStride)
|
const size_t globalWorkgroupSize =
|
||||||
+ (totalPlanes - 1) * dstPlaneStride);
|
numberOfLocalWorkgroups * localWorkgroupSize;
|
||||||
size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize;
|
|
||||||
|
|
||||||
inBuffer = (void *)malloc(inBufferSize);
|
inBuffer = (void *)malloc(inBufferSize);
|
||||||
outBuffer = (void *)malloc(outBufferSize);
|
outBuffer = (void *)malloc(outBufferSize);
|
||||||
outBufferCopy = (void *)malloc(outBufferSize);
|
outBufferCopy = (void *)malloc(outBufferSize);
|
||||||
|
|
||||||
cl_int planesCopiesPerWorkItemInt, numElementsPerLineInt, numLinesInt,
|
const cl_int planesCopiesPerWorkItemInt =
|
||||||
planesCopiesPerWorkgroup;
|
static_cast<cl_int>(planesCopiesPerWorkItem);
|
||||||
planesCopiesPerWorkItemInt = (int)planesCopiesPerWorkItem;
|
const cl_int numElementsPerLineInt =
|
||||||
numElementsPerLineInt = (int)numElementsPerLine;
|
static_cast<cl_int>(numElementsPerLine);
|
||||||
numLinesInt = (int)numLines;
|
const cl_int numLinesInt = static_cast<cl_int>(numLines);
|
||||||
planesCopiesPerWorkgroup =
|
const cl_int planesCopiesPerWorkgroup =
|
||||||
(int)(planesCopiesPerWorkItem * localWorkgroupSize);
|
static_cast<cl_int>(planesCopiesPerWorkItem * localWorkgroupSize);
|
||||||
|
|
||||||
log_info("Global: %d, local %d, local buffer %db, global in buffer %db, "
|
log_info("Global: %d, local %d, local buffer %db, global in buffer %db, "
|
||||||
"global out buffer %db, each work group will copy %d planes and "
|
"global out buffer %db, each work group will copy %d planes and "
|
||||||
@@ -336,14 +338,8 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
|
|||||||
for (int k = 0; k < (int)numElementsPerLine * elementSize;
|
for (int k = 0; k < (int)numElementsPerLine * elementSize;
|
||||||
k += elementSize)
|
k += elementSize)
|
||||||
{
|
{
|
||||||
int inIdx = i
|
int inIdx = i * srcPlaneStride + j * srcLineStride + k;
|
||||||
* (numLines * numElementsPerLine
|
int outIdx = i * dstPlaneStride + j * dstLineStride + k;
|
||||||
+ numLines * srcLineStride + srcPlaneStride)
|
|
||||||
+ j * (numElementsPerLine + srcLineStride) + k;
|
|
||||||
int outIdx = i
|
|
||||||
* (numLines * numElementsPerLine
|
|
||||||
+ numLines * dstLineStride + dstPlaneStride)
|
|
||||||
+ j * (numElementsPerLine + dstLineStride) + k;
|
|
||||||
if (memcmp(((char *)inBuffer) + inIdx,
|
if (memcmp(((char *)inBuffer) + inIdx,
|
||||||
((char *)outBuffer) + outIdx, typeSize)
|
((char *)outBuffer) + outIdx, typeSize)
|
||||||
!= 0)
|
!= 0)
|
||||||
@@ -378,14 +374,11 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
|
|||||||
}
|
}
|
||||||
if (j < (int)numLines * elementSize)
|
if (j < (int)numLines * elementSize)
|
||||||
{
|
{
|
||||||
int outIdx = i
|
int outIdx = i * dstPlaneStride + j * dstLineStride
|
||||||
* (numLines * numElementsPerLine
|
|
||||||
+ numLines * dstLineStride + dstPlaneStride)
|
|
||||||
+ j * (numElementsPerLine + dstLineStride)
|
|
||||||
+ numElementsPerLine * elementSize;
|
+ numElementsPerLine * elementSize;
|
||||||
if (memcmp(((char *)outBuffer) + outIdx,
|
if (memcmp(((char *)outBuffer) + outIdx,
|
||||||
((char *)outBufferCopy) + outIdx,
|
((char *)outBufferCopy) + outIdx,
|
||||||
dstLineStride * elementSize)
|
dstLineMargin * elementSize)
|
||||||
!= 0)
|
!= 0)
|
||||||
{
|
{
|
||||||
if (failuresPrinted == 0)
|
if (failuresPrinted == 0)
|
||||||
@@ -409,14 +402,11 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
|
|||||||
if (i < (int)(globalWorkgroupSize * planesCopiesPerWorkItem - 1)
|
if (i < (int)(globalWorkgroupSize * planesCopiesPerWorkItem - 1)
|
||||||
* elementSize)
|
* elementSize)
|
||||||
{
|
{
|
||||||
int outIdx = i
|
int outIdx =
|
||||||
* (numLines * numElementsPerLine + numLines * dstLineStride
|
i * dstPlaneStride + numLines * dstLineStride * elementSize;
|
||||||
+ dstPlaneStride)
|
|
||||||
+ (numLines * elementSize) * (numElementsPerLine)
|
|
||||||
+ (numLines * elementSize) * (dstLineStride);
|
|
||||||
if (memcmp(((char *)outBuffer) + outIdx,
|
if (memcmp(((char *)outBuffer) + outIdx,
|
||||||
((char *)outBufferCopy) + outIdx,
|
((char *)outBufferCopy) + outIdx,
|
||||||
dstPlaneStride * elementSize)
|
dstPlaneMargin * elementSize)
|
||||||
!= 0)
|
!= 0)
|
||||||
{
|
{
|
||||||
if (failuresPrinted == 0)
|
if (failuresPrinted == 0)
|
||||||
@@ -453,10 +443,13 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context,
|
|||||||
kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong,
|
kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong,
|
||||||
kULong, kFloat, kDouble, kNumExplicitTypes
|
kULong, kFloat, kDouble, kNumExplicitTypes
|
||||||
};
|
};
|
||||||
|
// The margins below represent the number of elements between the end of
|
||||||
|
// one line or plane and the start of the next. The strides are equivalent
|
||||||
|
// to the size of the line or plane plus the chosen margin.
|
||||||
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
|
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
|
||||||
unsigned int smallTypesStrideSizes[] = { 0, 10, 100 };
|
unsigned int smallTypesMarginSizes[] = { 0, 10, 100 };
|
||||||
unsigned int size, typeIndex, srcLineStride, dstLineStride, srcPlaneStride,
|
unsigned int size, typeIndex, srcLineMargin, dstLineMargin, srcPlaneMargin,
|
||||||
dstPlaneStride;
|
dstPlaneMargin;
|
||||||
|
|
||||||
int errors = 0;
|
int errors = 0;
|
||||||
|
|
||||||
@@ -482,33 +475,33 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context,
|
|||||||
if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
|
if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
|
||||||
<= 2) // small type
|
<= 2) // small type
|
||||||
{
|
{
|
||||||
for (srcLineStride = 0;
|
for (srcLineMargin = 0;
|
||||||
srcLineStride < sizeof(smallTypesStrideSizes)
|
srcLineMargin < sizeof(smallTypesMarginSizes)
|
||||||
/ sizeof(smallTypesStrideSizes[0]);
|
/ sizeof(smallTypesMarginSizes[0]);
|
||||||
srcLineStride++)
|
srcLineMargin++)
|
||||||
{
|
{
|
||||||
for (dstLineStride = 0;
|
for (dstLineMargin = 0;
|
||||||
dstLineStride < sizeof(smallTypesStrideSizes)
|
dstLineMargin < sizeof(smallTypesMarginSizes)
|
||||||
/ sizeof(smallTypesStrideSizes[0]);
|
/ sizeof(smallTypesMarginSizes[0]);
|
||||||
dstLineStride++)
|
dstLineMargin++)
|
||||||
{
|
{
|
||||||
for (srcPlaneStride = 0;
|
for (srcPlaneMargin = 0;
|
||||||
srcPlaneStride < sizeof(smallTypesStrideSizes)
|
srcPlaneMargin < sizeof(smallTypesMarginSizes)
|
||||||
/ sizeof(smallTypesStrideSizes[0]);
|
/ sizeof(smallTypesMarginSizes[0]);
|
||||||
srcPlaneStride++)
|
srcPlaneMargin++)
|
||||||
{
|
{
|
||||||
for (dstPlaneStride = 0;
|
for (dstPlaneMargin = 0;
|
||||||
dstPlaneStride < sizeof(smallTypesStrideSizes)
|
dstPlaneMargin < sizeof(smallTypesMarginSizes)
|
||||||
/ sizeof(smallTypesStrideSizes[0]);
|
/ sizeof(smallTypesMarginSizes[0]);
|
||||||
dstPlaneStride++)
|
dstPlaneMargin++)
|
||||||
{
|
{
|
||||||
if (test_copy3D(
|
if (test_copy3D(
|
||||||
deviceID, context, queue, kernelCode,
|
deviceID, context, queue, kernelCode,
|
||||||
vecType[typeIndex], vecSizes[size],
|
vecType[typeIndex], vecSizes[size],
|
||||||
smallTypesStrideSizes[srcLineStride],
|
smallTypesMarginSizes[srcLineMargin],
|
||||||
smallTypesStrideSizes[dstLineStride],
|
smallTypesMarginSizes[dstLineMargin],
|
||||||
smallTypesStrideSizes[srcPlaneStride],
|
smallTypesMarginSizes[srcPlaneMargin],
|
||||||
smallTypesStrideSizes[dstPlaneStride],
|
smallTypesMarginSizes[dstPlaneMargin],
|
||||||
localIsDst))
|
localIsDst))
|
||||||
{
|
{
|
||||||
errors++;
|
errors++;
|
||||||
|
|||||||
Reference in New Issue
Block a user