mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Remove unsupported code (#1211)
* Remove code for runtime measurement

  The GetTime() and associated functions are not fully implemented on Linux. This functionality is assumed to be untested, or unused at best. Reduce differences between tests by removing this unnecessary feature. It can be (re-)implemented later, if desired, once the math_brute_force component is in better shape.

  Signed-off-by: Marco Antognini <marco.antognini@arm.com>

* Coalesce if-statements

  Signed-off-by: Marco Antognini <marco.antognini@arm.com>

* Keep else branch

  Address comments.

  Signed-off-by: Marco Antognini <marco.antognini@arm.com>
This commit is contained in:
@@ -436,95 +436,10 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input arrays
|
||||
double *p = (double *)gIn;
|
||||
double *p2 = (double *)gIn2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
{
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
p2[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 2,
|
||||
sizeof(gInBuffer2), &gInBuffer2)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (i = 0; i < PERF_LOOP_COUNT; i++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0],
|
||||
1, NULL, &localCount, NULL,
|
||||
0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (BUFFER_SIZE / sizeof(double));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -428,95 +428,10 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input arrays
|
||||
cl_uint *p = (cl_uint *)gIn;
|
||||
cl_uint *p2 = (cl_uint *)gIn2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
p[j] = (genrand_int32(d) & ~0x40000000) | 0x20000000;
|
||||
p2[j] = 0x3fc00000;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 2,
|
||||
sizeof(gInBuffer2), &gInBuffer2)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (i = 0; i < PERF_LOOP_COUNT; i++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0],
|
||||
1, NULL, &localCount, NULL,
|
||||
0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (BUFFER_SIZE / sizeof(float));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -437,96 +437,10 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input arrays
|
||||
double *p = (double *)gIn;
|
||||
cl_int *p2 = (cl_int *)gIn2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
{
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
p2[j] = 3;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error =
|
||||
clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
BUFFER_SIZE / 2, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 2,
|
||||
sizeof(gInBuffer2), &gInBuffer2)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (i = 0; i < PERF_LOOP_COUNT; i++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0],
|
||||
1, NULL, &localCount, NULL,
|
||||
0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (BUFFER_SIZE / sizeof(double));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -430,95 +430,10 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input arrays
|
||||
uint32_t *p = (uint32_t *)gIn;
|
||||
uint32_t *p2 = (uint32_t *)gIn2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
p[j] = (genrand_int32(d) & ~0x40000000) | 0x38000000;
|
||||
p2[j] = 3;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 2,
|
||||
sizeof(gInBuffer2), &gInBuffer2)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (i = 0; i < PERF_LOOP_COUNT; i++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0],
|
||||
1, NULL, &localCount, NULL,
|
||||
0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (BUFFER_SIZE / sizeof(float));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -430,95 +430,10 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input arrays
|
||||
double *p = (double *)gIn;
|
||||
double *p2 = (double *)gIn2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
{
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
p2[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 2,
|
||||
sizeof(gInBuffer2), &gInBuffer2)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (i = 0; i < PERF_LOOP_COUNT; i++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0],
|
||||
1, NULL, &localCount, NULL,
|
||||
0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (BUFFER_SIZE / sizeof(double));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -422,95 +422,10 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input arrays
|
||||
uint32_t *p = (uint32_t *)gIn;
|
||||
uint32_t *p2 = (uint32_t *)gIn2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
p[j] = (genrand_int32(d) & ~0x40000000) | 0x20000000;
|
||||
p2[j] = 0x3fc00000;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 2,
|
||||
sizeof(gInBuffer2), &gInBuffer2)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (i = 0; i < PERF_LOOP_COUNT; i++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0],
|
||||
1, NULL, &localCount, NULL,
|
||||
0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (BUFFER_SIZE / sizeof(float));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -565,96 +565,10 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
double *p = (double *)gIn;
|
||||
for (j = 0; j < bufferSize / sizeof(cl_double); j++)
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
bufferSize, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
||||
size_t localCount = (bufferSize + vectorSize - 1)
|
||||
/ vectorSize; // bufferSize / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||
&gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
|
||||
&gOutBuffer2[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer2),
|
||||
&gInBuffer2)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||
&localCount, NULL, 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (bufferSize / sizeof(double));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -550,98 +550,10 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
cl_uint *p = (cl_uint *)gIn;
|
||||
for (j = 0; j < bufferSize / sizeof(float); j++)
|
||||
{
|
||||
p[j] = genrand_int32(d);
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
bufferSize, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||
size_t localCount = (bufferSize + vectorSize - 1)
|
||||
/ vectorSize; // bufferSize / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||
&gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
|
||||
&gOutBuffer2[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer2),
|
||||
&gInBuffer2)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||
&localCount, NULL, 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (bufferSize / sizeof(float));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -289,73 +289,6 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
double *p = (double *)gIn;
|
||||
for (j = 0; j < bufferSize / sizeof(cl_double); j++)
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
||||
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
||||
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||
&gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||
&localCount, NULL, 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (bufferSize / sizeof(double));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -286,74 +286,6 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
uint32_t *p = (uint32_t *)gIn;
|
||||
for (j = 0; j < bufferSize / sizeof(float); j++)
|
||||
p[j] = genrand_int32(d);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||
size_t localCount = (bufferSize + vectorSize - 1)
|
||||
/ vectorSize; // bufferSize / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||
&gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||
&localCount, NULL, 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (bufferSize / sizeof(float));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -406,93 +406,6 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input arrays
|
||||
cl_ulong *p = (cl_ulong *)gIn;
|
||||
cl_ulong *p2 = (cl_ulong *)gIn2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
{
|
||||
p[j] =
|
||||
(cl_ulong)genrand_int32(d) | ((cl_ulong)genrand_int32(d) << 32);
|
||||
p2[j] =
|
||||
(cl_ulong)genrand_int32(d) | ((cl_ulong)genrand_int32(d) << 32);
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 2,
|
||||
sizeof(gInBuffer2), &gInBuffer2)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (i = 0; i < PERF_LOOP_COUNT; i++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0],
|
||||
1, NULL, &localCount, NULL,
|
||||
0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (BUFFER_SIZE / sizeof(double));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -397,91 +397,6 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input arrays
|
||||
cl_uint *p = (cl_uint *)gIn;
|
||||
cl_uint *p2 = (cl_uint *)gIn2;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
{
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 2,
|
||||
sizeof(gInBuffer2), &gInBuffer2)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (i = 0; i < PERF_LOOP_COUNT; i++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0],
|
||||
1, NULL, &localCount, NULL,
|
||||
0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (BUFFER_SIZE / sizeof(float));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -272,74 +272,6 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
cl_ulong *p = (cl_ulong *)gIn;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (i = 0; i < PERF_LOOP_COUNT; i++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0],
|
||||
1, NULL, &localCount, NULL,
|
||||
0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (BUFFER_SIZE / sizeof(double));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -272,74 +272,6 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
cl_uint *p = (cl_uint *)gIn;
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
p[j] = genrand_int32(d);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (i = 0; i < PERF_LOOP_COUNT; i++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0],
|
||||
1, NULL, &localCount, NULL,
|
||||
0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (BUFFER_SIZE / sizeof(float));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -285,111 +285,11 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
double *p = (double *)gIn;
|
||||
double *p2 = (double *)gIn2;
|
||||
double *p3 = (double *)gIn3;
|
||||
for (j = 0; j < bufferSize / sizeof(double); j++)
|
||||
{
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
p2[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
p3[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
bufferSize, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
|
||||
bufferSize, gIn3, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
||||
size_t localCount = (bufferSize + vectorSize - 1)
|
||||
/ vectorSize; // bufferSize / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||
&gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
|
||||
&gInBuffer2)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
|
||||
&gInBuffer3)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||
&localCount, NULL, 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (bufferSize / sizeof(double));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
|
||||
maxErrorVal3);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -284,109 +284,11 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
cl_uint *p = (cl_uint *)gIn;
|
||||
cl_uint *p2 = (cl_uint *)gIn2;
|
||||
cl_uint *p3 = (cl_uint *)gIn3;
|
||||
for (j = 0; j < bufferSize / sizeof(float); j++)
|
||||
{
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
p3[j] = genrand_int32(d);
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
bufferSize, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
|
||||
bufferSize, gIn3, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||
size_t localCount = (bufferSize + vectorSize - 1)
|
||||
/ vectorSize; // bufferSize / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||
&gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
|
||||
&gInBuffer2)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
|
||||
&gInBuffer3)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||
&localCount, NULL, 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (bufferSize / sizeof(float));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
|
||||
maxErrorVal3);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -62,8 +62,6 @@ static int32_t gEndTestNumber = -1;
|
||||
int gSkipCorrectnessTesting = 0; // when zero, the test functions log the max error ("max_ulps" column)
|
||||
int gStopOnError = 0; // toggled by the 's' option in ParseArgs
|
||||
static bool gSkipRestOfTests;
|
||||
int gMeasureTimes = 0; // toggled by the 't' option; nonzero enables the `if (gMeasureTimes)` timing passes
|
||||
int gReportAverageTimes = 0; // toggled by the 'a' option; nonzero reports the mean of PERF_LOOP_COUNT runs instead of the best, and indexes method[]
|
||||
int gForceFTZ = 0;
|
||||
int gWimpyMode = 0; // presumably toggled by the 'w' option (its case body is not visible here) -- confirm
|
||||
int gHasDouble = 0; // toggled by the 'd' option in ParseArgs
|
||||
@@ -87,7 +85,6 @@ int gCheckTininessBeforeRounding = 1;
|
||||
int gIsInRTZMode = 0;
|
||||
uint32_t gMaxVectorSizeIndex = VECTOR_SIZE_COUNT;
|
||||
uint32_t gMinVectorSizeIndex = 0;
|
||||
const char *method[] = { "Best", "Average" };
|
||||
void *gIn = NULL;
|
||||
void *gIn2 = NULL;
|
||||
void *gIn3 = NULL;
|
||||
@@ -813,24 +810,8 @@ int main(int argc, const char *argv[])
|
||||
else if (gStopOnError)
|
||||
vlog("Stopping at first error.\n");
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
vlog("%s times are reported at right (cycles per element):\n",
|
||||
method[gReportAverageTimes]);
|
||||
vlog("\n");
|
||||
if (gSkipCorrectnessTesting)
|
||||
vlog(" \t ");
|
||||
else
|
||||
vlog(" \t ");
|
||||
if (gWimpyMode) vlog(" ");
|
||||
for (int i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
|
||||
vlog("\t float%s", sizeNames[i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
vlog(" \t ");
|
||||
if (gWimpyMode) vlog(" ");
|
||||
}
|
||||
vlog(" \t ");
|
||||
if (gWimpyMode) vlog(" ");
|
||||
if (!gSkipCorrectnessTesting) vlog("\t max_ulps");
|
||||
|
||||
vlog("\n-------------------------------------------------------------------"
|
||||
@@ -905,8 +886,6 @@ static int ParseArgs(int argc, const char **argv)
|
||||
optionFound = 1;
|
||||
switch (*arg)
|
||||
{
|
||||
case 'a': gReportAverageTimes ^= 1; break;
|
||||
|
||||
case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break;
|
||||
|
||||
case 'd': gHasDouble ^= 1; break;
|
||||
@@ -927,8 +906,6 @@ static int ParseArgs(int argc, const char **argv)
|
||||
|
||||
case 's': gStopOnError ^= 1; break;
|
||||
|
||||
case 't': gMeasureTimes ^= 1; break;
|
||||
|
||||
case 'v': gVerboseBruteForce ^= 1; break;
|
||||
|
||||
case 'w': // wimpy mode
|
||||
@@ -970,7 +947,6 @@ static int ParseArgs(int argc, const char **argv)
|
||||
gMinVectorSizeIndex = 4;
|
||||
gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
|
||||
break;
|
||||
break;
|
||||
|
||||
default:
|
||||
vlog(" <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg);
|
||||
@@ -1053,9 +1029,8 @@ static void PrintFunctions(void)
|
||||
|
||||
static void PrintUsage(void)
|
||||
{
|
||||
vlog("%s [-acglstz]: <optional: math function names>\n", appName);
|
||||
vlog("%s [-cglsz]: <optional: math function names>\n", appName);
|
||||
vlog("\toptions:\n");
|
||||
vlog("\t\t-a\tReport average times instead of best times\n");
|
||||
vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: "
|
||||
"off)\n");
|
||||
vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
|
||||
@@ -1070,7 +1045,6 @@ static void PrintUsage(void)
|
||||
"accuracy checks.)\n");
|
||||
vlog("\t\t-m\tToggle run multi-threaded. (Default: on) )\n");
|
||||
vlog("\t\t-s\tStop on error\n");
|
||||
vlog("\t\t-t\tToggle timing (on by default)\n");
|
||||
vlog("\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
|
||||
vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
|
||||
"1-10, default factor(%u)\n",
|
||||
@@ -1809,51 +1783,6 @@ float Abs_Error(float test, double reference)
|
||||
return fabs((float)(reference - (double)test));
|
||||
}
|
||||
|
||||
#if defined(__APPLE__)
#include <mach/mach_time.h>
#elif !(defined(_WIN32) && defined(_MSC_VER))
#include <time.h>
#endif

/*
 * Return an opaque timestamp, in platform-specific ticks, suitable only for
 * being passed to SubtractTime().
 *
 *  - Apple:        mach_absolute_time() ticks.
 *  - Windows/MSVC: whatever ReadTime() returns.
 *  - Elsewhere:    nanoseconds. A monotonic clock is used when the platform
 *                  headers expose CLOCK_MONOTONIC; otherwise the C11
 *                  timespec_get() wall clock is used as a fallback.
 *
 * Returns 0 if the clock could not be read.
 */
uint64_t GetTime(void)
{
#if defined(__APPLE__)
    return mach_absolute_time();
#elif defined(_WIN32) && defined(_MSC_VER)
    return ReadTime();
#else
    struct timespec now;

#if defined(CLOCK_MONOTONIC)
    /* Monotonic time is immune to wall-clock adjustments during a run. */
    if (clock_gettime(CLOCK_MONOTONIC, &now) != 0) return 0;
#else
    /* timespec_get() returns 0 on failure and the time base on success. */
    if (timespec_get(&now, TIME_UTC) == 0) return 0;
#endif

    return (uint64_t)now.tv_sec * (uint64_t)1000000000
        + (uint64_t)now.tv_nsec;
#endif
}

#if defined(_WIN32) && defined(_MSC_VER)
/* function is defined in "compat.h" */
#else
/*
 * Convert the difference between two GetTime() timestamps to seconds.
 *
 * endTime and startTime must both come from GetTime(); the result is
 * (endTime - startTime) scaled by the tick-to-seconds factor that matches
 * the clock GetTime() uses on this platform. The factor is computed once
 * and cached in a function-local static.
 */
double SubtractTime(uint64_t endTime, uint64_t startTime)
{
    uint64_t diff = endTime - startTime;
    static double conversion = 0.0;

    if (0.0 == conversion)
    {
#if defined(__APPLE__)
        /* Mach ticks -> nanoseconds via the timebase, then -> seconds. */
        mach_timebase_info_data_t info = { 0, 0 };
        kern_return_t err = mach_timebase_info(&info);
        if (0 == err)
            conversion = 1e-9 * (double)info.numer / (double)info.denom;
#else
        /* GetTime() returns nanoseconds on this path. */
        conversion = 1e-9;
#endif
    }

    // strictly speaking we should also be subtracting out timer latency here
    return conversion * (double)diff;
}
#endif
|
||||
|
||||
cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
|
||||
{
|
||||
if (0 == (x & (x - 1))) return x;
|
||||
|
||||
@@ -723,109 +723,11 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
double *p = (double *)gIn;
|
||||
double *p2 = (double *)gIn2;
|
||||
double *p3 = (double *)gIn3;
|
||||
for (j = 0; j < bufferSize / sizeof(double); j++)
|
||||
{
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
p2[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
p3[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
bufferSize, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
|
||||
bufferSize, gIn3, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
||||
size_t localCount = (bufferSize + vectorSize - 1)
|
||||
/ vectorSize; // bufferSize / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||
&gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
|
||||
&gInBuffer2)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
|
||||
&gInBuffer3)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||
&localCount, NULL, 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (bufferSize / sizeof(double));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
|
||||
maxErrorVal3);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -858,109 +858,11 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
cl_uint *p = (cl_uint *)gIn;
|
||||
cl_uint *p2 = (cl_uint *)gIn2;
|
||||
cl_uint *p3 = (cl_uint *)gIn3;
|
||||
for (j = 0; j < bufferSize / sizeof(float); j++)
|
||||
{
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
p3[j] = genrand_int32(d);
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
|
||||
bufferSize, gIn2, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
|
||||
bufferSize, gIn3, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||
size_t localCount = (bufferSize + vectorSize - 1)
|
||||
/ vectorSize; // bufferSize / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||
&gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
|
||||
&gInBuffer2)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
|
||||
&gInBuffer3)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||
&localCount, NULL, 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (bufferSize / sizeof(float));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
|
||||
maxErrorVal3);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -290,85 +290,10 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
double *p = (double *)gIn;
|
||||
|
||||
if (strstr(f->name, "exp"))
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
p[j] = (double)genrand_real1(d);
|
||||
else if (strstr(f->name, "log"))
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
p[j] = fabs(DoubleFromUInt32(genrand_int32(d)));
|
||||
else
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(double); j++)
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_double) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (i = 0; i < PERF_LOOP_COUNT; i++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0],
|
||||
1, NULL, &localCount, NULL,
|
||||
0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (BUFFER_SIZE / sizeof(double));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting) vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -315,85 +315,10 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog(" (rlx skip correctness testing)\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
uint32_t *p = (uint32_t *)gIn;
|
||||
if (strstr(f->name, "exp") || strstr(f->name, "sin")
|
||||
|| strstr(f->name, "cos") || strstr(f->name, "tan"))
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
((float *)p)[j] = (float)genrand_real1(d);
|
||||
else if (strstr(f->name, "log"))
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
p[j] = genrand_int32(d) & 0x7fffffff;
|
||||
else
|
||||
for (j = 0; j < BUFFER_SIZE / sizeof(float); j++)
|
||||
p[j] = genrand_int32(d);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
BUFFER_SIZE, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeof(cl_float) * sizeValues[j];
|
||||
size_t localCount = (BUFFER_SIZE + vectorSize - 1)
|
||||
/ vectorSize; // BUFFER_SIZE / vectorSize rounded up
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 0,
|
||||
sizeof(gOutBuffer[j]), &gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(test_info.k[j][0], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(test_info.programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (i = 0; i < PERF_LOOP_COUNT; i++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0],
|
||||
1, NULL, &localCount, NULL,
|
||||
0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (BUFFER_SIZE / sizeof(float));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting) vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -429,84 +429,11 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
double *p = (double *)gIn;
|
||||
for (j = 0; j < bufferSize / sizeof(double); j++)
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
||||
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
||||
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||
&gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
|
||||
&gOutBuffer2[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||
&localCount, NULL, 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (bufferSize / sizeof(double));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0,
|
||||
maxErrorVal1);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -564,85 +564,11 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
uint32_t *p = (uint32_t *)gIn;
|
||||
for (j = 0; j < bufferSize / sizeof(float); j++)
|
||||
p[j] = genrand_int32(d);
|
||||
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
||||
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
||||
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||
&gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
|
||||
&gOutBuffer2[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||
&localCount, NULL, 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (bufferSize / sizeof(float));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0,
|
||||
maxErrorVal1);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -401,84 +401,10 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
double *p = (double *)gIn;
|
||||
|
||||
for (j = 0; j < bufferSize / sizeof(double); j++)
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
||||
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
||||
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||
&gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
|
||||
&gOutBuffer2[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||
&localCount, NULL, 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (bufferSize / sizeof(double));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -400,83 +400,10 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
uint32_t *p = (uint32_t *)gIn;
|
||||
for (j = 0; j < bufferSize / sizeof(float); j++)
|
||||
p[j] = genrand_int32(d);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
||||
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
||||
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||
&gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
|
||||
&gOutBuffer2[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||
&localCount, NULL, 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (bufferSize / sizeof(float));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting)
|
||||
vlog("\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal);
|
||||
}
|
||||
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -299,76 +299,10 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
double *p = (double *)gIn;
|
||||
|
||||
for (j = 0; j < bufferSize / sizeof(double); j++) p[j] = random64(d);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_double);
|
||||
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
||||
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||
&gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||
&localCount, NULL, 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (bufferSize / sizeof(double));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting) vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -332,84 +332,10 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
|
||||
vlog("Wimp pass");
|
||||
else
|
||||
vlog("passed");
|
||||
|
||||
vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
}
|
||||
|
||||
if (gMeasureTimes)
|
||||
{
|
||||
// Init input array
|
||||
uint32_t *p = (uint32_t *)gIn;
|
||||
if (strstr(f->name, "exp") || strstr(f->name, "sin")
|
||||
|| strstr(f->name, "cos") || strstr(f->name, "tan"))
|
||||
for (j = 0; j < bufferSize / sizeof(float); j++)
|
||||
((float *)p)[j] = (float)genrand_real1(d);
|
||||
else if (strstr(f->name, "log"))
|
||||
for (j = 0; j < bufferSize / sizeof(float); j++)
|
||||
p[j] = genrand_int32(d) & 0x7fffffff;
|
||||
else
|
||||
for (j = 0; j < bufferSize / sizeof(float); j++)
|
||||
p[j] = genrand_int32(d);
|
||||
if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
|
||||
bufferSize, gIn, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
|
||||
{
|
||||
size_t vectorSize = sizeValues[j] * sizeof(cl_float);
|
||||
size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
|
||||
if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
|
||||
&gOutBuffer[j])))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
|
||||
&gInBuffer)))
|
||||
{
|
||||
LogBuildError(programs[j]);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for (k = 0; k < PERF_LOOP_COUNT; k++)
|
||||
{
|
||||
uint64_t startTime = GetTime();
|
||||
if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
|
||||
&localCount, NULL, 0, NULL,
|
||||
NULL)))
|
||||
{
|
||||
vlog_error("FAILED -- could not execute kernel\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if ((error = clFinish(gQueue)))
|
||||
{
|
||||
vlog_error("Error %d at clFinish\n", error);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime(endTime, startTime);
|
||||
sum += time;
|
||||
if (time < bestTime) bestTime = time;
|
||||
}
|
||||
|
||||
if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double)gDeviceFrequency
|
||||
* gComputeDevices * gSimdSize * 1e6
|
||||
/ (bufferSize / sizeof(float));
|
||||
vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s",
|
||||
f->name, sizeNames[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!gSkipCorrectnessTesting) vlog("\t%8.2f @ %a", maxError, maxErrorVal);
|
||||
vlog("\n");
|
||||
|
||||
exit:
|
||||
|
||||
@@ -59,8 +59,6 @@ extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT];
|
||||
extern uint32_t gComputeDevices;
|
||||
extern uint32_t gSimdSize;
|
||||
extern int gSkipCorrectnessTesting;
|
||||
extern int gMeasureTimes;
|
||||
extern int gReportAverageTimes;
|
||||
extern int gForceFTZ;
|
||||
extern int gFastRelaxedDerived;
|
||||
extern int gWimpyMode;
|
||||
@@ -91,8 +89,6 @@ float Abs_Error(float test, double reference);
|
||||
float Ulp_Error(float test, double reference);
|
||||
float Bruteforce_Ulp_Error_Double(double test, long double reference);
|
||||
|
||||
uint64_t GetTime(void);
|
||||
double SubtractTime(uint64_t endTime, uint64_t startTime);
|
||||
int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
|
||||
cl_program *p, bool relaxedMode);
|
||||
int MakeKernels(const char **c, cl_uint count, const char *name,
|
||||
@@ -123,8 +119,6 @@ static inline double DoubleFromUInt32(uint32_t bits)
|
||||
void _LogBuildError(cl_program p, int line, const char *file);
|
||||
#define LogBuildError(program) _LogBuildError(program, __LINE__, __FILE__)
|
||||
|
||||
#define PERF_LOOP_COUNT 100
|
||||
|
||||
// The spec is fairly clear that we may enforce a hard cutoff to prevent
|
||||
// premature flushing to zero.
|
||||
// However, to avoid conflict for 1.0, we are letting results at TYPE_MIN +
|
||||
|
||||
Reference in New Issue
Block a user