diff --git a/test_conformance/math_brute_force/binary.cpp b/test_conformance/math_brute_force/binary.cpp index 1784c725..e6b9cbbc 100644 --- a/test_conformance/math_brute_force/binary.cpp +++ b/test_conformance/math_brute_force/binary.cpp @@ -25,9 +25,6 @@ int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata, int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata, bool relaxedMode); -const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126); -const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022); - extern const vtbl _binary = { "binary", TestFunc_Float_Float_Float, TestFunc_Double_Double_Double }; @@ -36,6 +33,8 @@ extern const vtbl _binary_nextafter = { TestFunc_Double_Double_Double_nextafter }; +const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126); +const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022); static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p, bool relaxedMode) @@ -76,7 +75,8 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, " size_t parity = i & 1; // Figure out how many elements are " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "buffer size \n" - " float3 f0, f1;\n" + " float3 f0;\n" + " float3 f1;\n" " switch( parity )\n" " {\n" " case 1:\n" @@ -163,7 +163,8 @@ static int BuildKernelDouble(const char *name, int vectorSize, " size_t parity = i & 1; // Figure out how many elements are " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "buffer size \n" - " double3 d0, d1;\n" + " double3 d0;\n" + " double3 d1;\n" " switch( parity )\n" " {\n" " case 1:\n" @@ -208,6 +209,35 @@ static int BuildKernelDouble(const char *name, int vectorSize, relaxedMode); } +typedef struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + cl_kernel **kernels; + cl_program *programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. +} BuildKernelInfo; + +static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED, + void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernel(info->nameInCode, i, info->kernel_count, + info->kernels[i], info->programs + i, info->relaxedMode); +} + +static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED, + void *p) +{ + BuildKernelInfo *info = (BuildKernelInfo *)p; + cl_uint i = info->offset + job_id; + return BuildKernelDouble(info->nameInCode, i, info->kernel_count, + info->kernels[i], info->programs + i, + info->relaxedMode); +} + // A table of more difficult cases to get right static const float specialValuesFloat[] = { -NAN, @@ -311,38 +341,9 @@ static const float specialValuesFloat[] = { +0.0f }; -static size_t specialValuesFloatCount = +static const size_t specialValuesFloatCount = sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]); -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - cl_kernel **kernels; - cl_program *programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; - -static cl_int BuildKernel_FloatFn(cl_uint job_id, cl_uint thread_id UNUSED, - void *p) -{ - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); -} - -static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED, - void *p) -{ - BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernelDouble(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, - info->relaxedMode); -} - // Thread specific data for a worker thread typedef struct ThreadInfo { @@ -426,11 +427,11 @@ static int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - + test_info.relaxedMode = relaxedMode; test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); test_info.skipNanInf = test_info.isFDim && !gInfNanSupport; test_info.isNextafter = isNextafter; - test_info.relaxedMode = relaxedMode; + // cl_kernels aren't thread safe, so we make one for each vector size for // every thread for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) @@ -543,12 +544,11 @@ static int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, vlog("passed"); } - if (gMeasureTimes) { // Init input arrays - uint32_t *p = (uint32_t *)gIn; - uint32_t *p2 = (uint32_t *)gIn2; + cl_uint *p = (cl_uint *)gIn; + cl_uint *p2 = (cl_uint *)gIn2; for (j = 0; j < BUFFER_SIZE / sizeof(float); j++) { p[j] = (genrand_int32(d) & ~0x40000000) | 0x20000000; @@ -561,6 +561,7 @@ static int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); return error; } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL))) { @@ -568,7 +569,6 @@ static int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, return error; } - // Run the kernels for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { @@ -633,7 +633,6 @@ static int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); vlog("\n"); - exit: for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { @@ -684,23 +683,21 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) int skipNanInf = job->skipNanInf; int isNextafter = job->isNextafter; cl_uint *t = 0; - float *r = 0, *s = 0, *s2 = 0; + cl_float *r = 0; + cl_float *s = 0; + cl_float *s2 = 0; cl_int copysign_test = 0; RoundingMode oldRoundMode; int skipVerification = 0; if (relaxedMode) { + func = job->f->rfunc; if (strcmp(name, "pow") == 0 && gFastRelaxedDerived) { - func = job->f->rfunc; ulps = INFINITY; skipVerification = 1; } - else - { - func = job->f->rfunc; - } } // start the map of the output arrays @@ -744,7 +741,8 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) { fp[j] = specialValuesFloat[x]; fp2[j] = specialValuesFloat[y]; - if (++x >= specialValuesFloatCount) + ++x; + if (x >= specialValuesFloatCount) { x = 0; y++; @@ -1203,13 +1201,11 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) fflush(stdout); } - exit: if (overflow) free(overflow); return error; } - // A table of more difficult cases to get right static const double specialValuesDouble[] = { -NAN, @@ -1444,10 +1440,10 @@ static int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, vlog_error("clCreateCommandQueue failed. (%d)\n", error); goto exit; } + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); } - // Init the kernels { BuildKernelInfo build_info = { @@ -1460,6 +1456,7 @@ static int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, goto exit; } + // Run the kernels if (!gSkipCorrectnessTesting) { error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info); @@ -1500,6 +1497,7 @@ static int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); return error; } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL))) { @@ -1507,7 +1505,6 @@ static int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, return error; } - // Run the kernels for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { @@ -1573,7 +1570,6 @@ static int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); vlog("\n"); - exit: // Release for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) @@ -1622,7 +1618,9 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data) int isNextafter = job->isNextafter; cl_ulong *t; - cl_double *r, *s, *s2; + cl_double *r; + cl_double *s; + cl_double *s2; Force64BitFPUPrecision(); @@ -1970,6 +1968,7 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data) } fflush(stdout); } + exit: return error; } diff --git a/test_conformance/math_brute_force/binaryOperator.cpp b/test_conformance/math_brute_force/binaryOperator.cpp index bd1a3143..0957c6af 100644 --- a/test_conformance/math_brute_force/binaryOperator.cpp +++ b/test_conformance/math_brute_force/binaryOperator.cpp @@ -44,10 +44,11 @@ static int BuildKernel(const char *name, const char *operator_symbol, "* in2 )\n" "{\n" " size_t i = get_global_id(0);\n" - " out[i] = in1[i] ", + " out[i] = in1[i] ", operator_symbol, " in2[i];\n" "}\n" }; + const char *c3[] = { "__kernel void ", name, @@ -70,7 +71,8 @@ static int BuildKernel(const char *name, const char *operator_symbol, " size_t parity = i & 1; // Figure out how many elements are " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "buffer size \n" - " float3 f0, f1;\n" + " float3 f0;\n" + " float3 f1;\n" " switch( parity )\n" " {\n" " case 1:\n" @@ -137,8 +139,9 @@ static int BuildKernelDouble(const char *name, const char *operator_symbol, operator_symbol, " in2[i];\n" "}\n" }; + const char *c3[] = { - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void ", name, "_kernel", @@ -160,7 +163,8 @@ static int BuildKernelDouble(const char *name, const char *operator_symbol, " size_t parity = i & 1; // Figure out how many elements are " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "buffer size \n" - " double3 d0, d1;\n" + " double3 d0;\n" + " double3 d1;\n" " switch( parity )\n" " {\n" " case 1:\n" @@ -235,43 +239,6 @@ static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED, info->programs + i, info->relaxedMode); } -// Thread specific data for a worker thread -typedef struct ThreadInfo -{ - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread - float maxError; // max error value. Init to 0. - double - maxErrorValue; // position of the max error value (param 1). Init to 0. - double maxErrorValue2; // position of the max error value (param 2). Init - // to 0. - MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; - -typedef struct TestInfo -{ - size_t subBufferSize; // Size of the sub-buffer in elements - const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread - cl_uint threadCount; // Number of worker threads - cl_uint jobCount; // Number of jobs - cl_uint step; // step between each chunk and the next. - cl_uint scale; // stride between individual test values - float ulps; // max_allowed ulps - int ftz; // non-zero if running in flush to zero mode - bool relaxedMode; // True if the test is being run in relaxed mode, false - // otherwise. - - // no special fields -} TestInfo; - // A table of more difficult cases to get right static const float specialValuesFloat[] = { -NAN, @@ -375,9 +342,46 @@ static const float specialValuesFloat[] = { +0.0f }; -static size_t specialValuesFloatCount = +static const size_t specialValuesFloatCount = sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]); +// Thread specific data for a worker thread +typedef struct ThreadInfo +{ + cl_mem inBuf; // input buffer for the thread + cl_mem inBuf2; // input buffer for the thread + cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + float maxError; // max error value. Init to 0. + double + maxErrorValue; // position of the max error value (param 1). Init to 0. + double maxErrorValue2; // position of the max error value (param 2). Init + // to 0. + MTdata d; + cl_command_queue tQueue; // per thread command queue to improve performance +} ThreadInfo; + +typedef struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + cl_kernel + *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each + // worker thread: k[vector_size][thread_id] + ThreadInfo * + tinfo; // An array of thread specific information for each worker thread + cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs + cl_uint step; // step between each chunk and the next. + cl_uint scale; // stride between individual test values + float ulps; // max_allowed ulps + int ftz; // non-zero if running in flush to zero mode + bool relaxedMode; // True if the test is being run in relaxed mode, false + // otherwise. + + // no special fields +} TestInfo; + static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *p); int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, @@ -398,6 +402,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); test_info.scale = getTestScale(sizeof(cl_float)); + if (gWimpyMode) { test_info.subBufferSize = gWimpyBufferSize @@ -405,7 +410,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, * RoundUpToNextPowerOfTwo(test_info.threadCount)); } - test_info.step = test_info.subBufferSize * test_info.scale; + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; if (test_info.step / test_info.subBufferSize != test_info.scale) { // there was overflow @@ -481,8 +486,8 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, ®ion, &error); if (error || NULL == test_info.tinfo[i].outBuf[j]) { - vlog_error("Error: Unable to create sub-buffer of gInBuffer " - "for region {%zd, %zd}\n", + vlog_error("Error: Unable to create sub-buffer of " + "gInBuffer for region {%zd, %zd}\n", region.origin, region.size); goto exit; } @@ -513,6 +518,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, goto exit; } + // Run the kernels if (!gSkipCorrectnessTesting) { error = ThreadPool_Do(TestFloat, test_info.jobCount, &test_info); @@ -536,7 +542,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, vlog("passed"); } - if (gMeasureTimes) { // Init input arrays @@ -554,6 +559,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); return error; } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL))) { @@ -626,7 +632,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); vlog("\n"); - exit: for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { @@ -665,30 +670,31 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) cl_uint base = job_id * (cl_uint)job->step; ThreadInfo *tinfo = job->tinfo + thread_id; fptr func = job->f->func; + int ftz = job->ftz; bool relaxedMode = job->relaxedMode; float ulps = getAllowedUlpError(job->f, relaxedMode); - if (relaxedMode) - { - func = job->f->rfunc; - } - - - int ftz = job->ftz; MTdata d = tinfo->d; cl_uint j, k; cl_int error; cl_uchar *overflow = (cl_uchar *)malloc(buffer_size); const char *name = job->f->name; - cl_uint *t; - cl_float *r, *s, *s2; + cl_uint *t = 0; + cl_float *r = 0; + cl_float *s = 0; + cl_float *s2 = 0; RoundingMode oldRoundMode; + if (relaxedMode) + { + func = job->f->rfunc; + } + // start the map of the output arrays cl_event e[VECTOR_SIZE_COUNT]; cl_uint *out[VECTOR_SIZE_COUNT]; for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - out[j] = (uint32_t *)clEnqueueMapBuffer( + out[j] = (cl_uint *)clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); if (error || NULL == out[j]) @@ -711,7 +717,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) specialValuesFloatCount * specialValuesFloatCount; int indx = (totalSpecialValueCount - 1) / buffer_elements; - if (job_id <= (cl_uint)indx) { // Insert special values @@ -877,7 +882,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) // an in order queue. for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++) { - out[j] = (uint32_t *)clEnqueueMapBuffer( + out[j] = (cl_uint *)clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); if (error || NULL == out[j]) @@ -889,9 +894,9 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) } // Wait for the last buffer - out[j] = (uint32_t *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j], - CL_TRUE, CL_MAP_READ, 0, - buffer_size, 0, NULL, NULL, &error); + out[j] = (cl_uint *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j], + CL_TRUE, CL_MAP_READ, 0, buffer_size, + 0, NULL, NULL, &error); if (error || NULL == out[j]) { vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error); @@ -1136,6 +1141,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) } fflush(stdout); } + exit: if (overflow) free(overflow); return error; @@ -1267,6 +1273,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, float maxError = 0.0f; double maxErrorVal = 0.0; double maxErrorVal2 = 0.0; + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info @@ -1275,6 +1282,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); test_info.scale = getTestScale(sizeof(cl_double)); + if (gWimpyMode) { test_info.subBufferSize = gWimpyBufferSize @@ -1373,7 +1381,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, test_info.tinfo[i].d = init_genrand(genrand_int32(d)); } - // Init the kernels { BuildKernelInfo build_info = { gMinVectorSizeIndex, @@ -1389,6 +1396,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, goto exit; } + // Run the kernels if (!gSkipCorrectnessTesting) { error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info); @@ -1412,7 +1420,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, vlog("passed"); } - if (gMeasureTimes) { // Init input arrays @@ -1503,7 +1510,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); vlog("\n"); - exit: // Release for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) @@ -1551,7 +1557,9 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data) cl_int error; const char *name = job->f->name; cl_ulong *t; - cl_double *r, *s, *s2; + cl_double *r; + cl_double *s; + cl_double *s2; Force64BitFPUPrecision(); diff --git a/test_conformance/math_brute_force/binary_i.cpp b/test_conformance/math_brute_force/binary_i.cpp index a8535281..f931c5be 100644 --- a/test_conformance/math_brute_force/binary_i.cpp +++ b/test_conformance/math_brute_force/binary_i.cpp @@ -15,8 +15,8 @@ // #include "Utility.h" -#include #include +#include #include "FunctionList.h" int TestFunc_Float_Float_Int(const Func *f, MTdata, bool relaxedMode); @@ -228,7 +228,6 @@ static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED, info->relaxedMode); } - // A table of more difficult cases to get right static const float specialValuesFloat[] = { -NAN, @@ -331,9 +330,9 @@ static const float specialValuesFloat[] = { MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f }; -static size_t specialValuesFloatCount = - sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]); +static const size_t specialValuesFloatCount = + sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]); static const int specialValuesInt[] = { 0, 1, 2, 3, 126, 127, @@ -484,8 +483,8 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) ®ion, &error); if (error || NULL == test_info.tinfo[i].outBuf[j]) { - vlog_error("Error: Unable to create sub-buffer of gInBuffer " - "for region {%zd, %zd}\n", + vlog_error("Error: Unable to create sub-buffer of " + "gInBuffer for region {%zd, %zd}\n", region.origin, region.size); goto exit; } @@ -497,6 +496,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) vlog_error("clCreateCommandQueue failed. (%d)\n", error); goto exit; } + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); } @@ -537,7 +537,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) vlog("passed"); } - if (gMeasureTimes) { // Init input arrays @@ -555,6 +554,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); return error; } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL))) { @@ -627,7 +627,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); vlog("\n"); - exit: for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { @@ -658,7 +657,6 @@ exit: return error; } - static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; @@ -666,23 +664,24 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) size_t buffer_size = buffer_elements * sizeof(cl_float); cl_uint base = job_id * (cl_uint)job->step; ThreadInfo *tinfo = job->tinfo + thread_id; - float ulps = job->ulps; fptr func = job->f->func; int ftz = job->ftz; + float ulps = job->ulps; MTdata d = tinfo->d; cl_uint j, k; cl_int error; const char *name = job->f->name; - cl_uint *t; - cl_float *r, *s; - cl_int *s2; + cl_uint *t = 0; + cl_float *r = 0; + cl_float *s = 0; + cl_int *s2 = 0; // start the map of the output arrays cl_event e[VECTOR_SIZE_COUNT]; cl_uint *out[VECTOR_SIZE_COUNT]; for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - out[j] = (uint32_t *)clEnqueueMapBuffer( + out[j] = (cl_uint *)clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); if (error || NULL == out[j]) @@ -700,9 +699,11 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; j = 0; + int totalSpecialValueCount = specialValuesFloatCount * specialValuesIntCount; int indx = (totalSpecialValueCount - 1) / buffer_elements; + if (job_id <= (cl_uint)indx) { // test edge cases float *fp = (float *)p; @@ -716,7 +717,8 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) { fp[j] = specialValuesFloat[x]; ip2[j] = specialValuesInt[y]; - if (++x >= specialValuesFloatCount) + ++x; + if (x >= specialValuesFloatCount) { x = 0; y++; @@ -820,7 +822,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) // an in order queue. for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++) { - out[j] = (uint32_t *)clEnqueueMapBuffer( + out[j] = (cl_uint *)clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); if (error || NULL == out[j]) @@ -832,9 +834,9 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) } // Wait for the last buffer - out[j] = (uint32_t *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j], - CL_TRUE, CL_MAP_READ, 0, - buffer_size, 0, NULL, NULL, &error); + out[j] = (cl_uint *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j], + CL_TRUE, CL_MAP_READ, 0, buffer_size, + 0, NULL, NULL, &error); if (error || NULL == out[j]) { vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error); @@ -1057,6 +1059,7 @@ static const double specialValuesDouble[] = { MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, }; + static size_t specialValuesDoubleCount = sizeof(specialValuesDouble) / sizeof(specialValuesDouble[0]); @@ -1165,12 +1168,9 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - /* Qualcomm fix: 9461 read-write flags must be compatible with - * parent buffer */ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - /* Qualcomm fix: end */ if (error || NULL == test_info.tinfo[i].outBuf[j]) { vlog_error("Error: Unable to create sub-buffer of gInBuffer " @@ -1190,7 +1190,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) test_info.tinfo[i].d = init_genrand(genrand_int32(d)); } - // Init the kernels { BuildKernelInfo build_info = { @@ -1320,7 +1319,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); vlog("\n"); - exit: // Release for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) @@ -1367,7 +1365,8 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data) cl_int error; const char *name = job->f->name; cl_ulong *t; - cl_double *r, *s; + cl_double *r; + cl_double *s; cl_int *s2; Force64BitFPUPrecision(); @@ -1398,6 +1397,7 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data) int totalSpecialValueCount = specialValuesDoubleCount * specialValuesInt2Count; int indx = (totalSpecialValueCount - 1) / buffer_elements; + if (job_id <= (cl_uint)indx) { // test edge cases cl_double *fp = (cl_double *)p; diff --git a/test_conformance/math_brute_force/binary_two_results_i.cpp b/test_conformance/math_brute_force/binary_two_results_i.cpp index a0aa9d25..2ecf1c28 100644 --- a/test_conformance/math_brute_force/binary_two_results_i.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i.cpp @@ -40,7 +40,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], - "* in2)\n" + "* in2 )\n" "{\n" " int i = get_global_id(0);\n" " out[i] = ", @@ -71,7 +71,9 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, " size_t parity = i & 1; // Figure out how many elements are " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "buffer size \n" - " float3 f0, f1;\n" + " float3 f0;\n" + " float3 f1;\n" + " int3 i0 = 0xdeaddead;\n" " switch( parity )\n" " {\n" " case 1:\n" @@ -83,7 +85,6 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" " break;\n" " }\n" - " int3 i0 = 0xdeaddead;\n" " f0 = ", name, "( f0, f1, &i0 );\n" @@ -132,12 +133,12 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], - "* in2)\n" + "* in2 )\n" "{\n" " int i = get_global_id(0);\n" " out[i] = ", name, - "( in1[i], in2[i], out2 + i );\n" + "( in1[i], in2[i], out2[i] );\n" "}\n" }; const char *c3[] = { @@ -164,7 +165,9 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, " size_t parity = i & 1; // Figure out how many elements are " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "buffer size \n" - " double3 d0, d1;\n" + " double3 d0;\n" + " double3 d1;\n" + " int3 i0 = 0xdeaddead;\n" " switch( parity )\n" " {\n" " case 1:\n" @@ -176,7 +179,6 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" " break;\n" " }\n" - " int3 i0 = 0xdeaddead;\n" " d0 = ", name, "( d0, d1, &i0 );\n" @@ -309,20 +311,22 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) uint64_t i; uint32_t j, k; int error; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + cl_program programs[VECTOR_SIZE_COUNT]; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; - float float_ulps; - int64_t maxError2 = 0; int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + int64_t maxError2 = 0; float maxErrorVal = 0.0f; float maxErrorVal2 = 0.0f; size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; uint64_t step = getTestStep(sizeof(float), bufferSize); cl_uint threadCount = GetThreadCount(); - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + float float_ulps; if (gIsEmbedded) float_ulps = f->float_embedded_ulps; else @@ -485,7 +489,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) { for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { - uint32_t *q = (uint32_t *)gOut[k]; + uint32_t *q = (uint32_t *)(gOut[k]); int32_t *q2 = (int32_t *)gOut2[k]; // Check for exact match to correctly rounded result @@ -695,9 +699,11 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) if (gMeasureTimes) { // Init input array - uint32_t *p = (uint32_t *)gIn; + cl_uint *p = (cl_uint *)gIn; for (j = 0; j < bufferSize / sizeof(float); j++) + { p[j] = genrand_int32(d); + } if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL))) { @@ -823,9 +829,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) if ((error = ThreadPool_Do(BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) - { return error; - } } for (i = 0; i < (1ULL << 32); i += step) @@ -1185,7 +1189,6 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) { vlog("."); } - fflush(stdout); } } @@ -1202,7 +1205,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) { // Init input array double *p = (double *)gIn; - for (j = 0; j < bufferSize / sizeof(double); j++) + for (j = 0; j < bufferSize / sizeof(cl_double); j++) p[j] = DoubleFromUInt32(genrand_int32(d)); if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, bufferSize, gIn, 0, NULL, NULL))) diff --git a/test_conformance/math_brute_force/i_unary.cpp b/test_conformance/math_brute_force/i_unary.cpp index 7f2f79a3..b736b24f 100644 --- a/test_conformance/math_brute_force/i_unary.cpp +++ b/test_conformance/math_brute_force/i_unary.cpp @@ -42,6 +42,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, name, "( in[i] );\n" "}\n" }; + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -87,7 +88,6 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, "}\n" }; - const char **kern = c; size_t kernSize = sizeof(c) / sizeof(c[0]); @@ -114,7 +114,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], - "* in)\n" + "* in )\n" "{\n" " int i = get_global_id(0);\n" " out[i] = ", @@ -177,7 +177,6 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, kernSize = sizeof(c3) / sizeof(c3[0]); } - char testName[32]; snprintf(testName, sizeof(testName) - 1, "math_kernel%s", sizeNames[vectorSize]); @@ -219,7 +218,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) int error; cl_program programs[VECTOR_SIZE_COUNT]; cl_kernel kernels[VECTOR_SIZE_COUNT]; - int ftz = f->ftz || 0 == (gFloatCapabilities & CL_FP_DENORM) || gForceFTZ; + int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; uint64_t step = getTestStep(sizeof(float), bufferSize); int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(float)) + 1); @@ -234,27 +233,30 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) Force64BitFPUPrecision(); // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if ((error = ThreadPool_Do(BuildKernel_FloatFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - return error; + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernel_FloatFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } for (i = 0; i < (1ULL << 32); i += step) { // Init input array - uint32_t *p = (uint32_t *)gIn; + cl_uint *p = (cl_uint *)gIn; if (gWimpyMode) { for (j = 0; j < bufferSize / sizeof(float); j++) - p[j] = (uint32_t)i + j * scale; + p[j] = (cl_uint)i + j * scale; } else { for (j = 0; j < bufferSize / sizeof(float); j++) p[j] = (uint32_t)i + j; } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL))) { @@ -281,7 +283,8 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { size_t vectorSize = sizeValues[j] * sizeof(cl_float); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + size_t localCount = (bufferSize + vectorSize - 1) + / vectorSize; // bufferSize / vectorSize rounded up if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), &gOutBuffer[j]))) { @@ -396,8 +399,9 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) // Run the kernels for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - size_t vectorSize = sizeValues[j] * sizeof(cl_float); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + size_t vectorSize = sizeof(cl_float) * sizeValues[j]; + size_t localCount = (bufferSize + vectorSize - 1) + / vectorSize; // bufferSize / vectorSize rounded up if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), &gOutBuffer[j]))) { @@ -447,6 +451,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) } vlog("\n"); + exit: RestoreFPState(&oldMode); // Release @@ -481,13 +486,13 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) Force64BitFPUPrecision(); // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if ((error = ThreadPool_Do(BuildKernel_DoubleFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) { - return error; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernel_DoubleFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; } for (i = 0; i < (1ULL << 32); i += step) @@ -504,6 +509,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) for (j = 0; j < bufferSize / sizeof(cl_double); j++) p[j] = DoubleFromUInt32((uint32_t)i + j); } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL))) { @@ -529,8 +535,9 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) // Run the kernels for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - size_t vectorSize = sizeValues[j] * sizeof(cl_double); - size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + size_t vectorSize = sizeof(cl_double) * sizeValues[j]; + size_t localCount = (bufferSize + vectorSize - 1) + / vectorSize; // bufferSize / vectorSize rounded up if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]), &gOutBuffer[j]))) { @@ -616,6 +623,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) { vlog("."); } + fflush(stdout); } } @@ -698,7 +706,6 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); - exit: RestoreFPState(&oldMode); // Release diff --git a/test_conformance/math_brute_force/macro_binary.cpp b/test_conformance/math_brute_force/macro_binary.cpp index b0b82149..0c37068e 100644 --- a/test_conformance/math_brute_force/macro_binary.cpp +++ b/test_conformance/math_brute_force/macro_binary.cpp @@ -64,7 +64,8 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, " size_t parity = i & 1; // Figure out how many elements are " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "buffer size \n" - " float3 f0, f1;\n" + " float3 f0;\n" + " float3 f1;\n" " switch( parity )\n" " {\n" " case 1:\n" @@ -92,7 +93,6 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, "}\n" }; - const char **kern = c; size_t kernSize = sizeof(c) / sizeof(c[0]); @@ -110,7 +110,6 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } - static int BuildKernelDouble(const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p, bool relaxedMode) @@ -153,7 +152,8 @@ static int BuildKernelDouble(const char *name, int vectorSize, " size_t parity = i & 1; // Figure out how many elements are " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "buffer size \n" - " double3 f0, f1;\n" + " double3 f0;\n" + " double3 f1;\n" " switch( parity )\n" " {\n" " case 1:\n" @@ -190,7 +190,6 @@ static int BuildKernelDouble(const char *name, int vectorSize, kernSize = sizeof(c3) / sizeof(c3[0]); } - char testName[32]; snprintf(testName, sizeof(testName) - 1, "math_kernel%s", sizeNames[vectorSize]); @@ -228,7 +227,6 @@ static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED, info->relaxedMode); } - // A table of more difficult cases to get right static const float specialValuesFloat[] = { -NAN, @@ -379,6 +377,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); test_info.scale = getTestScale(sizeof(cl_float)); + if (gWimpyMode) { test_info.subBufferSize = gWimpyBufferSize @@ -460,8 +459,8 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) ®ion, &error); if (error || NULL == test_info.tinfo[i].outBuf[j]) { - vlog_error("Error: Unable to create sub-buffer of gInBuffer " - "for region {%zd, %zd}\n", + vlog_error("Error: Unable to create sub-buffer of " + "gInBuffer for region {%zd, %zd}\n", region.origin, region.size); goto exit; } @@ -489,7 +488,6 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) goto exit; } - // Run the kernels if (!gSkipCorrectnessTesting) { @@ -506,8 +504,8 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) if (gMeasureTimes) { // Init input arrays - uint32_t *p = (uint32_t *)gIn; - uint32_t *p2 = (uint32_t *)gIn2; + cl_uint *p = (cl_uint *)gIn; + cl_uint *p2 = (cl_uint *)gIn2; for (j = 0; j < BUFFER_SIZE / sizeof(float); j++) { p[j] = genrand_int32(d); @@ -520,6 +518,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); return error; } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL))) { @@ -531,8 +530,9 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Run the kernels for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - size_t vectorSize = sizeValues[j] * sizeof(cl_float); - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + size_t vectorSize = sizeof(cl_float) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) + / vectorSize; // BUFFER_SIZE / vectorSize rounded up if ((error = clSetKernelArg(test_info.k[j][0], 0, sizeof(gOutBuffer[j]), &gOutBuffer[j]))) { @@ -586,6 +586,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) f->name, sizeNames[j]); } } + vlog("\n"); exit: @@ -631,8 +632,10 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) cl_uint j, k; cl_int error; const char *name = job->f->name; - cl_int *t, *r; - cl_float *s, *s2; + cl_int *t = 0; + cl_int *r = 0; + cl_float *s = 0; + cl_float *s2 = 0; // start the map of the output arrays cl_event e[VECTOR_SIZE_COUNT]; @@ -657,6 +660,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; j = 0; + int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount; int indx = (totalSpecialValueCount - 1) / buffer_elements; @@ -674,7 +678,8 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) { fp[j] = specialValuesFloat[x]; fp2[j] = specialValuesFloat[y]; - if (++x >= specialValuesFloatCount) + ++x; + if (x >= specialValuesFloatCount) { x = 0; y++; @@ -690,7 +695,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) p2[j] = genrand_int32(d); } - if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL))) { @@ -895,6 +899,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) } } } + for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], @@ -1044,7 +1049,6 @@ static const double specialValuesDouble[] = { static size_t specialValuesDoubleCount = sizeof(specialValuesDouble) / sizeof(specialValuesDouble[0]); - static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *p); int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) @@ -1061,6 +1065,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); test_info.scale = getTestScale(sizeof(cl_double)); + if (gWimpyMode) { test_info.subBufferSize = gWimpyBufferSize @@ -1136,12 +1141,9 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - /* Qualcomm fix: 9461 read-write flags must be compatible with - * parent buffer */ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - /* Qualcomm fix: end */ if (error || NULL == test_info.tinfo[i].outBuf[j]) { vlog_error("Error: Unable to create sub-buffer of gInBuffer " @@ -1161,7 +1163,6 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) test_info.tinfo[i].d = init_genrand(genrand_int32(d)); } - // Init the kernels { BuildKernelInfo build_info = { @@ -1174,6 +1175,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) goto exit; } + // Run the kernels if (!gSkipCorrectnessTesting) { error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info); @@ -1189,8 +1191,8 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) if (gMeasureTimes) { // Init input arrays - uint64_t *p = (uint64_t *)gIn; - uint64_t *p2 = (uint64_t *)gIn2; + cl_ulong *p = (cl_ulong *)gIn; + cl_ulong *p2 = (cl_ulong *)gIn2; for (j = 0; j < BUFFER_SIZE / sizeof(double); j++) { p[j] = @@ -1216,8 +1218,9 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) // Run the kernels for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - size_t vectorSize = sizeValues[j] * sizeof(cl_double); - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + size_t vectorSize = sizeof(cl_double) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) + / vectorSize; // BUFFER_SIZE / vectorSize rounded up if ((error = clSetKernelArg(test_info.k[j][0], 0, sizeof(gOutBuffer[j]), &gOutBuffer[j]))) { @@ -1319,8 +1322,10 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data) cl_uint j, k; cl_int error; const char *name = job->f->name; - cl_long *t, *r; - cl_double *s, *s2; + cl_long *t; + cl_long *r; + cl_double *s; + cl_double *s2; Force64BitFPUPrecision(); @@ -1378,7 +1383,6 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data) ((cl_ulong *)p2)[j] = genrand_int64(d); } - if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL))) { @@ -1493,11 +1497,12 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data) t = (cl_long *)r; for (j = 0; j < buffer_elements; j++) { - cl_long *q = (cl_long *)out[0]; + cl_long *q = out[0]; // If we aren't getting the correctly rounded result if (gMinVectorSizeIndex == 0 && t[j] != q[j]) { + // If we aren't getting the correctly rounded result if (ftz) { if (IsDoubleSubnormal(s[j])) @@ -1528,7 +1533,7 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data) } } - uint64_t err = t[j] - q[j]; + cl_ulong err = t[j] - q[j]; if (q[j] > t[j]) err = q[j] - t[j]; vlog_error("\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld " "vs. %lld (index: %d)\n", @@ -1575,7 +1580,7 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data) } } - uint64_t err = -t[j] - q[j]; + cl_ulong err = -t[j] - q[j]; if (q[j] > -t[j]) err = q[j] + t[j]; vlog_error("\nERROR: %sD%s: %lld ulp error at {%.13la, " "%.13la}: *%lld vs. %lld (index: %d)\n", diff --git a/test_conformance/math_brute_force/macro_unary.cpp b/test_conformance/math_brute_force/macro_unary.cpp index bf08a170..ced72be8 100644 --- a/test_conformance/math_brute_force/macro_unary.cpp +++ b/test_conformance/math_brute_force/macro_unary.cpp @@ -34,13 +34,14 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], - "* in)\n" + "* in )\n" "{\n" " int i = get_global_id(0);\n" " out[i] = ", name, "( in[i] );\n" "}\n" }; + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -115,7 +116,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], - "* in)\n" + "* in )\n" "{\n" " int i = get_global_id(0);\n" " out[i] = ", @@ -178,7 +179,6 @@ static int BuildKernelDouble(const char *name, int vectorSize, kernSize = sizeof(c3) / sizeof(c3[0]); } - char testName[32]; snprintf(testName, sizeof(testName) - 1, "math_kernel%s", sizeNames[vectorSize]); @@ -258,6 +258,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); test_info.scale = getTestScale(sizeof(cl_float)); + if (gWimpyMode) { test_info.subBufferSize = gWimpyBufferSize @@ -279,6 +280,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) test_info.f = f; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + // cl_kernels aren't thread safe, so we make one for each vector size for // every thread for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) @@ -328,8 +330,8 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) ®ion, &error); if (error || NULL == test_info.tinfo[i].outBuf[j]) { - vlog_error("Error: Unable to create sub-buffer of gOutBuffer " - "for region {%zd, %zd}\n", + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer for region {%zd, %zd}\n", region.origin, region.size); goto exit; } @@ -355,6 +357,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) goto exit; } + // Run the kernels if (!gSkipCorrectnessTesting) { error = ThreadPool_Do(TestFloat, test_info.jobCount, &test_info); @@ -501,7 +504,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) } } - // Get that moving if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n"); @@ -569,7 +571,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) } } - // Get that moving if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); @@ -594,6 +595,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) return error; } } + // Wait for the last buffer out[j] = (cl_int *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, @@ -711,12 +713,14 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) size_t i, j; logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + // Init test_info memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); test_info.scale = getTestScale(sizeof(cl_double)); + if (gWimpyMode) { test_info.subBufferSize = gWimpyBufferSize @@ -782,12 +786,9 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - /* Qualcomm fix: 9461 read-write flags must be compatible with - * parent buffer */ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - /* Qualcomm fix: end */ if (error || NULL == test_info.tinfo[i].outBuf[j]) { vlog_error("Error: Unable to create sub-buffer of gInBuffer " @@ -817,6 +818,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) goto exit; } + // Run the kernels if (!gSkipCorrectnessTesting) { error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info); @@ -846,8 +848,9 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) // Run the kernels for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - size_t vectorSize = sizeValues[j] * sizeof(cl_double); - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + size_t vectorSize = sizeof(cl_double) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) + / vectorSize; // BUFFER_SIZE / vectorSize rounded up if ((error = clSetKernelArg(test_info.k[j][0], 0, sizeof(gOutBuffer[j]), &gOutBuffer[j]))) { @@ -900,6 +903,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: + // Release for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); @@ -936,9 +940,9 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data) cl_uint base = job_id * (cl_uint)job->step; ThreadInfo *tinfo = job->tinfo + thread_id; dptr dfunc = job->f->dfunc; + int ftz = job->ftz; cl_uint j, k; cl_int error; - int ftz = job->ftz; const char *name = job->f->name; Force64BitFPUPrecision(); @@ -1027,7 +1031,6 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data) } } - // Get that moving if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); @@ -1052,6 +1055,7 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data) return error; } } + // Wait for the last buffer out[j] = (cl_long *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, @@ -1062,14 +1066,12 @@ static cl_int TestDouble(cl_uint job_id, cl_uint thread_id, void *data) return error; } - // Verify data cl_long *t = (cl_long *)r; for (j = 0; j < buffer_elements; j++) { cl_long *q = out[0]; - // If we aren't getting the correctly rounded result if (gMinVectorSizeIndex == 0 && t[j] != q[j]) { diff --git a/test_conformance/math_brute_force/mad.cpp b/test_conformance/math_brute_force/mad.cpp index fb144e4b..872caa0b 100644 --- a/test_conformance/math_brute_force/mad.cpp +++ b/test_conformance/math_brute_force/mad.cpp @@ -44,6 +44,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, name, "( in1[i], in2[i], in3[i] );\n" "}\n" }; + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -66,7 +67,9 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, " size_t parity = i & 1; // Figure out how many elements are " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "buffer size \n" - " float3 f0, f1, f2;\n" + " float3 f0;\n" + " float3 f1;\n" + " float3 f2;\n" " switch( parity )\n" " {\n" " case 1:\n" @@ -133,6 +136,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, name, "( in1[i], in2[i], in3[i] );\n" "}\n" }; + const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -156,7 +160,9 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, " size_t parity = i & 1; // Figure out how many elements are " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "buffer size \n" - " double3 d0, d1, d2;\n" + " double3 d0;\n" + " double3 d1;\n" + " double3 d2;\n" " switch( parity )\n" " {\n" " case 1:\n" @@ -247,37 +253,42 @@ int TestFunc_mad(const Func *f, MTdata d, bool relaxedMode) uint64_t step = getTestStep(sizeof(float), bufferSize); // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if ((error = ThreadPool_Do(BuildKernel_FloatFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - return error; + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernel_FloatFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } for (i = 0; i < (1ULL << 32); i += step) { // Init input array - uint32_t *p = (uint32_t *)gIn; - uint32_t *p2 = (uint32_t *)gIn2; - uint32_t *p3 = (uint32_t *)gIn3; + cl_uint *p = (cl_uint *)gIn; + cl_uint *p2 = (cl_uint *)gIn2; + cl_uint *p3 = (cl_uint *)gIn3; for (j = 0; j < bufferSize / sizeof(float); j++) { p[j] = genrand_int32(d); p2[j] = genrand_int32(d); p3[j] = genrand_int32(d); } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL))) { vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); return error; } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL))) { vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error); return error; } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL))) { @@ -379,15 +390,15 @@ int TestFunc_mad(const Func *f, MTdata d, bool relaxedMode) if (gWimpyMode) vlog("Wimp pass"); else - vlog("pass"); + vlog("passed"); } if (gMeasureTimes) { // Init input array - uint32_t *p = (uint32_t *)gIn; - uint32_t *p2 = (uint32_t *)gIn2; - uint32_t *p3 = (uint32_t *)gIn3; + cl_uint *p = (cl_uint *)gIn; + cl_uint *p2 = (cl_uint *)gIn2; + cl_uint *p3 = (cl_uint *)gIn3; for (j = 0; j < bufferSize / sizeof(float); j++) { p[j] = genrand_int32(d); @@ -508,18 +519,18 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode) double maxErrorVal2 = 0.0f; double maxErrorVal3 = 0.0f; size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); uint64_t step = getTestStep(sizeof(double), bufferSize); + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if ((error = ThreadPool_Do(BuildKernel_DoubleFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) { - return error; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernel_DoubleFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; } for (i = 0; i < (1ULL << 32); i += step) @@ -534,18 +545,21 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode) p2[j] = DoubleFromUInt32(genrand_int32(d)); p3[j] = DoubleFromUInt32(genrand_int32(d)); } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL))) { vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); return error; } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL))) { vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error); return error; } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL))) { @@ -647,7 +661,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode) if (gWimpyMode) vlog("Wimp pass"); else - vlog("pass"); + vlog("passed"); } if (gMeasureTimes) diff --git a/test_conformance/math_brute_force/ternary.cpp b/test_conformance/math_brute_force/ternary.cpp index b3eea0f4..1b03b209 100644 --- a/test_conformance/math_brute_force/ternary.cpp +++ b/test_conformance/math_brute_force/ternary.cpp @@ -52,7 +52,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], - "( __global float* out, __global float* in, __global float* in2 , " + "( __global float* out, __global float* in, __global float* in2, " "__global float* in3)\n" "{\n" " size_t i = get_global_id(0);\n" @@ -71,7 +71,9 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, " size_t parity = i & 1; // Figure out how many elements are " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "buffer size \n" - " float3 f0, f1, f2;\n" + " float3 f0;\n" + " float3 f1;\n" + " float3 f2;\n" " switch( parity )\n" " {\n" " case 1:\n" @@ -143,7 +145,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", sizeNames[vectorSize], - "( __global double* out, __global double* in, __global double* in2 , " + "( __global double* out, __global double* in, __global double* in2, " "__global double* in3)\n" "{\n" " size_t i = get_global_id(0);\n" @@ -162,7 +164,9 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, " size_t parity = i & 1; // Figure out how many elements are " "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " "buffer size \n" - " double3 d0, d1, d2;\n" + " double3 d0;\n" + " double3 d1;\n" + " double3 d2;\n" " switch( parity )\n" " {\n" " case 1:\n" @@ -235,7 +239,6 @@ static cl_int BuildKernel_DoubleFn(cl_uint job_id, cl_uint thread_id UNUSED, info->programs + i, info->relaxedMode); } - // A table of more difficult cases to get right static const float specialValuesFloat[] = { -NAN, @@ -315,7 +318,7 @@ static const float specialValuesFloat[] = { +0.0f }; -static size_t specialValuesFloatCount = +static const size_t specialValuesFloatCount = sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]); @@ -324,6 +327,9 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) uint64_t i; uint32_t j, k; int error; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + cl_program programs[VECTOR_SIZE_COUNT]; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; @@ -332,33 +338,34 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) float maxErrorVal2 = 0.0f; float maxErrorVal3 = 0.0f; size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; - uint64_t step = getTestStep(sizeof(float), bufferSize); - int skipNanInf = (0 == strcmp("fma", f->nameInCode)) && !gInfNanSupport; + cl_uchar overflow[BUFFER_SIZE / sizeof(float)]; + float float_ulps; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - if (gIsEmbedded) float_ulps = f->float_embedded_ulps; else float_ulps = f->float_ulps; + int skipNanInf = (0 == strcmp("fma", f->nameInCode)) && !gInfNanSupport; + // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if ((error = ThreadPool_Do(BuildKernel_FloatFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - return error; + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernel_FloatFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } for (i = 0; i < (1ULL << 32); i += step) { // Init input array - uint32_t *p = (uint32_t *)gIn; - uint32_t *p2 = (uint32_t *)gIn2; - uint32_t *p3 = (uint32_t *)gIn3; + cl_uint *p = (cl_uint *)gIn; + cl_uint *p2 = (cl_uint *)gIn2; + cl_uint *p3 = (cl_uint *)gIn3; j = 0; if (i == 0) { // test edge cases @@ -393,18 +400,21 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) p2[j] = genrand_int32(d); p3[j] = genrand_int32(d); } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL))) { vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); return error; } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL))) { vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error); return error; } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL))) { @@ -493,7 +503,6 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED); } - // Read the data back for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { @@ -963,9 +972,9 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) if (gMeasureTimes) { // Init input array - uint32_t *p = (uint32_t *)gIn; - uint32_t *p2 = (uint32_t *)gIn2; - uint32_t *p3 = (uint32_t *)gIn3; + cl_uint *p = (cl_uint *)gIn; + cl_uint *p2 = (cl_uint *)gIn2; + cl_uint *p3 = (cl_uint *)gIn3; for (j = 0; j < bufferSize / sizeof(float); j++) { p[j] = genrand_int32(d); @@ -1160,21 +1169,21 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, double maxErrorVal = 0.0f; double maxErrorVal2 = 0.0f; double maxErrorVal3 = 0.0f; - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; uint64_t step = getTestStep(sizeof(double), bufferSize); + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + Force64BitFPUPrecision(); // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if ((error = ThreadPool_Do(BuildKernel_DoubleFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) { - return error; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernel_DoubleFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; } for (i = 0; i < (1ULL << 32); i += step) @@ -1213,18 +1222,21 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, p2[j] = DoubleFromUInt32(genrand_int32(d)); p3[j] = DoubleFromUInt32(genrand_int32(d)); } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL))) { vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); return error; } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL))) { vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error); return error; } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL))) { @@ -1287,7 +1299,6 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, } } - // Get that moving if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); diff --git a/test_conformance/math_brute_force/unary.cpp b/test_conformance/math_brute_force/unary.cpp index ae3f54e8..5085b9b4 100644 --- a/test_conformance/math_brute_force/unary.cpp +++ b/test_conformance/math_brute_force/unary.cpp @@ -37,13 +37,14 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], - "* in)\n" + "* in )\n" "{\n" " int i = get_global_id(0);\n" " out[i] = ", name, "( in[i] );\n" "}\n" }; + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -89,7 +90,6 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, "}\n" }; - const char **kern = c; size_t kernSize = sizeof(c) / sizeof(c[0]); @@ -118,7 +118,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], - "* in)\n" + "* in )\n" "{\n" " int i = get_global_id(0);\n" " out[i] = ", @@ -181,7 +181,6 @@ static int BuildKernelDouble(const char *name, int vectorSize, kernSize = sizeof(c3) / sizeof(c3[0]); } - char testName[32]; snprintf(testName, sizeof(testName) - 1, "math_kernel%s", sizeNames[vectorSize]); @@ -249,7 +248,7 @@ typedef struct TestInfo int isRangeLimited; // 1 if the function is only to be evaluated over a // range float half_sin_cos_tan_limit; - bool relaxedMode; // True if test is to be run in relaxed mode, false + bool relaxedMode; // True if test is running in relaxed mode, false // otherwise. } TestInfo; @@ -269,10 +268,10 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Init test_info memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); test_info.scale = getTestScale(sizeof(cl_float)); + if (gWimpyMode) { test_info.subBufferSize = gWimpyBufferSize @@ -345,8 +344,8 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) ®ion, &error); if (error || NULL == test_info.tinfo[i].outBuf[j]) { - vlog_error("Error: Unable to create sub-buffer of gInBuffer " - "for region {%zd, %zd}\n", + vlog_error("Error: Unable to create sub-buffer of " + "gInBuffer for region {%zd, %zd}\n", region.origin, region.size); goto exit; } @@ -390,6 +389,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) goto exit; } + // Run the kernels if (!gSkipCorrectnessTesting || skipTestingRelaxed) { error = ThreadPool_Do(TestFloat, test_info.jobCount, &test_info); @@ -443,8 +443,9 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Run the kernels for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - size_t vectorSize = sizeValues[j] * sizeof(cl_float); - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + size_t vectorSize = sizeof(cl_float) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) + / vectorSize; // BUFFER_SIZE / vectorSize rounded up if ((error = clSetKernelArg(test_info.k[j][0], 0, sizeof(gOutBuffer[j]), &gOutBuffer[j]))) { @@ -479,9 +480,9 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) } uint64_t endTime = GetTime(); - double current_time = SubtractTime(endTime, startTime); - sum += current_time; - if (current_time < bestTime) bestTime = current_time; + double time = SubtractTime(endTime, startTime); + sum += time; + if (time < bestTime) bestTime = time; } if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT; @@ -497,6 +498,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: + // Release for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); @@ -553,7 +555,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) cl_uint *out[VECTOR_SIZE_COUNT]; for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - out[j] = (uint32_t *)clEnqueueMapBuffer( + out[j] = (cl_uint *)clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); if (error || NULL == out[j]) @@ -627,7 +629,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) return error; } - // run the kernel + // Run the kernel size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its @@ -655,7 +657,6 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) } } - // Get that moving if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n"); @@ -670,7 +671,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) // an in order queue. for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++) { - out[j] = (uint32_t *)clEnqueueMapBuffer( + out[j] = (cl_uint *)clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); if (error || NULL == out[j]) @@ -680,6 +681,7 @@ static cl_int TestFloat(cl_uint job_id, cl_uint thread_id, void *data) return error; } } + // Wait for the last buffer out[j] = (uint32_t *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, @@ -1246,12 +1248,9 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - /* Qualcomm fix: 9461 read-write flags must be compatible with - * parent buffer */ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - /* Qualcomm fix: end */ if (error || NULL == test_info.tinfo[i].outBuf[j]) { vlog_error("Error: Unable to create sub-buffer of gInBuffer " @@ -1281,6 +1280,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) goto exit; } + // Run the kernels if (!gSkipCorrectnessTesting) { error = ThreadPool_Do(TestDouble, test_info.jobCount, &test_info); @@ -1334,8 +1334,9 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) // Run the kernels for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { - size_t vectorSize = sizeValues[j] * sizeof(cl_double); - size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + size_t vectorSize = sizeof(cl_double) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) + / vectorSize; // BUFFER_SIZE / vectorSize rounded up if ((error = clSetKernelArg(test_info.k[j][0], 0, sizeof(gOutBuffer[j]), &gOutBuffer[j]))) { @@ -1370,9 +1371,9 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) } uint64_t endTime = GetTime(); - double current_time = SubtractTime(endTime, startTime); - sum += current_time; - if (current_time < bestTime) bestTime = current_time; + double time = SubtractTime(endTime, startTime); + sum += time; + if (time < bestTime) bestTime = time; } if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT; @@ -1393,6 +1394,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: + // Release for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); diff --git a/test_conformance/math_brute_force/unary_two_results.cpp b/test_conformance/math_brute_force/unary_two_results.cpp index 77d40b0d..a3be1d8d 100644 --- a/test_conformance/math_brute_force/unary_two_results.cpp +++ b/test_conformance/math_brute_force/unary_two_results.cpp @@ -36,7 +36,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], - "* in)\n" + "* in )\n" "{\n" " int i = get_global_id(0);\n" " out[i] = ", @@ -93,6 +93,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, " }\n" "}\n" }; + const char **kern = c; size_t kernSize = sizeof(c) / sizeof(c[0]); @@ -121,7 +122,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], - "* in)\n" + "* in )\n" "{\n" " int i = get_global_id(0);\n" " out[i] = ", @@ -179,6 +180,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, " }\n" "}\n" }; + const char **kern = c; size_t kernSize = sizeof(c) / sizeof(c[0]); @@ -242,17 +244,19 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) cl_uchar overflow[BUFFER_SIZE / sizeof(float)]; int isFract = 0 == strcmp("fract", f->nameInCode); int skipNanInf = isFract && !gInfNanSupport; - float float_ulps = getAllowedUlpError(f, relaxedMode); logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + float float_ulps = getAllowedUlpError(f, relaxedMode); // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if ((error = ThreadPool_Do(BuildKernel_FloatFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - return error; + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernel_FloatFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } for (i = 0; i < (1ULL << 32); i += step) { @@ -282,6 +286,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) } } } + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL))) { @@ -454,7 +459,6 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) if (relaxedMode || skipNanInf) { if (skipNanInf && overflow[j]) continue; - // Note: no double rounding here. Reference functions // calculate in single precision. if (IsFloatInfinity(correct) || IsFloatNaN(correct) @@ -670,6 +674,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) uint32_t *p = (uint32_t *)gIn; for (j = 0; j < bufferSize / sizeof(float); j++) p[j] = genrand_int32(d); + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL))) { @@ -706,7 +711,6 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) double bestTime = INFINITY; for (k = 0; k < PERF_LOOP_COUNT; k++) { - uint64_t startTime = GetTime(); if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, @@ -775,13 +779,13 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) Force64BitFPUPrecision(); // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if ((error = ThreadPool_Do(BuildKernel_DoubleFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) { - return error; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernel_DoubleFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; } for (i = 0; i < (1ULL << 32); i += step) @@ -1103,7 +1107,6 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) double bestTime = INFINITY; for (k = 0; k < PERF_LOOP_COUNT; k++) { - uint64_t startTime = GetTime(); if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, diff --git a/test_conformance/math_brute_force/unary_two_results_i.cpp b/test_conformance/math_brute_force/unary_two_results_i.cpp index f3c73434..6c56ed1f 100644 --- a/test_conformance/math_brute_force/unary_two_results_i.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i.cpp @@ -37,13 +37,14 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], - "* in)\n" + "* in )\n" "{\n" " int i = get_global_id(0);\n" " out[i] = ", name, "( in[i], out2 + i );\n" "}\n" }; + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -93,6 +94,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, " }\n" "}\n" }; + const char **kern = c; size_t kernSize = sizeof(c) / sizeof(c[0]); @@ -121,13 +123,14 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], - "* in)\n" + "* in )\n" "{\n" " int i = get_global_id(0);\n" " out[i] = ", name, "( in[i], out2 + i );\n" "}\n" }; + const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -178,6 +181,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, " }\n" "}\n" }; + const char **kern = c; size_t kernSize = sizeof(c) / sizeof(c[0]); @@ -240,13 +244,13 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) float maxErrorVal = 0.0f; float maxErrorVal2 = 0.0f; size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; - float float_ulps; uint64_t step = getTestStep(sizeof(float), bufferSize); int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(float)) + 1); cl_ulong maxiError; logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + float float_ulps; if (gIsEmbedded) float_ulps = f->float_embedded_ulps; else @@ -255,12 +259,14 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) maxiError = float_ulps == INFINITY ? CL_ULONG_MAX : 0; // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if ((error = ThreadPool_Do(BuildKernel_FloatFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - return error; + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernel_FloatFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } for (i = 0; i < (1ULL << 32); i += step) { @@ -600,22 +606,21 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) double maxErrorVal2 = 0.0f; cl_ulong maxiError = f->double_ulps == INFINITY ? CL_ULONG_MAX : 0; size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; - - uint64_t step = getTestStep(sizeof(double), bufferSize); - int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(double)) + 1); + uint64_t step = getTestStep(sizeof(cl_double), bufferSize); + int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(cl_double)) + 1); logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); Force64BitFPUPrecision(); // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if ((error = ThreadPool_Do(BuildKernel_DoubleFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) { - return error; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernel_DoubleFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; } for (i = 0; i < (1ULL << 32); i += step) @@ -624,12 +629,12 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) double *p = (double *)gIn; if (gWimpyMode) { - for (j = 0; j < bufferSize / sizeof(double); j++) + for (j = 0; j < bufferSize / sizeof(cl_double); j++) p[j] = DoubleFromUInt32((uint32_t)i + j * scale); } else { - for (j = 0; j < bufferSize / sizeof(double); j++) + for (j = 0; j < bufferSize / sizeof(cl_double); j++) p[j] = DoubleFromUInt32((uint32_t)i + j); } if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, @@ -928,7 +933,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) double clocksPerOp = bestTime * (double)gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof(double)); - vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sd%s", + vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j]); } for (; j < gMaxVectorSizeIndex; j++) vlog("\t -- "); diff --git a/test_conformance/math_brute_force/unary_u.cpp b/test_conformance/math_brute_force/unary_u.cpp index 53f5db38..df6724ca 100644 --- a/test_conformance/math_brute_force/unary_u.cpp +++ b/test_conformance/math_brute_force/unary_u.cpp @@ -33,13 +33,14 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, sizeNames[vectorSize], "* out, __global uint", sizeNames[vectorSize], - "* in)\n" + "* in )\n" "{\n" " int i = get_global_id(0);\n" " out[i] = ", name, "( in[i] );\n" "}\n" }; + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -112,7 +113,7 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, sizeNames[vectorSize], "* out, __global ulong", sizeNames[vectorSize], - "* in)\n" + "* in )\n" "{\n" " int i = get_global_id(0);\n" " out[i] = ", @@ -120,51 +121,53 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, "( in[i] );\n" "}\n" }; - const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global double* out, __global ulong* in)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " ulong3 u0 = vload3( 0, in + 3 * i );\n" - " double3 f0 = ", - name, - "( u0 );\n" - " vstore3( f0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how " - "many elements are left over after BUFFER_SIZE % " - "(3*sizeof(float)). Assume power of two buffer size \n" - " ulong3 u0;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " u0 = (ulong3)( in[3*i], " - "0xdeaddeaddeaddeadUL, 0xdeaddeaddeaddeadUL ); \n" - " break;\n" - " case 0:\n" - " u0 = (ulong3)( in[3*i], in[3*i+1], " - "0xdeaddeaddeaddeadUL ); \n" - " break;\n" - " }\n" - " double3 f0 = ", - name, - "( u0 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" }; + const char *c3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", + sizeNames[vectorSize], + "( __global double* out, __global ulong* in )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " ulong3 u0 = vload3( 0, in + 3 * i );\n" + " double3 f0 = ", + name, + "( u0 );\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are " + "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " + "buffer size \n" + " ulong3 u0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " u0 = (ulong3)( in[3*i], 0xdeaddeaddeaddeadUL, " + "0xdeaddeaddeaddeadUL ); \n" + " break;\n" + " case 0:\n" + " u0 = (ulong3)( in[3*i], in[3*i+1], " + "0xdeaddeaddeaddeadUL ); \n" + " break;\n" + " }\n" + " double3 f0 = ", + name, + "( u0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; const char **kern = c; size_t kernSize = sizeof(c) / sizeof(c[0]); @@ -175,7 +178,6 @@ static int BuildKernelDouble(const char *name, int vectorSize, cl_kernel *k, kernSize = sizeof(c3) / sizeof(c3[0]); } - char testName[32]; snprintf(testName, sizeof(testName) - 1, "math_kernel%s", sizeNames[vectorSize]); @@ -221,27 +223,28 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); float maxErrorVal = 0.0f; size_t bufferSize = (gWimpyMode) ? gWimpyBufferSize : BUFFER_SIZE; - uint64_t step = getTestStep(sizeof(float), bufferSize); int scale = (int)((1ULL << 32) / (16 * bufferSize / sizeof(double)) + 1); int isRangeLimited = 0; - float float_ulps; float half_sin_cos_tan_limit = 0; logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + float float_ulps; if (gIsEmbedded) float_ulps = f->float_embedded_ulps; else float_ulps = f->float_ulps; // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if ((error = ThreadPool_Do(BuildKernel_FloatFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - return error; + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernel_FloatFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; + } if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos")) { @@ -317,7 +320,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL))) { - vlog_error("FAILURE -- could not execute kernel\n"); + vlog_error("FAILED -- could not execute kernel\n"); goto exit; } } @@ -419,7 +422,6 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) } } - if (!gSkipCorrectnessTesting) { if (gWimpyMode) @@ -477,7 +479,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) &localCount, NULL, 0, NULL, NULL))) { - vlog_error("FAILURE -- could not execute kernel\n"); + vlog_error("FAILED -- could not execute kernel\n"); goto exit; } @@ -540,13 +542,13 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) Force64BitFPUPrecision(); // Init the kernels - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; - if ((error = ThreadPool_Do(BuildKernel_DoubleFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) { - return error; + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; + if ((error = ThreadPool_Do(BuildKernel_DoubleFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + return error; } for (i = 0; i < (1ULL << 32); i += step) @@ -599,7 +601,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL))) { - vlog_error("FAILURE -- could not execute kernel\n"); + vlog_error("FAILED -- could not execute kernel\n"); goto exit; } } @@ -627,7 +629,6 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) if (gSkipCorrectnessTesting) break; - // Verify data uint64_t *t = (uint64_t *)gOut_Ref; for (j = 0; j < bufferSize / sizeof(cl_double); j++) @@ -741,7 +742,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) &localCount, NULL, 0, NULL, NULL))) { - vlog_error("FAILURE -- could not execute kernel\n"); + vlog_error("FAILED -- could not execute kernel\n"); goto exit; }