// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "procs.h" #include "subhelpers.h" #include "harness/conversions.h" #include "harness/typeWrappers.h" static const char * any_source = "__kernel void test_any(const __global Type *in, __global int2 *xy, __global Type *out)\n" "{\n" " int gid = get_global_id(0);\n" " XY(xy,gid);\n" " out[gid] = sub_group_any(in[gid]);\n" "}\n"; static const char * all_source = "__kernel void test_all(const __global Type *in, __global int2 *xy, __global Type *out)\n" "{\n" " int gid = get_global_id(0);\n" " XY(xy,gid);\n" " out[gid] = sub_group_all(in[gid]);\n" "}\n"; static const char * bcast_source = "__kernel void test_bcast(const __global Type *in, __global int2 *xy, __global Type *out)\n" "{\n" " int gid = get_global_id(0);\n" " XY(xy,gid);\n" " Type x = in[gid];\n" " size_t loid = (size_t)((int)x % 100);\n" " out[gid] = sub_group_broadcast(x, loid);\n" "}\n"; static const char * redadd_source = "__kernel void test_redadd(const __global Type *in, __global int2 *xy, __global Type *out)\n" "{\n" " int gid = get_global_id(0);\n" " XY(xy,gid);\n" " out[gid] = sub_group_reduce_add(in[gid]);\n" "}\n"; static const char * redmax_source = "__kernel void test_redmax(const __global Type *in, __global int2 *xy, __global Type *out)\n" "{\n" " int gid = get_global_id(0);\n" " XY(xy,gid);\n" " out[gid] = sub_group_reduce_max(in[gid]);\n" "}\n"; static const char * redmin_source = "__kernel void test_redmin(const __global Type *in, __global int2 *xy, __global Type *out)\n" "{\n" " int gid = get_global_id(0);\n" " XY(xy,gid);\n" " out[gid] = sub_group_reduce_min(in[gid]);\n" "}\n"; static const char * scinadd_source = "__kernel void test_scinadd(const __global Type *in, __global int2 *xy, __global Type *out)\n" "{\n" " int gid = get_global_id(0);\n" " XY(xy,gid);\n" " out[gid] = sub_group_scan_inclusive_add(in[gid]);\n" "}\n"; static const char * scinmax_source = "__kernel void test_scinmax(const __global Type *in, __global int2 *xy, __global Type *out)\n" "{\n" " int gid = get_global_id(0);\n" " XY(xy,gid);\n" " out[gid] = sub_group_scan_inclusive_max(in[gid]);\n" "}\n"; static const char * scinmin_source = "__kernel void test_scinmin(const __global Type *in, __global int2 *xy, __global Type *out)\n" "{\n" " int gid = get_global_id(0);\n" " XY(xy,gid);\n" " out[gid] = sub_group_scan_inclusive_min(in[gid]);\n" "}\n"; static const char * scexadd_source = "__kernel void test_scexadd(const __global Type *in, __global int2 *xy, __global Type *out)\n" "{\n" " int gid = get_global_id(0);\n" " XY(xy,gid);\n" " out[gid] = sub_group_scan_exclusive_add(in[gid]);\n" "}\n"; static const char * scexmax_source = "__kernel void test_scexmax(const __global Type *in, __global int2 *xy, __global Type *out)\n" "{\n" " int gid = get_global_id(0);\n" " XY(xy,gid);\n" " out[gid] = sub_group_scan_exclusive_max(in[gid]);\n" "}\n"; static const char * scexmin_source = "__kernel void test_scexmin(const __global Type *in, __global int2 *xy, __global Type *out)\n" "{\n" " int gid = get_global_id(0);\n" " XY(xy,gid);\n" " out[gid] = sub_group_scan_exclusive_min(in[gid]);\n" "}\n"; // These need to stay in sync with the kernel source below #define NUM_LOC 49 #define INST_LOC_MASK 0x7f #define INST_OP_SHIFT 0 #define INST_OP_MASK 0xf #define INST_LOC_SHIFT 4 #define INST_VAL_SHIFT 12 #define INST_VAL_MASK 0x7ffff #define INST_END 0x0 #define INST_STORE 0x1 #define INST_WAIT 0x2 #define INST_COUNT 0x3 static const char * ifp_source = "#define NUM_LOC 49\n" "#define INST_LOC_MASK 0x7f\n" "#define INST_OP_SHIFT 0\n" "#define INST_OP_MASK 0xf\n" "#define INST_LOC_SHIFT 4\n" "#define INST_VAL_SHIFT 12\n" "#define INST_VAL_MASK 0x7ffff\n" "#define INST_END 0x0\n" "#define INST_STORE 0x1\n" "#define INST_WAIT 0x2\n" "#define INST_COUNT 0x3\n" "\n" "__kernel void\n" "test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n" "{\n" " __local atomic_int loc[NUM_LOC];\n" "\n" " // Don't run if there is only one sub group\n" " if (get_num_sub_groups() == 1)\n" " return;\n" "\n" " // First initialize loc[]\n" " int lid = (int)get_local_id(0);\n" "\n" " if (lid < NUM_LOC)\n" " atomic_init(loc+lid, 0);\n" "\n" " work_group_barrier(CLK_LOCAL_MEM_FENCE);\n" "\n" " // Compute pointer to this sub group's \"instructions\"\n" " const __global int *pc = in +\n" " ((int)get_group_id(0)*(int)get_enqueued_num_sub_groups() +\n" " (int)get_sub_group_id()) *\n" " (NUM_LOC+1);\n" "\n" " // Set up to \"run\"\n" " bool ok = (int)get_sub_group_local_id() == 0;\n" " bool run = true;\n" "\n" " while (run) {\n" " int inst = *pc++;\n" " int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;\n" " int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;\n" " int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;\n" "\n" " switch (iop) {\n" " case INST_STORE:\n" " if (ok)\n" " atomic_store(loc+iloc, ival);\n" " break;\n" " case INST_WAIT:\n" " if (ok) {\n" " while (atomic_load(loc+iloc) != ival)\n" " ;\n" " }\n" " break;\n" " case INST_COUNT:\n" " if (ok) {\n" " int i;\n" " for (i=0;i struct AA { static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng) { int i, ii, j, k, n; int nj = (nw + ns - 1)/ns; int e; ii = 0; for (k=0; k nw ? nw - ii : ns; e = (int)(genrand_int32(gMTdata) % 3); // Initialize data matrix indexed by local id and sub group id switch (e) { case 0: memset(&t[ii], 0, n*sizeof(cl_int)); break; case 1: memset(&t[ii], 0, n*sizeof(cl_int)); i = (int)(genrand_int32(gMTdata) % (cl_uint)n); t[ii + i] = 41; break; case 2: memset(&t[ii], 0xff, n*sizeof(cl_int)); break; } } // Now map into work group using map from device for (j=0;j nw ? nw - ii : ns; // Compute target if (Which == 0) { taa = 0; for (i=0; i struct RED { static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) { int i, ii, j, k, n; int nj = (nw + ns - 1)/ns; ii = 0; for (k=0; k nw ? nw - ii : ns; for (i=0; i::val()); for (k=0; k nw ? nw - ii : ns; // Compute target if (Which == 0) { // add tr = mx[ii]; for (i=1; i mx[ii + i] ? tr : mx[ii + i]; } else if (Which == 2) { // min tr = mx[ii]; for (i=1; i mx[ii + i] ? mx[ii + i] : tr; } // Check result for (i=0; i::val(), i, j, k); return -1; } } } x += nw; y += nw; m += 2*nw; } return 0; } }; // Scan Inclusive functions template struct SCIN { static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) { int i, ii, j, k, n; int nj = (nw + ns - 1)/ns; ii = 0; for (k=0; k nw ? nw - ii : ns; for (i=0; i::val()); for (k=0; k nw ? nw - ii : ns; // Check result for (i=0; i mx[ii + i] ? tr : mx[ii + i]); } else { tr = i == 0 ? mx[ii] : (tr > mx[ii + i] ? mx[ii + i] : tr); } rr = my[ii+i]; if (rr != tr) { log_error("ERROR: sub_group_scan_inclusive_%s(%s) mismatch for local id %d in sub group %d in group %d\n", Which == 0 ? "add" : (Which == 1 ? "max" : "min"), TypeName::val(), i, j, k); return -1; } } } x += nw; y += nw; m += 2*nw; } return 0; } }; // Scan Exclusive functions template struct SCEX { static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) { int i, ii, j, k, n; int nj = (nw + ns - 1)/ns; ii = 0; for (k=0; k nw ? nw - ii : ns; for (i=0; i::val()); for (k=0; k nw ? nw - ii : ns; // Check result for (i=0; i::val() : tr + trt; } else if (Which == 1) { tr = i == 0 ? TypeIdentity::val() : (trt > tr ? trt : tr); } else { tr = i == 0 ? TypeIdentity::val() : (trt > tr ? tr : trt); } trt = mx[ii+i]; rr = my[ii+i]; if (rr != tr) { log_error("ERROR: sub_group_scan_exclusive_%s(%s) mismatch for local id %d in sub group %d in group %d\n", Which == 0 ? "add" : (Which == 1 ? "max" : "min"), TypeName::val(), i, j, k); return -1; } } } x += nw; y += nw; m += 2*nw; } return 0; } }; // Broadcast functios template struct BC { static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) { int i, ii, j, k, l, n; int nj = (nw + ns - 1)/ns; int d = ns > 100 ? 100 : ns; ii = 0; for (k=0; k nw ? nw - ii : ns; l = (int)(genrand_int32(gMTdata) & 0x7fffffff) % (d > n ? n : d); for (i=0; i::val()); for (k=0; k nw ? nw - ii : ns; l = (int)mx[ii] % 100; tr = mx[ii+l]; // Check result for (i=0; i::val(), i, j, k); return -1; } } } x += nw; y += nw; m += 2*nw; } return 0; } }; // Independent forward progress stuff // Note: // Output needs num_groups * NUM_LOC elements // local_size must be > NUM_LOC // Input needs num_groups * num_sub_groups * (NUM_LOC+1) elements static inline int inst(int op, int loc, int val) { return (val << INST_VAL_SHIFT) | (loc << INST_LOC_SHIFT) | (op << INST_OP_SHIFT); } void gen_insts(cl_int *x, cl_int *p, int n) { int i, j0, j1; int val; int ii[NUM_LOC]; // Create a random permutation of 0...NUM_LOC-1 ii[0] = 0; for (i=1; i> INST_OP_SHIFT) & INST_OP_MASK; cl_int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK; cl_int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK; scont = false; switch (iop) { case INST_STORE: loc[iloc] = ival; ++p[i]; scont = true; break; case INST_WAIT: if (loc[iloc] == ival) { ++p[i]; scont = true; } break; case INST_COUNT: loc[iloc] += ival; ++p[i]; scont = true; break; case INST_END: ++nend; break; } } while (scont); } } while (nend < n); // Return result, reusing "p" memcpy(p, loc, sizeof(loc)); } struct IFP { static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng) { int k; int nj = (nw + ns - 1) / ns; // We need at least 2 sub groups per group for this test if (nj == 1) return; for (k=0; k, G, L>::run(device, context, queue, num_elements, "test_any", any_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_all", all_source); // error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); // error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); // error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); // error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); // error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); // error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); // error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); // error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); // error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); // error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); error |= test::run(device, context, queue, num_elements, "test_ifp", ifp_source, NUM_LOC + 1); return error; }