Change arg type to unsigned int from signed int (#1078)

* Change arg type to unsigned int from signed int

* Fix formatting issues
This commit is contained in:
Grzegorz Wawiorko
2021-02-02 17:43:37 +01:00
committed by GitHub
parent ca673af488
commit 216455842d
3 changed files with 333 additions and 278 deletions

View File

@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -16,27 +16,33 @@
#include "common.h"
static char hash_table_kernel[] =
"#if 0\n"
"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
"#endif\n"
"typedef struct BinNode {\n"
" int value;\n"
" atomic_uintptr_t pNext;\n"
"} BinNode;\n"
"#if 0\n"
"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
"#endif\n"
"typedef struct BinNode {\n"
" int value;\n"
" atomic_uintptr_t pNext;\n"
"} BinNode;\n"
"__kernel void build_hash_table(__global uint* input, __global BinNode* pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n"
"{\n"
" __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, 1, memory_order_relaxed, memory_scope_all_svm_devices) ];\n"
" uint i = get_global_id(0);\n"
" uint b = input[i] % numBins;\n"
" pNew->value = input[i];\n"
" uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), memory_order_seq_cst, memory_scope_all_svm_devices);\n"
" do\n"
" {\n"
" atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, memory_scope_all_svm_devices);\n" // always inserting at head of list
" } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), &next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, memory_scope_all_svm_devices));\n"
"}\n";
"__kernel void build_hash_table(__global uint* input, __global BinNode* "
"pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n"
"{\n"
" __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, "
"1u, memory_order_relaxed, memory_scope_all_svm_devices) ];\n"
" uint i = get_global_id(0);\n"
" uint b = input[i] % numBins;\n"
" pNew->value = input[i];\n"
" uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), "
"memory_order_seq_cst, memory_scope_all_svm_devices);\n"
" do\n"
" {\n"
" atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, "
"memory_scope_all_svm_devices);\n" // always inserting at head of list
" } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), "
"&next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, "
"memory_scope_all_svm_devices));\n"
"}\n";
typedef struct BinNode{
cl_uint value;

View File

@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -17,15 +17,19 @@
const char *find_targets_kernel[] = {
"__kernel void find_targets(__global uint* image, uint target, volatile __global atomic_uint *numTargetsFound, volatile __global atomic_uint *targetLocations)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" uint index;\n"
" if(image[i] == target) {\n"
" index = atomic_fetch_add_explicit(numTargetsFound, 1, memory_order_relaxed, memory_scope_device); \n"
" atomic_exchange_explicit(&targetLocations[index], i, memory_order_relaxed, memory_scope_all_svm_devices); \n"
" }\n"
"}\n"
"__kernel void find_targets(__global uint* image, uint target, volatile "
"__global atomic_uint *numTargetsFound, volatile __global atomic_uint "
"*targetLocations)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" uint index;\n"
" if(image[i] == target) {\n"
" index = atomic_fetch_add_explicit(numTargetsFound, 1u, "
"memory_order_relaxed, memory_scope_device); \n"
" atomic_exchange_explicit(&targetLocations[index], i, "
"memory_order_relaxed, memory_scope_all_svm_devices); \n"
" }\n"
"}\n"
};

View File

@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -27,271 +27,316 @@
#ifdef CL_VERSION_2_0
extern int gWimpyMode;
static const char* helper_ndrange_1d_glo[] =
{
NL, "void block_fn(int len, __global atomic_uint* val)"
NL, "{"
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
NL, "}"
NL, ""
NL, "kernel void helper_ndrange_1d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)"
NL, "{"
NL, " size_t tid = get_global_id(0);"
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
NL, ""
NL, " for(int i = 0; i < n; i++)"
NL, " {"
NL, " ndrange_t ndrange = ndrange_1D(glob_size_arr[i]);"
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
NL, " }"
NL, "}"
NL
static const char *helper_ndrange_1d_glo[] = {
NL,
"void block_fn(int len, __global atomic_uint* val)" NL,
"{" NL,
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
"memory_order_relaxed, memory_scope_device);" NL,
"}" NL,
"" NL,
"kernel void helper_ndrange_1d_glo(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
"atomic_uint* val, __global uint* ofs_arr)" NL,
"{" NL,
" size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
"" NL,
" for(int i = 0; i < n; i++)" NL,
" {" NL,
" ndrange_t ndrange = ndrange_1D(glob_size_arr[i]);" NL,
" int enq_res = enqueue_kernel(get_default_queue(), "
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
" }" NL,
"}" NL
};
static const char* helper_ndrange_1d_loc[] =
{
NL, "void block_fn(int len, __global atomic_uint* val)"
NL, "{"
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
NL, "}"
NL, ""
NL, "kernel void helper_ndrange_1d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)"
NL, "{"
NL, " size_t tid = get_global_id(0);"
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
NL, ""
NL, " for(int k = 0; k < n; k++)"
NL, " {"
NL, " for(int i = 0; i < n; i++)"
NL, " {"
NL, " if (glob_size_arr[i] >= loc_size_arr[k])"
NL, " {"
NL, " ndrange_t ndrange = ndrange_1D(glob_size_arr[i], loc_size_arr[k]);"
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
NL, " }"
NL, " }"
NL, " }"
NL, "}"
NL
static const char *helper_ndrange_1d_loc[] = {
NL,
"void block_fn(int len, __global atomic_uint* val)" NL,
"{" NL,
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
"memory_order_relaxed, memory_scope_device);" NL,
"}" NL,
"" NL,
"kernel void helper_ndrange_1d_loc(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
"atomic_uint* val, __global uint* ofs_arr)" NL,
"{" NL,
" size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
"" NL,
" for(int k = 0; k < n; k++)" NL,
" {" NL,
" for(int i = 0; i < n; i++)" NL,
" {" NL,
" if (glob_size_arr[i] >= loc_size_arr[k])" NL,
" {" NL,
" ndrange_t ndrange = ndrange_1D(glob_size_arr[i], "
"loc_size_arr[k]);" NL,
" int enq_res = enqueue_kernel(get_default_queue(), "
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
" }" NL,
" }" NL,
" }" NL,
"}" NL
};
static const char* helper_ndrange_1d_ofs[] =
{
NL, "void block_fn(int len, __global atomic_uint* val)"
NL, "{"
NL, " atomic_fetch_add_explicit(&val[(get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
NL, "}"
NL, ""
NL, "kernel void helper_ndrange_1d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)"
NL, "{"
NL, " size_t tid = get_global_id(0);"
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
NL, ""
NL, " for(int l = 0; l < n; l++)"
NL, " {"
NL, " for(int k = 0; k < n; k++)"
NL, " {"
NL, " for(int i = 0; i < n; i++)"
NL, " {"
NL, " if (glob_size_arr[i] >= loc_size_arr[k])"
NL, " {"
NL, " ndrange_t ndrange = ndrange_1D(ofs_arr[l], glob_size_arr[i], loc_size_arr[k]);"
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
NL, " }"
NL, " }"
NL, " }"
NL, " }"
NL, "}"
NL
static const char *helper_ndrange_1d_ofs[] = {
NL,
"void block_fn(int len, __global atomic_uint* val)" NL,
"{" NL,
" atomic_fetch_add_explicit(&val[(get_global_offset(0) + "
"get_global_linear_id()) % len], 1u, memory_order_relaxed, "
"memory_scope_device);" NL,
"}" NL,
"" NL,
"kernel void helper_ndrange_1d_ofs(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
"atomic_uint* val, __global uint* ofs_arr)" NL,
"{" NL,
" size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
"" NL,
" for(int l = 0; l < n; l++)" NL,
" {" NL,
" for(int k = 0; k < n; k++)" NL,
" {" NL,
" for(int i = 0; i < n; i++)" NL,
" {" NL,
" if (glob_size_arr[i] >= loc_size_arr[k])" NL,
" {" NL,
" ndrange_t ndrange = ndrange_1D(ofs_arr[l], glob_size_arr[i], "
"loc_size_arr[k]);" NL,
" int enq_res = enqueue_kernel(get_default_queue(), "
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
" }" NL,
" }" NL,
" }" NL,
" }" NL,
"}" NL
};
static const char* helper_ndrange_2d_glo[] =
{
NL, "void block_fn(int len, __global atomic_uint* val)"
NL, "{"
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
NL, "}"
NL, ""
NL, "kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
NL, "{"
NL, " size_t tid = get_global_id(0);"
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
NL, ""
NL, " for(int i = 0; i < n; i++)"
NL, " {"
NL, " size_t glob_size[2] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] };"
NL, " ndrange_t ndrange = ndrange_2D(glob_size);"
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
NL, " }"
NL, "}"
NL
static const char *helper_ndrange_2d_glo[] = {
NL,
"void block_fn(int len, __global atomic_uint* val)" NL,
"{" NL,
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
"memory_order_relaxed, memory_scope_device);" NL,
"}" NL,
"" NL,
"kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
"val, __global uint* ofs_arr)" NL,
"{" NL,
" size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
"" NL,
" for(int i = 0; i < n; i++)" NL,
" {" NL,
" size_t glob_size[2] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] "
"};" NL,
" ndrange_t ndrange = ndrange_2D(glob_size);" NL,
" int enq_res = enqueue_kernel(get_default_queue(), "
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
" }" NL,
"}" NL
};
static const char* helper_ndrange_2d_loc[] =
{
NL, "void block_fn(int len, __global atomic_uint* val)"
NL, "{"
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
NL, "}"
NL, ""
NL, "kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
NL, "{"
NL, " size_t tid = get_global_id(0);"
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
NL, ""
NL, " for(int k = 0; k < n; k++)"
NL, " {"
NL, " for(int i = 0; i < n; i++)"
NL, " {"
NL, " if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])"
NL, " {"
NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] };"
NL, " size_t loc_size[] = { 1, loc_size_arr[k] };"
NL, ""
NL, " ndrange_t ndrange = ndrange_2D(glob_size, loc_size);"
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
NL, " }"
NL, " }"
NL, " }"
NL, "}"
NL
static const char *helper_ndrange_2d_loc[] = {
NL,
"void block_fn(int len, __global atomic_uint* val)" NL,
"{" NL,
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
"memory_order_relaxed, memory_scope_device);" NL,
"}" NL,
"" NL,
"kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
"val, __global uint* ofs_arr)" NL,
"{" NL,
" size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
"" NL,
" for(int k = 0; k < n; k++)" NL,
" {" NL,
" for(int i = 0; i < n; i++)" NL,
" {" NL,
" if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" NL,
" {" NL,
" size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % "
"n] };" NL,
" size_t loc_size[] = { 1, loc_size_arr[k] };" NL,
"" NL,
" ndrange_t ndrange = ndrange_2D(glob_size, loc_size);" NL,
" int enq_res = enqueue_kernel(get_default_queue(), "
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
" }" NL,
" }" NL,
" }" NL,
"}" NL
};
static const char* helper_ndrange_2d_ofs[] =
{
NL, "void block_fn(int len, __global atomic_uint* val)"
NL, "{"
NL, " atomic_fetch_add_explicit(&val[(get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
NL, "}"
NL, ""
NL, "kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
NL, "{"
NL, " size_t tid = get_global_id(0);"
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
NL, ""
NL, " for(int l = 0; l < n; l++)"
NL, " {"
NL, " for(int k = 0; k < n; k++)"
NL, " {"
NL, " for(int i = 0; i < n; i++)"
NL, " {"
NL, " if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])"
NL, " {"
NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n]};"
NL, " size_t loc_size[] = { 1, loc_size_arr[k] };"
NL, " size_t ofs[] = { ofs_arr[l], ofs_arr[(l + 1) % n] };"
NL, ""
NL, " ndrange_t ndrange = ndrange_2D(ofs,glob_size,loc_size);"
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
NL, " }"
NL, " }"
NL, " }"
NL, " }"
NL, "}"
NL
static const char *helper_ndrange_2d_ofs[] = {
NL,
"void block_fn(int len, __global atomic_uint* val)" NL,
"{" NL,
" atomic_fetch_add_explicit(&val[(get_global_offset(1) * "
"get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % "
"len], 1u, memory_order_relaxed, memory_scope_device);" NL,
"}" NL,
"" NL,
"kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
"val, __global uint* ofs_arr)" NL,
"{" NL,
" size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
"" NL,
" for(int l = 0; l < n; l++)" NL,
" {" NL,
" for(int k = 0; k < n; k++)" NL,
" {" NL,
" for(int i = 0; i < n; i++)" NL,
" {" NL,
" if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" NL,
" {" NL,
" size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) "
"% n]};" NL,
" size_t loc_size[] = { 1, loc_size_arr[k] };" NL,
" size_t ofs[] = { ofs_arr[l], ofs_arr[(l + 1) % n] };" NL,
"" NL,
" ndrange_t ndrange = ndrange_2D(ofs,glob_size,loc_size);" NL,
" int enq_res = enqueue_kernel(get_default_queue(), "
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
" }" NL,
" }" NL,
" }" NL,
" }" NL,
"}" NL
};
static const char* helper_ndrange_3d_glo[] =
{
NL, "void block_fn(int len, __global atomic_uint* val)"
NL, "{"
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
NL, "}"
NL, ""
NL, "kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
NL, "{"
NL, " size_t tid = get_global_id(0);"
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
NL, ""
NL, " for(int i = 0; i < n; i++)"
NL, " {"
NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
NL, " if (global_work_size <= (len * len))"
NL, " {"
NL, " size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };"
NL, " ndrange_t ndrange = ndrange_3D(glob_size);"
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
NL, " }"
NL, " }"
NL, "}"
NL
static const char *helper_ndrange_3d_glo[] = {
NL,
"void block_fn(int len, __global atomic_uint* val)" NL,
"{" NL,
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
"memory_order_relaxed, memory_scope_device);" NL,
"}" NL,
"" NL,
"kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
"val, __global uint* ofs_arr)" NL,
"{" NL,
" size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
"" NL,
" for(int i = 0; i < n; i++)" NL,
" {" NL,
" uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % "
"n] * glob_size_arr[(i + 2) % n];" NL,
" if (global_work_size <= (len * len))" NL,
" {" NL,
" size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % "
"n], glob_size_arr[(i + 2) % n] };" NL,
" ndrange_t ndrange = ndrange_3D(glob_size);" NL,
" int enq_res = enqueue_kernel(get_default_queue(), "
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
" }" NL,
" }" NL,
"}" NL
};
static const char* helper_ndrange_3d_loc[] =
{
NL, "void block_fn(int len, __global atomic_uint* val)"
NL, "{"
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
NL, "}"
NL, ""
NL, "kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
NL, "{"
NL, " size_t tid = get_global_id(0);"
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
NL, ""
NL, " for(int k = 0; k < n; k++)"
NL, " {"
NL, " for(int i = 0; i < n; i++)"
NL, " {"
NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
NL, " if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))"
NL, " {"
NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };"
NL, " size_t loc_size[] = { 1, 1, loc_size_arr[k] };"
NL, " ndrange_t ndrange = ndrange_3D(glob_size,loc_size);"
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
NL, " "
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
NL, " }"
NL, " }"
NL, " }"
NL, "}"
NL
static const char *helper_ndrange_3d_loc[] = {
NL,
"void block_fn(int len, __global atomic_uint* val)" NL,
"{" NL,
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
"memory_order_relaxed, memory_scope_device);" NL,
"}" NL,
"" NL,
"kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
"val, __global uint* ofs_arr)" NL,
"{" NL,
" size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
"" NL,
" for(int k = 0; k < n; k++)" NL,
" {" NL,
" for(int i = 0; i < n; i++)" NL,
" {" NL,
" uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % "
"n] * glob_size_arr[(i + 2) % n];" NL,
" if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && "
"global_work_size <= (len * len))" NL,
" {" NL,
" size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % "
"n], glob_size_arr[(i + 2) % n] };" NL,
" size_t loc_size[] = { 1, 1, loc_size_arr[k] };" NL,
" ndrange_t ndrange = ndrange_3D(glob_size,loc_size);" NL,
" int enq_res = enqueue_kernel(get_default_queue(), "
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
" " NL,
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
" }" NL,
" }" NL,
" }" NL,
"}" NL
};
static const char* helper_ndrange_3d_ofs[] =
{
NL, "void block_fn(int len, __global atomic_uint* val)"
NL, "{"
NL, " atomic_fetch_add_explicit(&val[(get_global_offset(2) * get_global_size(0) * get_global_size(1) + get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
NL, "}"
NL, ""
NL, "kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
NL, "{"
NL, " size_t tid = get_global_id(0);"
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
NL, ""
NL, " for(int l = 0; l < n; l++)"
NL, " {"
NL, " for(int k = 0; k < n; k++)"
NL, " {"
NL, " for(int i = 0; i < n; i++)"
NL, " {"
NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
NL, " if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))"
NL, " {"
NL, " size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n]};"
NL, " size_t loc_size[3] = { 1, 1, loc_size_arr[k] };"
NL, " size_t ofs[3] = { ofs_arr[l], ofs_arr[(l + 1) % n], ofs_arr[(l + 2) % n] };"
NL, " ndrange_t ndrange = ndrange_3D(ofs,glob_size,loc_size);"
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
NL, " }"
NL, " }"
NL, " }"
NL, " }"
NL, "}"
NL
static const char *helper_ndrange_3d_ofs[] = {
NL,
"void block_fn(int len, __global atomic_uint* val)" NL,
"{" NL,
" atomic_fetch_add_explicit(&val[(get_global_offset(2) * "
"get_global_size(0) * get_global_size(1) + get_global_offset(1) * "
"get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % "
"len], 1u, memory_order_relaxed, memory_scope_device);" NL,
"}" NL,
"" NL,
"kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, "
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
"val, __global uint* ofs_arr)" NL,
"{" NL,
" size_t tid = get_global_id(0);" NL,
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
"" NL,
" for(int l = 0; l < n; l++)" NL,
" {" NL,
" for(int k = 0; k < n; k++)" NL,
" {" NL,
" for(int i = 0; i < n; i++)" NL,
" {" NL,
" uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) "
"% n] * glob_size_arr[(i + 2) % n];" NL,
" if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && "
"global_work_size <= (len * len))" NL,
" {" NL,
" size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) "
"% n], glob_size_arr[(i + 2) % n]};" NL,
" size_t loc_size[3] = { 1, 1, loc_size_arr[k] };" NL,
" size_t ofs[3] = { ofs_arr[l], ofs_arr[(l + 1) % n], ofs_arr[(l "
"+ 2) % n] };" NL,
" ndrange_t ndrange = ndrange_3D(ofs,glob_size,loc_size);" NL,
" int enq_res = enqueue_kernel(get_default_queue(), "
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
" }" NL,
" }" NL,
" }" NL,
" }" NL,
"}" NL
};
static const kernel_src_dim_check sources_ndrange_Xd[] =