mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Change arg type to unsigned int from signed int (#1078)
* Change arg type to unsigned int from signed int * Fix formatting issues
This commit is contained in:
committed by
GitHub
parent
ca673af488
commit
216455842d
@@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@@ -16,27 +16,33 @@
|
||||
#include "common.h"
|
||||
|
||||
static char hash_table_kernel[] =
|
||||
"#if 0\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
|
||||
"#endif\n"
|
||||
"typedef struct BinNode {\n"
|
||||
" int value;\n"
|
||||
" atomic_uintptr_t pNext;\n"
|
||||
"} BinNode;\n"
|
||||
"#if 0\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
|
||||
"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
|
||||
"#endif\n"
|
||||
"typedef struct BinNode {\n"
|
||||
" int value;\n"
|
||||
" atomic_uintptr_t pNext;\n"
|
||||
"} BinNode;\n"
|
||||
|
||||
"__kernel void build_hash_table(__global uint* input, __global BinNode* pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n"
|
||||
"{\n"
|
||||
" __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, 1, memory_order_relaxed, memory_scope_all_svm_devices) ];\n"
|
||||
" uint i = get_global_id(0);\n"
|
||||
" uint b = input[i] % numBins;\n"
|
||||
" pNew->value = input[i];\n"
|
||||
" uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), memory_order_seq_cst, memory_scope_all_svm_devices);\n"
|
||||
" do\n"
|
||||
" {\n"
|
||||
" atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, memory_scope_all_svm_devices);\n" // always inserting at head of list
|
||||
" } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), &next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, memory_scope_all_svm_devices));\n"
|
||||
"}\n";
|
||||
"__kernel void build_hash_table(__global uint* input, __global BinNode* "
|
||||
"pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n"
|
||||
"{\n"
|
||||
" __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, "
|
||||
"1u, memory_order_relaxed, memory_scope_all_svm_devices) ];\n"
|
||||
" uint i = get_global_id(0);\n"
|
||||
" uint b = input[i] % numBins;\n"
|
||||
" pNew->value = input[i];\n"
|
||||
" uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), "
|
||||
"memory_order_seq_cst, memory_scope_all_svm_devices);\n"
|
||||
" do\n"
|
||||
" {\n"
|
||||
" atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, "
|
||||
"memory_scope_all_svm_devices);\n" // always inserting at head of list
|
||||
" } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), "
|
||||
"&next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, "
|
||||
"memory_scope_all_svm_devices));\n"
|
||||
"}\n";
|
||||
|
||||
typedef struct BinNode{
|
||||
cl_uint value;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@@ -17,15 +17,19 @@
|
||||
|
||||
const char *find_targets_kernel[] = {
|
||||
|
||||
"__kernel void find_targets(__global uint* image, uint target, volatile __global atomic_uint *numTargetsFound, volatile __global atomic_uint *targetLocations)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" uint index;\n"
|
||||
" if(image[i] == target) {\n"
|
||||
" index = atomic_fetch_add_explicit(numTargetsFound, 1, memory_order_relaxed, memory_scope_device); \n"
|
||||
" atomic_exchange_explicit(&targetLocations[index], i, memory_order_relaxed, memory_scope_all_svm_devices); \n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"__kernel void find_targets(__global uint* image, uint target, volatile "
|
||||
"__global atomic_uint *numTargetsFound, volatile __global atomic_uint "
|
||||
"*targetLocations)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" uint index;\n"
|
||||
" if(image[i] == target) {\n"
|
||||
" index = atomic_fetch_add_explicit(numTargetsFound, 1u, "
|
||||
"memory_order_relaxed, memory_scope_device); \n"
|
||||
" atomic_exchange_explicit(&targetLocations[index], i, "
|
||||
"memory_order_relaxed, memory_scope_all_svm_devices); \n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@@ -27,271 +27,316 @@
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
extern int gWimpyMode;
|
||||
static const char* helper_ndrange_1d_glo[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_1d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(glob_size_arr[i]);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
static const char *helper_ndrange_1d_glo[] = {
|
||||
NL,
|
||||
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||
"{" NL,
|
||||
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
|
||||
"memory_order_relaxed, memory_scope_device);" NL,
|
||||
"}" NL,
|
||||
"" NL,
|
||||
"kernel void helper_ndrange_1d_glo(__global int* res, uint n, uint len, "
|
||||
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
|
||||
"atomic_uint* val, __global uint* ofs_arr)" NL,
|
||||
"{" NL,
|
||||
" size_t tid = get_global_id(0);" NL,
|
||||
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||
"" NL,
|
||||
" for(int i = 0; i < n; i++)" NL,
|
||||
" {" NL,
|
||||
" ndrange_t ndrange = ndrange_1D(glob_size_arr[i]);" NL,
|
||||
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||
" }" NL,
|
||||
"}" NL
|
||||
};
|
||||
|
||||
static const char* helper_ndrange_1d_loc[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_1d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int k = 0; k < n; k++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " if (glob_size_arr[i] >= loc_size_arr[k])"
|
||||
NL, " {"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(glob_size_arr[i], loc_size_arr[k]);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
static const char *helper_ndrange_1d_loc[] = {
|
||||
NL,
|
||||
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||
"{" NL,
|
||||
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
|
||||
"memory_order_relaxed, memory_scope_device);" NL,
|
||||
"}" NL,
|
||||
"" NL,
|
||||
"kernel void helper_ndrange_1d_loc(__global int* res, uint n, uint len, "
|
||||
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
|
||||
"atomic_uint* val, __global uint* ofs_arr)" NL,
|
||||
"{" NL,
|
||||
" size_t tid = get_global_id(0);" NL,
|
||||
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||
"" NL,
|
||||
" for(int k = 0; k < n; k++)" NL,
|
||||
" {" NL,
|
||||
" for(int i = 0; i < n; i++)" NL,
|
||||
" {" NL,
|
||||
" if (glob_size_arr[i] >= loc_size_arr[k])" NL,
|
||||
" {" NL,
|
||||
" ndrange_t ndrange = ndrange_1D(glob_size_arr[i], "
|
||||
"loc_size_arr[k]);" NL,
|
||||
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
"}" NL
|
||||
};
|
||||
|
||||
static const char* helper_ndrange_1d_ofs[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[(get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_1d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int l = 0; l < n; l++)"
|
||||
NL, " {"
|
||||
NL, " for(int k = 0; k < n; k++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " if (glob_size_arr[i] >= loc_size_arr[k])"
|
||||
NL, " {"
|
||||
NL, " ndrange_t ndrange = ndrange_1D(ofs_arr[l], glob_size_arr[i], loc_size_arr[k]);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
static const char *helper_ndrange_1d_ofs[] = {
|
||||
NL,
|
||||
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||
"{" NL,
|
||||
" atomic_fetch_add_explicit(&val[(get_global_offset(0) + "
|
||||
"get_global_linear_id()) % len], 1u, memory_order_relaxed, "
|
||||
"memory_scope_device);" NL,
|
||||
"}" NL,
|
||||
"" NL,
|
||||
"kernel void helper_ndrange_1d_ofs(__global int* res, uint n, uint len, "
|
||||
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
|
||||
"atomic_uint* val, __global uint* ofs_arr)" NL,
|
||||
"{" NL,
|
||||
" size_t tid = get_global_id(0);" NL,
|
||||
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||
"" NL,
|
||||
" for(int l = 0; l < n; l++)" NL,
|
||||
" {" NL,
|
||||
" for(int k = 0; k < n; k++)" NL,
|
||||
" {" NL,
|
||||
" for(int i = 0; i < n; i++)" NL,
|
||||
" {" NL,
|
||||
" if (glob_size_arr[i] >= loc_size_arr[k])" NL,
|
||||
" {" NL,
|
||||
" ndrange_t ndrange = ndrange_1D(ofs_arr[l], glob_size_arr[i], "
|
||||
"loc_size_arr[k]);" NL,
|
||||
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
"}" NL
|
||||
};
|
||||
|
||||
static const char* helper_ndrange_2d_glo[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " size_t glob_size[2] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] };"
|
||||
NL, " ndrange_t ndrange = ndrange_2D(glob_size);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
static const char *helper_ndrange_2d_glo[] = {
|
||||
NL,
|
||||
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||
"{" NL,
|
||||
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
|
||||
"memory_order_relaxed, memory_scope_device);" NL,
|
||||
"}" NL,
|
||||
"" NL,
|
||||
"kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, "
|
||||
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
|
||||
"val, __global uint* ofs_arr)" NL,
|
||||
"{" NL,
|
||||
" size_t tid = get_global_id(0);" NL,
|
||||
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||
"" NL,
|
||||
" for(int i = 0; i < n; i++)" NL,
|
||||
" {" NL,
|
||||
" size_t glob_size[2] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] "
|
||||
"};" NL,
|
||||
" ndrange_t ndrange = ndrange_2D(glob_size);" NL,
|
||||
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||
" }" NL,
|
||||
"}" NL
|
||||
};
|
||||
|
||||
static const char* helper_ndrange_2d_loc[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int k = 0; k < n; k++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])"
|
||||
NL, " {"
|
||||
NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] };"
|
||||
NL, " size_t loc_size[] = { 1, loc_size_arr[k] };"
|
||||
NL, ""
|
||||
NL, " ndrange_t ndrange = ndrange_2D(glob_size, loc_size);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
static const char *helper_ndrange_2d_loc[] = {
|
||||
NL,
|
||||
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||
"{" NL,
|
||||
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
|
||||
"memory_order_relaxed, memory_scope_device);" NL,
|
||||
"}" NL,
|
||||
"" NL,
|
||||
"kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, "
|
||||
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
|
||||
"val, __global uint* ofs_arr)" NL,
|
||||
"{" NL,
|
||||
" size_t tid = get_global_id(0);" NL,
|
||||
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||
"" NL,
|
||||
" for(int k = 0; k < n; k++)" NL,
|
||||
" {" NL,
|
||||
" for(int i = 0; i < n; i++)" NL,
|
||||
" {" NL,
|
||||
" if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" NL,
|
||||
" {" NL,
|
||||
" size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % "
|
||||
"n] };" NL,
|
||||
" size_t loc_size[] = { 1, loc_size_arr[k] };" NL,
|
||||
"" NL,
|
||||
" ndrange_t ndrange = ndrange_2D(glob_size, loc_size);" NL,
|
||||
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
"}" NL
|
||||
};
|
||||
|
||||
|
||||
static const char* helper_ndrange_2d_ofs[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[(get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int l = 0; l < n; l++)"
|
||||
NL, " {"
|
||||
NL, " for(int k = 0; k < n; k++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])"
|
||||
NL, " {"
|
||||
NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n]};"
|
||||
NL, " size_t loc_size[] = { 1, loc_size_arr[k] };"
|
||||
NL, " size_t ofs[] = { ofs_arr[l], ofs_arr[(l + 1) % n] };"
|
||||
NL, ""
|
||||
NL, " ndrange_t ndrange = ndrange_2D(ofs,glob_size,loc_size);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
static const char *helper_ndrange_2d_ofs[] = {
|
||||
NL,
|
||||
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||
"{" NL,
|
||||
" atomic_fetch_add_explicit(&val[(get_global_offset(1) * "
|
||||
"get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % "
|
||||
"len], 1u, memory_order_relaxed, memory_scope_device);" NL,
|
||||
"}" NL,
|
||||
"" NL,
|
||||
"kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, "
|
||||
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
|
||||
"val, __global uint* ofs_arr)" NL,
|
||||
"{" NL,
|
||||
" size_t tid = get_global_id(0);" NL,
|
||||
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||
"" NL,
|
||||
" for(int l = 0; l < n; l++)" NL,
|
||||
" {" NL,
|
||||
" for(int k = 0; k < n; k++)" NL,
|
||||
" {" NL,
|
||||
" for(int i = 0; i < n; i++)" NL,
|
||||
" {" NL,
|
||||
" if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" NL,
|
||||
" {" NL,
|
||||
" size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) "
|
||||
"% n]};" NL,
|
||||
" size_t loc_size[] = { 1, loc_size_arr[k] };" NL,
|
||||
" size_t ofs[] = { ofs_arr[l], ofs_arr[(l + 1) % n] };" NL,
|
||||
"" NL,
|
||||
" ndrange_t ndrange = ndrange_2D(ofs,glob_size,loc_size);" NL,
|
||||
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
"}" NL
|
||||
};
|
||||
|
||||
|
||||
static const char* helper_ndrange_3d_glo[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
|
||||
NL, " if (global_work_size <= (len * len))"
|
||||
NL, " {"
|
||||
NL, " size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };"
|
||||
NL, " ndrange_t ndrange = ndrange_3D(glob_size);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
static const char *helper_ndrange_3d_glo[] = {
|
||||
NL,
|
||||
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||
"{" NL,
|
||||
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
|
||||
"memory_order_relaxed, memory_scope_device);" NL,
|
||||
"}" NL,
|
||||
"" NL,
|
||||
"kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, "
|
||||
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
|
||||
"val, __global uint* ofs_arr)" NL,
|
||||
"{" NL,
|
||||
" size_t tid = get_global_id(0);" NL,
|
||||
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||
"" NL,
|
||||
" for(int i = 0; i < n; i++)" NL,
|
||||
" {" NL,
|
||||
" uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % "
|
||||
"n] * glob_size_arr[(i + 2) % n];" NL,
|
||||
" if (global_work_size <= (len * len))" NL,
|
||||
" {" NL,
|
||||
" size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % "
|
||||
"n], glob_size_arr[(i + 2) % n] };" NL,
|
||||
" ndrange_t ndrange = ndrange_3D(glob_size);" NL,
|
||||
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
"}" NL
|
||||
};
|
||||
|
||||
|
||||
static const char* helper_ndrange_3d_loc[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int k = 0; k < n; k++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
|
||||
NL, " if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))"
|
||||
NL, " {"
|
||||
NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };"
|
||||
NL, " size_t loc_size[] = { 1, 1, loc_size_arr[k] };"
|
||||
NL, " ndrange_t ndrange = ndrange_3D(glob_size,loc_size);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " "
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
static const char *helper_ndrange_3d_loc[] = {
|
||||
NL,
|
||||
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||
"{" NL,
|
||||
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
|
||||
"memory_order_relaxed, memory_scope_device);" NL,
|
||||
"}" NL,
|
||||
"" NL,
|
||||
"kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, "
|
||||
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
|
||||
"val, __global uint* ofs_arr)" NL,
|
||||
"{" NL,
|
||||
" size_t tid = get_global_id(0);" NL,
|
||||
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||
"" NL,
|
||||
" for(int k = 0; k < n; k++)" NL,
|
||||
" {" NL,
|
||||
" for(int i = 0; i < n; i++)" NL,
|
||||
" {" NL,
|
||||
" uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % "
|
||||
"n] * glob_size_arr[(i + 2) % n];" NL,
|
||||
" if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && "
|
||||
"global_work_size <= (len * len))" NL,
|
||||
" {" NL,
|
||||
" size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % "
|
||||
"n], glob_size_arr[(i + 2) % n] };" NL,
|
||||
" size_t loc_size[] = { 1, 1, loc_size_arr[k] };" NL,
|
||||
" ndrange_t ndrange = ndrange_3D(glob_size,loc_size);" NL,
|
||||
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||
" " NL,
|
||||
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
"}" NL
|
||||
};
|
||||
|
||||
static const char* helper_ndrange_3d_ofs[] =
|
||||
{
|
||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
||||
NL, "{"
|
||||
NL, " atomic_fetch_add_explicit(&val[(get_global_offset(2) * get_global_size(0) * get_global_size(1) + get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
|
||||
NL, "}"
|
||||
NL, ""
|
||||
NL, "kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
||||
NL, "{"
|
||||
NL, " size_t tid = get_global_id(0);"
|
||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
||||
NL, ""
|
||||
NL, " for(int l = 0; l < n; l++)"
|
||||
NL, " {"
|
||||
NL, " for(int k = 0; k < n; k++)"
|
||||
NL, " {"
|
||||
NL, " for(int i = 0; i < n; i++)"
|
||||
NL, " {"
|
||||
NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
|
||||
NL, " if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))"
|
||||
NL, " {"
|
||||
NL, " size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n]};"
|
||||
NL, " size_t loc_size[3] = { 1, 1, loc_size_arr[k] };"
|
||||
NL, " size_t ofs[3] = { ofs_arr[l], ofs_arr[(l + 1) % n], ofs_arr[(l + 2) % n] };"
|
||||
NL, " ndrange_t ndrange = ndrange_3D(ofs,glob_size,loc_size);"
|
||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, " }"
|
||||
NL, "}"
|
||||
NL
|
||||
static const char *helper_ndrange_3d_ofs[] = {
|
||||
NL,
|
||||
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||
"{" NL,
|
||||
" atomic_fetch_add_explicit(&val[(get_global_offset(2) * "
|
||||
"get_global_size(0) * get_global_size(1) + get_global_offset(1) * "
|
||||
"get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % "
|
||||
"len], 1u, memory_order_relaxed, memory_scope_device);" NL,
|
||||
"}" NL,
|
||||
"" NL,
|
||||
"kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, "
|
||||
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
|
||||
"val, __global uint* ofs_arr)" NL,
|
||||
"{" NL,
|
||||
" size_t tid = get_global_id(0);" NL,
|
||||
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||
"" NL,
|
||||
" for(int l = 0; l < n; l++)" NL,
|
||||
" {" NL,
|
||||
" for(int k = 0; k < n; k++)" NL,
|
||||
" {" NL,
|
||||
" for(int i = 0; i < n; i++)" NL,
|
||||
" {" NL,
|
||||
" uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) "
|
||||
"% n] * glob_size_arr[(i + 2) % n];" NL,
|
||||
" if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && "
|
||||
"global_work_size <= (len * len))" NL,
|
||||
" {" NL,
|
||||
" size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) "
|
||||
"% n], glob_size_arr[(i + 2) % n]};" NL,
|
||||
" size_t loc_size[3] = { 1, 1, loc_size_arr[k] };" NL,
|
||||
" size_t ofs[3] = { ofs_arr[l], ofs_arr[(l + 1) % n], ofs_arr[(l "
|
||||
"+ 2) % n] };" NL,
|
||||
" ndrange_t ndrange = ndrange_3D(ofs,glob_size,loc_size);" NL,
|
||||
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
" }" NL,
|
||||
"}" NL
|
||||
};
|
||||
|
||||
static const kernel_src_dim_check sources_ndrange_Xd[] =
|
||||
|
||||
Reference in New Issue
Block a user