mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Change arg type to unsigned int from signed int (#1078)
* Change arg type to unsigned int from signed int * Fix formatting issues
This commit is contained in:
committed by
GitHub
parent
ca673af488
commit
216455842d
@@ -1,6 +1,6 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
// You may obtain a copy of the License at
|
// You may obtain a copy of the License at
|
||||||
@@ -16,27 +16,33 @@
|
|||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
static char hash_table_kernel[] =
|
static char hash_table_kernel[] =
|
||||||
"#if 0\n"
|
"#if 0\n"
|
||||||
"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
|
"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
|
||||||
"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
|
"#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
"typedef struct BinNode {\n"
|
"typedef struct BinNode {\n"
|
||||||
" int value;\n"
|
" int value;\n"
|
||||||
" atomic_uintptr_t pNext;\n"
|
" atomic_uintptr_t pNext;\n"
|
||||||
"} BinNode;\n"
|
"} BinNode;\n"
|
||||||
|
|
||||||
"__kernel void build_hash_table(__global uint* input, __global BinNode* pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n"
|
"__kernel void build_hash_table(__global uint* input, __global BinNode* "
|
||||||
"{\n"
|
"pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n"
|
||||||
" __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, 1, memory_order_relaxed, memory_scope_all_svm_devices) ];\n"
|
"{\n"
|
||||||
" uint i = get_global_id(0);\n"
|
" __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, "
|
||||||
" uint b = input[i] % numBins;\n"
|
"1u, memory_order_relaxed, memory_scope_all_svm_devices) ];\n"
|
||||||
" pNew->value = input[i];\n"
|
" uint i = get_global_id(0);\n"
|
||||||
" uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), memory_order_seq_cst, memory_scope_all_svm_devices);\n"
|
" uint b = input[i] % numBins;\n"
|
||||||
" do\n"
|
" pNew->value = input[i];\n"
|
||||||
" {\n"
|
" uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), "
|
||||||
" atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, memory_scope_all_svm_devices);\n" // always inserting at head of list
|
"memory_order_seq_cst, memory_scope_all_svm_devices);\n"
|
||||||
" } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), &next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, memory_scope_all_svm_devices));\n"
|
" do\n"
|
||||||
"}\n";
|
" {\n"
|
||||||
|
" atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, "
|
||||||
|
"memory_scope_all_svm_devices);\n" // always inserting at head of list
|
||||||
|
" } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), "
|
||||||
|
"&next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, "
|
||||||
|
"memory_scope_all_svm_devices));\n"
|
||||||
|
"}\n";
|
||||||
|
|
||||||
typedef struct BinNode{
|
typedef struct BinNode{
|
||||||
cl_uint value;
|
cl_uint value;
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
// You may obtain a copy of the License at
|
// You may obtain a copy of the License at
|
||||||
@@ -17,15 +17,19 @@
|
|||||||
|
|
||||||
const char *find_targets_kernel[] = {
|
const char *find_targets_kernel[] = {
|
||||||
|
|
||||||
"__kernel void find_targets(__global uint* image, uint target, volatile __global atomic_uint *numTargetsFound, volatile __global atomic_uint *targetLocations)\n"
|
"__kernel void find_targets(__global uint* image, uint target, volatile "
|
||||||
"{\n"
|
"__global atomic_uint *numTargetsFound, volatile __global atomic_uint "
|
||||||
" size_t i = get_global_id(0);\n"
|
"*targetLocations)\n"
|
||||||
" uint index;\n"
|
"{\n"
|
||||||
" if(image[i] == target) {\n"
|
" size_t i = get_global_id(0);\n"
|
||||||
" index = atomic_fetch_add_explicit(numTargetsFound, 1, memory_order_relaxed, memory_scope_device); \n"
|
" uint index;\n"
|
||||||
" atomic_exchange_explicit(&targetLocations[index], i, memory_order_relaxed, memory_scope_all_svm_devices); \n"
|
" if(image[i] == target) {\n"
|
||||||
" }\n"
|
" index = atomic_fetch_add_explicit(numTargetsFound, 1u, "
|
||||||
"}\n"
|
"memory_order_relaxed, memory_scope_device); \n"
|
||||||
|
" atomic_exchange_explicit(&targetLocations[index], i, "
|
||||||
|
"memory_order_relaxed, memory_scope_all_svm_devices); \n"
|
||||||
|
" }\n"
|
||||||
|
"}\n"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2017 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
// You may obtain a copy of the License at
|
// You may obtain a copy of the License at
|
||||||
@@ -27,271 +27,316 @@
|
|||||||
|
|
||||||
#ifdef CL_VERSION_2_0
|
#ifdef CL_VERSION_2_0
|
||||||
extern int gWimpyMode;
|
extern int gWimpyMode;
|
||||||
static const char* helper_ndrange_1d_glo[] =
|
static const char *helper_ndrange_1d_glo[] = {
|
||||||
{
|
NL,
|
||||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||||
NL, "{"
|
"{" NL,
|
||||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
|
||||||
NL, "}"
|
"memory_order_relaxed, memory_scope_device);" NL,
|
||||||
NL, ""
|
"}" NL,
|
||||||
NL, "kernel void helper_ndrange_1d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)"
|
"" NL,
|
||||||
NL, "{"
|
"kernel void helper_ndrange_1d_glo(__global int* res, uint n, uint len, "
|
||||||
NL, " size_t tid = get_global_id(0);"
|
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
|
||||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
"atomic_uint* val, __global uint* ofs_arr)" NL,
|
||||||
NL, ""
|
"{" NL,
|
||||||
NL, " for(int i = 0; i < n; i++)"
|
" size_t tid = get_global_id(0);" NL,
|
||||||
NL, " {"
|
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||||
NL, " ndrange_t ndrange = ndrange_1D(glob_size_arr[i]);"
|
"" NL,
|
||||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
" for(int i = 0; i < n; i++)" NL,
|
||||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
" {" NL,
|
||||||
NL, " }"
|
" ndrange_t ndrange = ndrange_1D(glob_size_arr[i]);" NL,
|
||||||
NL, "}"
|
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||||
NL
|
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||||
|
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||||
|
" }" NL,
|
||||||
|
"}" NL
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char* helper_ndrange_1d_loc[] =
|
static const char *helper_ndrange_1d_loc[] = {
|
||||||
{
|
NL,
|
||||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||||
NL, "{"
|
"{" NL,
|
||||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
|
||||||
NL, "}"
|
"memory_order_relaxed, memory_scope_device);" NL,
|
||||||
NL, ""
|
"}" NL,
|
||||||
NL, "kernel void helper_ndrange_1d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)"
|
"" NL,
|
||||||
NL, "{"
|
"kernel void helper_ndrange_1d_loc(__global int* res, uint n, uint len, "
|
||||||
NL, " size_t tid = get_global_id(0);"
|
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
|
||||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
"atomic_uint* val, __global uint* ofs_arr)" NL,
|
||||||
NL, ""
|
"{" NL,
|
||||||
NL, " for(int k = 0; k < n; k++)"
|
" size_t tid = get_global_id(0);" NL,
|
||||||
NL, " {"
|
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||||
NL, " for(int i = 0; i < n; i++)"
|
"" NL,
|
||||||
NL, " {"
|
" for(int k = 0; k < n; k++)" NL,
|
||||||
NL, " if (glob_size_arr[i] >= loc_size_arr[k])"
|
" {" NL,
|
||||||
NL, " {"
|
" for(int i = 0; i < n; i++)" NL,
|
||||||
NL, " ndrange_t ndrange = ndrange_1D(glob_size_arr[i], loc_size_arr[k]);"
|
" {" NL,
|
||||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
" if (glob_size_arr[i] >= loc_size_arr[k])" NL,
|
||||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
" {" NL,
|
||||||
NL, " }"
|
" ndrange_t ndrange = ndrange_1D(glob_size_arr[i], "
|
||||||
NL, " }"
|
"loc_size_arr[k]);" NL,
|
||||||
NL, " }"
|
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||||
NL, "}"
|
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||||
NL
|
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
"}" NL
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char* helper_ndrange_1d_ofs[] =
|
static const char *helper_ndrange_1d_ofs[] = {
|
||||||
{
|
NL,
|
||||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||||
NL, "{"
|
"{" NL,
|
||||||
NL, " atomic_fetch_add_explicit(&val[(get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
|
" atomic_fetch_add_explicit(&val[(get_global_offset(0) + "
|
||||||
NL, "}"
|
"get_global_linear_id()) % len], 1u, memory_order_relaxed, "
|
||||||
NL, ""
|
"memory_scope_device);" NL,
|
||||||
NL, "kernel void helper_ndrange_1d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)"
|
"}" NL,
|
||||||
NL, "{"
|
"" NL,
|
||||||
NL, " size_t tid = get_global_id(0);"
|
"kernel void helper_ndrange_1d_ofs(__global int* res, uint n, uint len, "
|
||||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
|
||||||
NL, ""
|
"atomic_uint* val, __global uint* ofs_arr)" NL,
|
||||||
NL, " for(int l = 0; l < n; l++)"
|
"{" NL,
|
||||||
NL, " {"
|
" size_t tid = get_global_id(0);" NL,
|
||||||
NL, " for(int k = 0; k < n; k++)"
|
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||||
NL, " {"
|
"" NL,
|
||||||
NL, " for(int i = 0; i < n; i++)"
|
" for(int l = 0; l < n; l++)" NL,
|
||||||
NL, " {"
|
" {" NL,
|
||||||
NL, " if (glob_size_arr[i] >= loc_size_arr[k])"
|
" for(int k = 0; k < n; k++)" NL,
|
||||||
NL, " {"
|
" {" NL,
|
||||||
NL, " ndrange_t ndrange = ndrange_1D(ofs_arr[l], glob_size_arr[i], loc_size_arr[k]);"
|
" for(int i = 0; i < n; i++)" NL,
|
||||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
" {" NL,
|
||||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
" if (glob_size_arr[i] >= loc_size_arr[k])" NL,
|
||||||
NL, " }"
|
" {" NL,
|
||||||
NL, " }"
|
" ndrange_t ndrange = ndrange_1D(ofs_arr[l], glob_size_arr[i], "
|
||||||
NL, " }"
|
"loc_size_arr[k]);" NL,
|
||||||
NL, " }"
|
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||||
NL, "}"
|
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||||
NL
|
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
"}" NL
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char* helper_ndrange_2d_glo[] =
|
static const char *helper_ndrange_2d_glo[] = {
|
||||||
{
|
NL,
|
||||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||||
NL, "{"
|
"{" NL,
|
||||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
|
||||||
NL, "}"
|
"memory_order_relaxed, memory_scope_device);" NL,
|
||||||
NL, ""
|
"}" NL,
|
||||||
NL, "kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
"" NL,
|
||||||
NL, "{"
|
"kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, "
|
||||||
NL, " size_t tid = get_global_id(0);"
|
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
|
||||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
"val, __global uint* ofs_arr)" NL,
|
||||||
NL, ""
|
"{" NL,
|
||||||
NL, " for(int i = 0; i < n; i++)"
|
" size_t tid = get_global_id(0);" NL,
|
||||||
NL, " {"
|
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||||
NL, " size_t glob_size[2] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] };"
|
"" NL,
|
||||||
NL, " ndrange_t ndrange = ndrange_2D(glob_size);"
|
" for(int i = 0; i < n; i++)" NL,
|
||||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
" {" NL,
|
||||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
" size_t glob_size[2] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] "
|
||||||
NL, " }"
|
"};" NL,
|
||||||
NL, "}"
|
" ndrange_t ndrange = ndrange_2D(glob_size);" NL,
|
||||||
NL
|
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||||
|
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||||
|
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||||
|
" }" NL,
|
||||||
|
"}" NL
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char* helper_ndrange_2d_loc[] =
|
static const char *helper_ndrange_2d_loc[] = {
|
||||||
{
|
NL,
|
||||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||||
NL, "{"
|
"{" NL,
|
||||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
|
||||||
NL, "}"
|
"memory_order_relaxed, memory_scope_device);" NL,
|
||||||
NL, ""
|
"}" NL,
|
||||||
NL, "kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
"" NL,
|
||||||
NL, "{"
|
"kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, "
|
||||||
NL, " size_t tid = get_global_id(0);"
|
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
|
||||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
"val, __global uint* ofs_arr)" NL,
|
||||||
NL, ""
|
"{" NL,
|
||||||
NL, " for(int k = 0; k < n; k++)"
|
" size_t tid = get_global_id(0);" NL,
|
||||||
NL, " {"
|
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||||
NL, " for(int i = 0; i < n; i++)"
|
"" NL,
|
||||||
NL, " {"
|
" for(int k = 0; k < n; k++)" NL,
|
||||||
NL, " if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])"
|
" {" NL,
|
||||||
NL, " {"
|
" for(int i = 0; i < n; i++)" NL,
|
||||||
NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] };"
|
" {" NL,
|
||||||
NL, " size_t loc_size[] = { 1, loc_size_arr[k] };"
|
" if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" NL,
|
||||||
NL, ""
|
" {" NL,
|
||||||
NL, " ndrange_t ndrange = ndrange_2D(glob_size, loc_size);"
|
" size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % "
|
||||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
"n] };" NL,
|
||||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
" size_t loc_size[] = { 1, loc_size_arr[k] };" NL,
|
||||||
NL, " }"
|
"" NL,
|
||||||
NL, " }"
|
" ndrange_t ndrange = ndrange_2D(glob_size, loc_size);" NL,
|
||||||
NL, " }"
|
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||||
NL, "}"
|
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||||
NL
|
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
"}" NL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static const char* helper_ndrange_2d_ofs[] =
|
static const char *helper_ndrange_2d_ofs[] = {
|
||||||
{
|
NL,
|
||||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||||
NL, "{"
|
"{" NL,
|
||||||
NL, " atomic_fetch_add_explicit(&val[(get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
|
" atomic_fetch_add_explicit(&val[(get_global_offset(1) * "
|
||||||
NL, "}"
|
"get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % "
|
||||||
NL, ""
|
"len], 1u, memory_order_relaxed, memory_scope_device);" NL,
|
||||||
NL, "kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
"}" NL,
|
||||||
NL, "{"
|
"" NL,
|
||||||
NL, " size_t tid = get_global_id(0);"
|
"kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, "
|
||||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
|
||||||
NL, ""
|
"val, __global uint* ofs_arr)" NL,
|
||||||
NL, " for(int l = 0; l < n; l++)"
|
"{" NL,
|
||||||
NL, " {"
|
" size_t tid = get_global_id(0);" NL,
|
||||||
NL, " for(int k = 0; k < n; k++)"
|
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||||
NL, " {"
|
"" NL,
|
||||||
NL, " for(int i = 0; i < n; i++)"
|
" for(int l = 0; l < n; l++)" NL,
|
||||||
NL, " {"
|
" {" NL,
|
||||||
NL, " if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])"
|
" for(int k = 0; k < n; k++)" NL,
|
||||||
NL, " {"
|
" {" NL,
|
||||||
NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n]};"
|
" for(int i = 0; i < n; i++)" NL,
|
||||||
NL, " size_t loc_size[] = { 1, loc_size_arr[k] };"
|
" {" NL,
|
||||||
NL, " size_t ofs[] = { ofs_arr[l], ofs_arr[(l + 1) % n] };"
|
" if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" NL,
|
||||||
NL, ""
|
" {" NL,
|
||||||
NL, " ndrange_t ndrange = ndrange_2D(ofs,glob_size,loc_size);"
|
" size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) "
|
||||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
"% n]};" NL,
|
||||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
" size_t loc_size[] = { 1, loc_size_arr[k] };" NL,
|
||||||
NL, " }"
|
" size_t ofs[] = { ofs_arr[l], ofs_arr[(l + 1) % n] };" NL,
|
||||||
NL, " }"
|
"" NL,
|
||||||
NL, " }"
|
" ndrange_t ndrange = ndrange_2D(ofs,glob_size,loc_size);" NL,
|
||||||
NL, " }"
|
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||||
NL, "}"
|
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||||
NL
|
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
"}" NL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static const char* helper_ndrange_3d_glo[] =
|
static const char *helper_ndrange_3d_glo[] = {
|
||||||
{
|
NL,
|
||||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||||
NL, "{"
|
"{" NL,
|
||||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
|
||||||
NL, "}"
|
"memory_order_relaxed, memory_scope_device);" NL,
|
||||||
NL, ""
|
"}" NL,
|
||||||
NL, "kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
"" NL,
|
||||||
NL, "{"
|
"kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, "
|
||||||
NL, " size_t tid = get_global_id(0);"
|
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
|
||||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
"val, __global uint* ofs_arr)" NL,
|
||||||
NL, ""
|
"{" NL,
|
||||||
NL, " for(int i = 0; i < n; i++)"
|
" size_t tid = get_global_id(0);" NL,
|
||||||
NL, " {"
|
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||||
NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
|
"" NL,
|
||||||
NL, " if (global_work_size <= (len * len))"
|
" for(int i = 0; i < n; i++)" NL,
|
||||||
NL, " {"
|
" {" NL,
|
||||||
NL, " size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };"
|
" uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % "
|
||||||
NL, " ndrange_t ndrange = ndrange_3D(glob_size);"
|
"n] * glob_size_arr[(i + 2) % n];" NL,
|
||||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
" if (global_work_size <= (len * len))" NL,
|
||||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
" {" NL,
|
||||||
NL, " }"
|
" size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % "
|
||||||
NL, " }"
|
"n], glob_size_arr[(i + 2) % n] };" NL,
|
||||||
NL, "}"
|
" ndrange_t ndrange = ndrange_3D(glob_size);" NL,
|
||||||
NL
|
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||||
|
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||||
|
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
"}" NL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static const char* helper_ndrange_3d_loc[] =
|
static const char *helper_ndrange_3d_loc[] = {
|
||||||
{
|
NL,
|
||||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||||
NL, "{"
|
"{" NL,
|
||||||
NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
|
" atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
|
||||||
NL, "}"
|
"memory_order_relaxed, memory_scope_device);" NL,
|
||||||
NL, ""
|
"}" NL,
|
||||||
NL, "kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
"" NL,
|
||||||
NL, "{"
|
"kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, "
|
||||||
NL, " size_t tid = get_global_id(0);"
|
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
|
||||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
"val, __global uint* ofs_arr)" NL,
|
||||||
NL, ""
|
"{" NL,
|
||||||
NL, " for(int k = 0; k < n; k++)"
|
" size_t tid = get_global_id(0);" NL,
|
||||||
NL, " {"
|
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||||
NL, " for(int i = 0; i < n; i++)"
|
"" NL,
|
||||||
NL, " {"
|
" for(int k = 0; k < n; k++)" NL,
|
||||||
NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
|
" {" NL,
|
||||||
NL, " if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))"
|
" for(int i = 0; i < n; i++)" NL,
|
||||||
NL, " {"
|
" {" NL,
|
||||||
NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };"
|
" uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % "
|
||||||
NL, " size_t loc_size[] = { 1, 1, loc_size_arr[k] };"
|
"n] * glob_size_arr[(i + 2) % n];" NL,
|
||||||
NL, " ndrange_t ndrange = ndrange_3D(glob_size,loc_size);"
|
" if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && "
|
||||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
"global_work_size <= (len * len))" NL,
|
||||||
NL, " "
|
" {" NL,
|
||||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
" size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % "
|
||||||
NL, " }"
|
"n], glob_size_arr[(i + 2) % n] };" NL,
|
||||||
NL, " }"
|
" size_t loc_size[] = { 1, 1, loc_size_arr[k] };" NL,
|
||||||
NL, " }"
|
" ndrange_t ndrange = ndrange_3D(glob_size,loc_size);" NL,
|
||||||
NL, "}"
|
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||||
NL
|
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||||
|
" " NL,
|
||||||
|
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
"}" NL
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char* helper_ndrange_3d_ofs[] =
|
static const char *helper_ndrange_3d_ofs[] = {
|
||||||
{
|
NL,
|
||||||
NL, "void block_fn(int len, __global atomic_uint* val)"
|
"void block_fn(int len, __global atomic_uint* val)" NL,
|
||||||
NL, "{"
|
"{" NL,
|
||||||
NL, " atomic_fetch_add_explicit(&val[(get_global_offset(2) * get_global_size(0) * get_global_size(1) + get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
|
" atomic_fetch_add_explicit(&val[(get_global_offset(2) * "
|
||||||
NL, "}"
|
"get_global_size(0) * get_global_size(1) + get_global_offset(1) * "
|
||||||
NL, ""
|
"get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % "
|
||||||
NL, "kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)"
|
"len], 1u, memory_order_relaxed, memory_scope_device);" NL,
|
||||||
NL, "{"
|
"}" NL,
|
||||||
NL, " size_t tid = get_global_id(0);"
|
"" NL,
|
||||||
NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
|
"kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, "
|
||||||
NL, ""
|
"__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
|
||||||
NL, " for(int l = 0; l < n; l++)"
|
"val, __global uint* ofs_arr)" NL,
|
||||||
NL, " {"
|
"{" NL,
|
||||||
NL, " for(int k = 0; k < n; k++)"
|
" size_t tid = get_global_id(0);" NL,
|
||||||
NL, " {"
|
" void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
|
||||||
NL, " for(int i = 0; i < n; i++)"
|
"" NL,
|
||||||
NL, " {"
|
" for(int l = 0; l < n; l++)" NL,
|
||||||
NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
|
" {" NL,
|
||||||
NL, " if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))"
|
" for(int k = 0; k < n; k++)" NL,
|
||||||
NL, " {"
|
" {" NL,
|
||||||
NL, " size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n]};"
|
" for(int i = 0; i < n; i++)" NL,
|
||||||
NL, " size_t loc_size[3] = { 1, 1, loc_size_arr[k] };"
|
" {" NL,
|
||||||
NL, " size_t ofs[3] = { ofs_arr[l], ofs_arr[(l + 1) % n], ofs_arr[(l + 2) % n] };"
|
" uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) "
|
||||||
NL, " ndrange_t ndrange = ndrange_3D(ofs,glob_size,loc_size);"
|
"% n] * glob_size_arr[(i + 2) % n];" NL,
|
||||||
NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
|
" if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && "
|
||||||
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
|
"global_work_size <= (len * len))" NL,
|
||||||
NL, " }"
|
" {" NL,
|
||||||
NL, " }"
|
" size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) "
|
||||||
NL, " }"
|
"% n], glob_size_arr[(i + 2) % n]};" NL,
|
||||||
NL, " }"
|
" size_t loc_size[3] = { 1, 1, loc_size_arr[k] };" NL,
|
||||||
NL, "}"
|
" size_t ofs[3] = { ofs_arr[l], ofs_arr[(l + 1) % n], ofs_arr[(l "
|
||||||
NL
|
"+ 2) % n] };" NL,
|
||||||
|
" ndrange_t ndrange = ndrange_3D(ofs,glob_size,loc_size);" NL,
|
||||||
|
" int enq_res = enqueue_kernel(get_default_queue(), "
|
||||||
|
"CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
|
||||||
|
" if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
" }" NL,
|
||||||
|
"}" NL
|
||||||
};
|
};
|
||||||
|
|
||||||
static const kernel_src_dim_check sources_ndrange_Xd[] =
|
static const kernel_src_dim_check sources_ndrange_Xd[] =
|
||||||
|
|||||||
Reference in New Issue
Block a user