From 67ac6c8d2d1b2e8ee9d6b775be459759ec301bf9 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Tue, 14 Jun 2022 23:51:39 +0800 Subject: [PATCH] Fix test_half async_work_group_copy arguments (#1298) (#1299) Workitems in the last workgroup calls async_work_group_copy with different argument values depending on 'adjust'. According to spec, this results in undefined values. --- test_conformance/half/Test_vStoreHalf.cpp | 35 ++++++++++++++++++----- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp index 85824a9f..3ca5920b 100644 --- a/test_conformance/half/Test_vStoreHalf.cpp +++ b/test_conformance/half/Test_vStoreHalf.cpp @@ -422,7 +422,9 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR "__kernel void test( __global float *p, __global half *f,\n" " uint extra_last_thread )\n" "{\n" - " __local ushort data[3*(", local_buf_size, "+1)];\n" + " __local ushort data[3*(", + local_buf_size, + "+1)];\n" " size_t i = get_global_id(0);\n" " size_t lid = get_local_id(0);\n" " size_t last_i = get_global_size(0)-1;\n" @@ -432,9 +434,18 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR " if(last_i == i && extra_last_thread != 0) {\n" " adjust = 3-extra_last_thread;\n" " } " - " vstore_half3",roundName,"( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n" + " vstore_half3", + roundName, + "( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n" " barrier( CLK_LOCAL_MEM_FENCE ); \n" - " async_event = async_work_group_copy((__global ushort *)(f+3*(i-lid)), (__local ushort *)(&data[adjust]), lsize*3-adjust, 0);\n" // investigate later + " if (get_group_id(0) == (get_num_groups(0) - 1) &&\n" + " extra_last_thread != 0) {\n" + " adjust = 3-extra_last_thread;\n" + " }\n" + " async_event = async_work_group_copy(\n" + " (__global ushort*)(f+3*(i-lid)),\n" + " (__local ushort *)(&data[adjust]),\n" + " lsize*3-adjust, 0);\n" // investigate later " wait_group_events(1, &async_event);\n" "}\n" }; @@ -524,7 +535,9 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR "__kernel void test( __global double *p, __global half *f,\n" " uint extra_last_thread )\n" "{\n" - " __local ushort data[3*(", local_buf_size, "+1)];\n" + " __local ushort data[3*(", + local_buf_size, + "+1)];\n" " size_t i = get_global_id(0);\n" " size_t lid = get_local_id(0);\n" " size_t last_i = get_global_size(0)-1;\n" @@ -534,15 +547,23 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR " if(last_i == i && extra_last_thread != 0) {\n" " adjust = 3-extra_last_thread;\n" " }\n " - " vstore_half3",roundName,"( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n" + " vstore_half3", + roundName, + "( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n" " barrier( CLK_LOCAL_MEM_FENCE ); \n" - " async_event = async_work_group_copy((__global ushort *)(f+3*(i-lid)), (__local ushort *)(&data[adjust]), lsize*3-adjust, 0);\n" // investigate later + " if (get_group_id(0) == (get_num_groups(0) - 1) &&\n" + " extra_last_thread != 0) {\n" + " adjust = 3-extra_last_thread;\n" + " }\n" + " async_event = async_work_group_copy(\n" + " (__global ushort *)(f+3*(i-lid)),\n" + " (__local ushort *)(&data[adjust]),\n" + " lsize*3-adjust, 0);\n" // investigate later " wait_group_events(1, &async_event);\n" "}\n" }; - if(g_arrVecSizes[vectorSize] == 3) { programs[vectorSize][0] = MakeProgram( device, source_v3, sizeof(source_v3) / sizeof( source_v3[0]) ); } else {