mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Added cl_khr_fp16 extension support for test_decorate from spirv_new (#1770)
* Added cl_khr_fp16 extension support for test_decorate from spirv_new, work in progress * Complemented test_decorate saturation test to support cl_khr_fp16 extension (issue #142) * Fixed clang format * scope of modifications: -changed naming convention of saturation .spvasm files related to test_decorate of spirv_new -restored float to char/uchar saturation tests -few minor corrections * fix ranges for half testing * fix formatting * one more formatting fix * remove unused function * use isnan instead of std::isnan isnan is currently implemented as a macro, not as a function, so we can't use std::isnan. * fix Clang warning about inexact conversion --------- Co-authored-by: Ben Ashbaugh <ben.ashbaugh@intel.com>
This commit is contained in:
@@ -0,0 +1,42 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos SPIR-V Tools Assembler; 0
|
||||
; Bound: 20
|
||||
; Schema: 0
|
||||
OpCapability Addresses
|
||||
OpCapability Linkage
|
||||
OpCapability Kernel
|
||||
OpCapability Float16
|
||||
OpCapability Int16
|
||||
OpMemoryModel Physical32 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_rounding_rte_half_short" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %in "in"
|
||||
OpName %entry "entry"
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
OpDecorate %gl_GlobalInvocationID Constant
|
||||
OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
|
||||
OpDecorate %6 FPRoundingMode RTE
|
||||
%uint = OpTypeInt 32 0
|
||||
%v3uint = OpTypeVector %uint 3
|
||||
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
|
||||
%void = OpTypeVoid
|
||||
%ushort = OpTypeInt 16 0
|
||||
%_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
|
||||
%half = OpTypeFloat 16
|
||||
%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
|
||||
%14 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_half
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
|
||||
%1 = OpFunction %void None %14
|
||||
%res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
|
||||
%in = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%entry = OpLabel
|
||||
%15 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0
|
||||
%16 = OpCompositeExtract %uint %15 0
|
||||
%17 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %16
|
||||
%18 = OpLoad %half %17 Aligned 2
|
||||
%6 = OpConvertFToS %ushort %18
|
||||
%19 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_ushort %res %16
|
||||
OpStore %19 %6
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
@@ -0,0 +1,46 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos SPIR-V Tools Assembler; 0
|
||||
; Bound: 23
|
||||
; Schema: 0
|
||||
OpCapability Addresses
|
||||
OpCapability Linkage
|
||||
OpCapability Kernel
|
||||
OpCapability Int64
|
||||
OpCapability Int16
|
||||
OpCapability Float16
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_rounding_rte_half_short" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %in "in"
|
||||
OpName %entry "entry"
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
OpDecorate %gl_GlobalInvocationID Constant
|
||||
OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
|
||||
OpDecorate %6 FPRoundingMode RTE
|
||||
%ulong = OpTypeInt 64 0
|
||||
%v3ulong = OpTypeVector %ulong 3
|
||||
%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
|
||||
%void = OpTypeVoid
|
||||
%ushort = OpTypeInt 16 0
|
||||
%_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
|
||||
%half = OpTypeFloat 16
|
||||
%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
|
||||
%14 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_half
|
||||
%ulong_32 = OpConstant %ulong 32
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
|
||||
%1 = OpFunction %void None %14
|
||||
%res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
|
||||
%in = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%entry = OpLabel
|
||||
%16 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0
|
||||
%17 = OpCompositeExtract %ulong %16 0
|
||||
%18 = OpShiftLeftLogical %ulong %17 %ulong_32
|
||||
%19 = OpShiftRightArithmetic %ulong %18 %ulong_32
|
||||
%20 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %19
|
||||
%21 = OpLoad %half %20 Aligned 2
|
||||
%6 = OpConvertFToS %ushort %21
|
||||
%22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_ushort %res %19
|
||||
OpStore %22 %6
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
@@ -0,0 +1,42 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos SPIR-V Tools Assembler; 0
|
||||
; Bound: 21
|
||||
; Schema: 0
|
||||
OpCapability Addresses
|
||||
OpCapability Linkage
|
||||
OpCapability Kernel
|
||||
OpCapability Float16
|
||||
OpCapability Int16
|
||||
OpMemoryModel Physical32 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_rounding_rtn_half_short" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %in "in"
|
||||
OpName %entry "entry"
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
OpDecorate %gl_GlobalInvocationID Constant
|
||||
OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
|
||||
OpDecorate %6 FPRoundingMode RTN
|
||||
%uint = OpTypeInt 32 0
|
||||
%v3uint = OpTypeVector %uint 3
|
||||
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
|
||||
%void = OpTypeVoid
|
||||
%ushort = OpTypeInt 16 0
|
||||
%_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
|
||||
%half = OpTypeFloat 16
|
||||
%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
|
||||
%15 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_half
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
|
||||
%1 = OpFunction %void None %15
|
||||
%res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
|
||||
%in = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%entry = OpLabel
|
||||
%16 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0
|
||||
%17 = OpCompositeExtract %uint %16 0
|
||||
%18 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %17
|
||||
%19 = OpLoad %half %18 Aligned 2
|
||||
%6 = OpConvertFToS %ushort %19
|
||||
%20 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_ushort %res %17
|
||||
OpStore %20 %6
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
@@ -0,0 +1,46 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos SPIR-V Tools Assembler; 0
|
||||
; Bound: 23
|
||||
; Schema: 0
|
||||
OpCapability Addresses
|
||||
OpCapability Linkage
|
||||
OpCapability Kernel
|
||||
OpCapability Int64
|
||||
OpCapability Float16
|
||||
OpCapability Int16
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_rounding_rtn_half_short" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %in "in"
|
||||
OpName %entry "entry"
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
OpDecorate %gl_GlobalInvocationID Constant
|
||||
OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
|
||||
OpDecorate %6 FPRoundingMode RTN
|
||||
%ulong = OpTypeInt 64 0
|
||||
%v3ulong = OpTypeVector %ulong 3
|
||||
%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
|
||||
%void = OpTypeVoid
|
||||
%ushort = OpTypeInt 16 0
|
||||
%_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
|
||||
%half = OpTypeFloat 16
|
||||
%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
|
||||
%14 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_half
|
||||
%ulong_32 = OpConstant %ulong 32
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
|
||||
%1 = OpFunction %void None %14
|
||||
%res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
|
||||
%in = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%entry = OpLabel
|
||||
%16 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0
|
||||
%17 = OpCompositeExtract %ulong %16 0
|
||||
%18 = OpShiftLeftLogical %ulong %17 %ulong_32
|
||||
%19 = OpShiftRightArithmetic %ulong %18 %ulong_32
|
||||
%20 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %19
|
||||
%21 = OpLoad %half %20 Aligned 2
|
||||
%6 = OpConvertFToS %ushort %21
|
||||
%22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_ushort %res %19
|
||||
OpStore %22 %6
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
@@ -0,0 +1,42 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos SPIR-V Tools Assembler; 0
|
||||
; Bound: 21
|
||||
; Schema: 0
|
||||
OpCapability Addresses
|
||||
OpCapability Linkage
|
||||
OpCapability Kernel
|
||||
OpCapability Float16
|
||||
OpCapability Int16
|
||||
OpMemoryModel Physical32 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_rounding_rtp_half_short" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %in "in"
|
||||
OpName %entry "entry"
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
OpDecorate %gl_GlobalInvocationID Constant
|
||||
OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
|
||||
OpDecorate %6 FPRoundingMode RTP
|
||||
%uint = OpTypeInt 32 0
|
||||
%v3uint = OpTypeVector %uint 3
|
||||
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
|
||||
%void = OpTypeVoid
|
||||
%ushort = OpTypeInt 16 0
|
||||
%_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
|
||||
%half = OpTypeFloat 16
|
||||
%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
|
||||
%15 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_half
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
|
||||
%1 = OpFunction %void None %15
|
||||
%res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
|
||||
%in = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%entry = OpLabel
|
||||
%16 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0
|
||||
%17 = OpCompositeExtract %uint %16 0
|
||||
%18 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %17
|
||||
%19 = OpLoad %half %18 Aligned 2
|
||||
%6 = OpConvertFToS %ushort %19
|
||||
%20 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_ushort %res %17
|
||||
OpStore %20 %6
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
@@ -0,0 +1,46 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos SPIR-V Tools Assembler; 0
|
||||
; Bound: 23
|
||||
; Schema: 0
|
||||
OpCapability Addresses
|
||||
OpCapability Linkage
|
||||
OpCapability Kernel
|
||||
OpCapability Int64
|
||||
OpCapability Float16
|
||||
OpCapability Int16
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_rounding_rtp_half_short" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %in "in"
|
||||
OpName %entry "entry"
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
OpDecorate %gl_GlobalInvocationID Constant
|
||||
OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
|
||||
OpDecorate %6 FPRoundingMode RTP
|
||||
%ulong = OpTypeInt 64 0
|
||||
%v3ulong = OpTypeVector %ulong 3
|
||||
%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
|
||||
%void = OpTypeVoid
|
||||
%ushort = OpTypeInt 16 0
|
||||
%_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
|
||||
%half = OpTypeFloat 16
|
||||
%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
|
||||
%14 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_half
|
||||
%ulong_32 = OpConstant %ulong 32
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
|
||||
%1 = OpFunction %void None %14
|
||||
%res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
|
||||
%in = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%entry = OpLabel
|
||||
%16 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0
|
||||
%17 = OpCompositeExtract %ulong %16 0
|
||||
%18 = OpShiftLeftLogical %ulong %17 %ulong_32
|
||||
%19 = OpShiftRightArithmetic %ulong %18 %ulong_32
|
||||
%20 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %19
|
||||
%21 = OpLoad %half %20 Aligned 2
|
||||
%6 = OpConvertFToS %ushort %21
|
||||
%22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_ushort %res %19
|
||||
OpStore %22 %6
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
@@ -0,0 +1,42 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos SPIR-V Tools Assembler; 0
|
||||
; Bound: 21
|
||||
; Schema: 0
|
||||
OpCapability Addresses
|
||||
OpCapability Linkage
|
||||
OpCapability Kernel
|
||||
OpCapability Float16
|
||||
OpCapability Int16
|
||||
OpMemoryModel Physical32 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_rounding_rtz_half_short" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %in "in"
|
||||
OpName %entry "entry"
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
OpDecorate %gl_GlobalInvocationID Constant
|
||||
OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
|
||||
OpDecorate %6 FPRoundingMode RTZ
|
||||
%uint = OpTypeInt 32 0
|
||||
%v3uint = OpTypeVector %uint 3
|
||||
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
|
||||
%void = OpTypeVoid
|
||||
%ushort = OpTypeInt 16 0
|
||||
%_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
|
||||
%half = OpTypeFloat 16
|
||||
%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
|
||||
%15 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_half
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
|
||||
%1 = OpFunction %void None %15
|
||||
%res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
|
||||
%in = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%entry = OpLabel
|
||||
%16 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0
|
||||
%17 = OpCompositeExtract %uint %16 0
|
||||
%18 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %17
|
||||
%19 = OpLoad %half %18 Aligned 2
|
||||
%6 = OpConvertFToS %ushort %19
|
||||
%20 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_ushort %res %17
|
||||
OpStore %20 %6
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
@@ -0,0 +1,46 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos SPIR-V Tools Assembler; 0
|
||||
; Bound: 23
|
||||
; Schema: 0
|
||||
OpCapability Addresses
|
||||
OpCapability Linkage
|
||||
OpCapability Kernel
|
||||
OpCapability Int64
|
||||
OpCapability Float16
|
||||
OpCapability Int16
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_rounding_rtz_half_short" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %in "in"
|
||||
OpName %entry "entry"
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
OpDecorate %gl_GlobalInvocationID Constant
|
||||
OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
|
||||
OpDecorate %6 FPRoundingMode RTZ
|
||||
%ulong = OpTypeInt 64 0
|
||||
%v3ulong = OpTypeVector %ulong 3
|
||||
%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
|
||||
%void = OpTypeVoid
|
||||
%ushort = OpTypeInt 16 0
|
||||
%_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
|
||||
%half = OpTypeFloat 16
|
||||
%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
|
||||
%14 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_half
|
||||
%ulong_32 = OpConstant %ulong 32
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
|
||||
%1 = OpFunction %void None %14
|
||||
%res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
|
||||
%in = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%entry = OpLabel
|
||||
%16 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0
|
||||
%17 = OpCompositeExtract %ulong %16 0
|
||||
%18 = OpShiftLeftLogical %ulong %17 %ulong_32
|
||||
%19 = OpShiftRightArithmetic %ulong %18 %ulong_32
|
||||
%20 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %19
|
||||
%21 = OpLoad %half %20 Aligned 2
|
||||
%6 = OpConvertFToS %ushort %21
|
||||
%22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_ushort %res %19
|
||||
OpStore %22 %6
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
@@ -8,7 +8,7 @@
|
||||
OpCapability Kernel
|
||||
OpCapability Float64
|
||||
OpMemoryModel Physical32 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_int" %gl_GlobalInvocationID
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_double_to_int" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
@@ -9,7 +9,7 @@
|
||||
OpCapability Int64
|
||||
OpCapability Float64
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_uint" %gl_GlobalInvocationID
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_double_to_int" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
@@ -8,7 +8,7 @@
|
||||
OpCapability Kernel
|
||||
OpCapability Float64
|
||||
OpMemoryModel Physical32 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_uint" %gl_GlobalInvocationID
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_double_to_uint" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
@@ -9,7 +9,7 @@
|
||||
OpCapability Int64
|
||||
OpCapability Float64
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_int" %gl_GlobalInvocationID
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_double_to_uint" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
@@ -8,7 +8,7 @@
|
||||
OpCapability Kernel
|
||||
OpCapability Int8
|
||||
OpMemoryModel Physical32 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_char" %gl_GlobalInvocationID
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_float_to_char" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
@@ -9,7 +9,7 @@
|
||||
OpCapability Int64
|
||||
OpCapability Int8
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_char" %gl_GlobalInvocationID
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_float_to_char" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
@@ -8,7 +8,7 @@
|
||||
OpCapability Kernel
|
||||
OpCapability Int16
|
||||
OpMemoryModel Physical32 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_short" %gl_GlobalInvocationID
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_float_to_short" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
@@ -9,7 +9,7 @@
|
||||
OpCapability Int64
|
||||
OpCapability Int16
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_short" %gl_GlobalInvocationID
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_float_to_short" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
@@ -8,7 +8,7 @@
|
||||
OpCapability Kernel
|
||||
OpCapability Int8
|
||||
OpMemoryModel Physical32 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_uchar" %gl_GlobalInvocationID
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_float_to_uchar" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
@@ -9,7 +9,7 @@
|
||||
OpCapability Int64
|
||||
OpCapability Int8
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_uchar" %gl_GlobalInvocationID
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_float_to_uchar" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
@@ -8,7 +8,7 @@
|
||||
OpCapability Kernel
|
||||
OpCapability Int16
|
||||
OpMemoryModel Physical32 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_ushort" %gl_GlobalInvocationID
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_float_to_ushort" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
@@ -9,7 +9,7 @@
|
||||
OpCapability Int64
|
||||
OpCapability Int16
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_ushort" %gl_GlobalInvocationID
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_float_to_ushort" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
@@ -0,0 +1,47 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos SPIR-V Tools Assembler; 0
|
||||
; Bound: 25
|
||||
; Schema: 0
|
||||
OpCapability Addresses
|
||||
OpCapability Linkage
|
||||
OpCapability Kernel
|
||||
OpCapability Int8
|
||||
OpCapability Float16
|
||||
OpMemoryModel Physical32 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_half_to_char" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
OpName %entry "entry"
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
OpDecorate %gl_GlobalInvocationID Constant
|
||||
OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
|
||||
OpDecorate %7 SaturatedConversion
|
||||
%uint = OpTypeInt 32 0
|
||||
%v3uint = OpTypeVector %uint 3
|
||||
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
|
||||
%void = OpTypeVoid
|
||||
%uchar = OpTypeInt 8 0
|
||||
%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
|
||||
%half = OpTypeFloat 16
|
||||
%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
|
||||
%16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
|
||||
%1 = OpFunction %void None %16
|
||||
%res = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
|
||||
%lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%rhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%entry = OpLabel
|
||||
%17 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0
|
||||
%18 = OpCompositeExtract %uint %17 0
|
||||
%19 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %lhs %18
|
||||
%20 = OpLoad %half %19 Aligned 2
|
||||
%21 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %rhs %18
|
||||
%22 = OpLoad %half %21 Aligned 2
|
||||
%23 = OpFMul %half %20 %22
|
||||
%7 = OpConvertFToS %uchar %23
|
||||
%24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %res %18
|
||||
OpStore %24 %7
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
@@ -0,0 +1,51 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos SPIR-V Tools Assembler; 0
|
||||
; Bound: 28
|
||||
; Schema: 0
|
||||
OpCapability Addresses
|
||||
OpCapability Linkage
|
||||
OpCapability Kernel
|
||||
OpCapability Int64
|
||||
OpCapability Int8
|
||||
OpCapability Float16
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_half_to_char" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
OpName %entry "entry"
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
OpDecorate %gl_GlobalInvocationID Constant
|
||||
OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
|
||||
OpDecorate %7 SaturatedConversion
|
||||
%ulong = OpTypeInt 64 0
|
||||
%v3ulong = OpTypeVector %ulong 3
|
||||
%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
|
||||
%void = OpTypeVoid
|
||||
%uchar = OpTypeInt 8 0
|
||||
%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
|
||||
%half = OpTypeFloat 16
|
||||
%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
|
||||
%16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
|
||||
%ulong_32 = OpConstant %ulong 32
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
|
||||
%1 = OpFunction %void None %16
|
||||
%res = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
|
||||
%lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%rhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%entry = OpLabel
|
||||
%18 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0
|
||||
%19 = OpCompositeExtract %ulong %18 0
|
||||
%20 = OpShiftLeftLogical %ulong %19 %ulong_32
|
||||
%21 = OpShiftRightArithmetic %ulong %20 %ulong_32
|
||||
%22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %lhs %21
|
||||
%23 = OpLoad %half %22 Aligned 2
|
||||
%24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %rhs %21
|
||||
%25 = OpLoad %half %24 Aligned 2
|
||||
%26 = OpFMul %half %23 %25
|
||||
%7 = OpConvertFToS %uchar %26
|
||||
%27 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %res %21
|
||||
OpStore %27 %7
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
@@ -0,0 +1,47 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos SPIR-V Tools Assembler; 0
|
||||
; Bound: 25
|
||||
; Schema: 0
|
||||
OpCapability Addresses
|
||||
OpCapability Linkage
|
||||
OpCapability Kernel
|
||||
OpCapability Int8
|
||||
OpCapability Float16
|
||||
OpMemoryModel Physical32 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_half_to_uchar" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
OpName %entry "entry"
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
OpDecorate %gl_GlobalInvocationID Constant
|
||||
OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
|
||||
OpDecorate %7 SaturatedConversion
|
||||
%uint = OpTypeInt 32 0
|
||||
%v3uint = OpTypeVector %uint 3
|
||||
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
|
||||
%void = OpTypeVoid
|
||||
%uchar = OpTypeInt 8 0
|
||||
%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
|
||||
%half = OpTypeFloat 16
|
||||
%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
|
||||
%16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
|
||||
%1 = OpFunction %void None %16
|
||||
%res = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
|
||||
%lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%rhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%entry = OpLabel
|
||||
%17 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0
|
||||
%18 = OpCompositeExtract %uint %17 0
|
||||
%19 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %lhs %18
|
||||
%20 = OpLoad %half %19 Aligned 2
|
||||
%21 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %rhs %18
|
||||
%22 = OpLoad %half %21 Aligned 2
|
||||
%23 = OpFMul %half %20 %22
|
||||
%7 = OpConvertFToU %uchar %23
|
||||
%24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %res %18
|
||||
OpStore %24 %7
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
@@ -0,0 +1,51 @@
|
||||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Khronos SPIR-V Tools Assembler; 0
|
||||
; Bound: 28
|
||||
; Schema: 0
|
||||
OpCapability Addresses
|
||||
OpCapability Linkage
|
||||
OpCapability Kernel
|
||||
OpCapability Int64
|
||||
OpCapability Int8
|
||||
OpCapability Float16
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "decorate_saturated_conversion_half_to_uchar" %gl_GlobalInvocationID
|
||||
OpName %res "res"
|
||||
OpName %lhs "lhs"
|
||||
OpName %rhs "rhs"
|
||||
OpName %entry "entry"
|
||||
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
|
||||
OpDecorate %gl_GlobalInvocationID Constant
|
||||
OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
|
||||
OpDecorate %7 SaturatedConversion
|
||||
%ulong = OpTypeInt 64 0
|
||||
%v3ulong = OpTypeVector %ulong 3
|
||||
%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
|
||||
%void = OpTypeVoid
|
||||
%uchar = OpTypeInt 8 0
|
||||
%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
|
||||
%half = OpTypeFloat 16
|
||||
%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
|
||||
%16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
|
||||
%ulong_32 = OpConstant %ulong 32
|
||||
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
|
||||
%1 = OpFunction %void None %16
|
||||
%res = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
|
||||
%lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%rhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
|
||||
%entry = OpLabel
|
||||
%18 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0
|
||||
%19 = OpCompositeExtract %ulong %18 0
|
||||
%20 = OpShiftLeftLogical %ulong %19 %ulong_32
|
||||
%21 = OpShiftRightArithmetic %ulong %20 %ulong_32
|
||||
%22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %lhs %21
|
||||
%23 = OpLoad %half %22 Aligned 2
|
||||
%24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %rhs %21
|
||||
%25 = OpLoad %half %24 Aligned 2
|
||||
%26 = OpFMul %half %23 %25
|
||||
%7 = OpConvertFToU %uchar %26
|
||||
%27 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %res %21
|
||||
OpStore %27 %7
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
@@ -22,10 +22,7 @@
|
||||
#include <limits>
|
||||
#include <cmath>
|
||||
|
||||
#ifndef isnan
|
||||
// Ensure isnan is always present as a macro
|
||||
#define isnan std::isnan
|
||||
#endif
|
||||
#include <CL/cl_half.h>
|
||||
|
||||
long double reference_remainderl(long double x, long double y);
|
||||
int gIsInRTZMode = 0;
|
||||
@@ -33,7 +30,6 @@ int gDeviceILogb0 = 1;
|
||||
int gDeviceILogbNaN = 1;
|
||||
int gCheckTininessBeforeRounding = 1;
|
||||
|
||||
|
||||
static int verify_results(cl_device_id deviceID,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
@@ -47,7 +43,8 @@ static int verify_results(cl_device_id deviceID,
|
||||
cl_int err = 0;
|
||||
|
||||
RandomSeed seed(gRandomSeed);
|
||||
for (int i = 0; i < num; i++) {
|
||||
for (int i = 0; i < num; i++)
|
||||
{
|
||||
h_lhs[i] = genrand<cl_int>(seed);
|
||||
h_rhs[i] = genrand<cl_int>(seed);
|
||||
}
|
||||
@@ -89,8 +86,10 @@ static int verify_results(cl_device_id deviceID,
|
||||
err = clEnqueueReadBuffer(queue, res, CL_TRUE, 0, bytes, &h_res[0], 0, NULL, NULL);
|
||||
SPIRV_CHECK_ERROR(err, "Failed to read to output");
|
||||
|
||||
for (int i = 0; i < num; i++) {
|
||||
if (h_res[i] != (h_lhs[i] + h_rhs[i])) {
|
||||
for (int i = 0; i < num; i++)
|
||||
{
|
||||
if (h_res[i] != (h_lhs[i] + h_rhs[i]))
|
||||
{
|
||||
log_error("Values do not match at location %d\n", i);
|
||||
return -1;
|
||||
}
|
||||
@@ -135,12 +134,10 @@ TEST_SPIRV_FUNC(decorate_constant)
|
||||
|
||||
TEST_SPIRV_FUNC(decorate_cpacked)
|
||||
{
|
||||
PACKED(
|
||||
struct packed_struct_t {
|
||||
cl_int ival;
|
||||
cl_char cval;
|
||||
}
|
||||
);
|
||||
PACKED(struct packed_struct_t {
|
||||
cl_int ival;
|
||||
cl_char cval;
|
||||
});
|
||||
|
||||
typedef struct packed_struct_t packed_t;
|
||||
|
||||
@@ -169,9 +166,10 @@ TEST_SPIRV_FUNC(decorate_cpacked)
|
||||
err = clEnqueueReadBuffer(queue, res, CL_TRUE, 0, bytes, &h_res[0], 0, NULL, NULL);
|
||||
SPIRV_CHECK_ERROR(err, "Failed to read to output");
|
||||
|
||||
for (int i = 0; i < num; i++) {
|
||||
if (h_res[i].ival != 2100483600 ||
|
||||
h_res[i].cval != 127) {
|
||||
for (int i = 0; i < num; i++)
|
||||
{
|
||||
if (h_res[i].ival != 2100483600 || h_res[i].cval != 127)
|
||||
{
|
||||
log_error("Values do not match at location %d\n", i);
|
||||
return -1;
|
||||
}
|
||||
@@ -180,20 +178,79 @@ TEST_SPIRV_FUNC(decorate_cpacked)
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename Ti, typename Tl, typename To>
|
||||
int verify_saturated_results(cl_device_id deviceID,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
const char *kname,
|
||||
const clProgramWrapper &prog)
|
||||
template <typename Ti, typename Tl, typename To>
|
||||
static inline Ti generate_saturated_lhs_input(RandomSeed &seed)
|
||||
{
|
||||
if(std::string(kname).find("double") != std::string::npos) {
|
||||
if(!is_extension_available(deviceID, "cl_khr_fp64")) {
|
||||
log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
|
||||
return 0;
|
||||
}
|
||||
constexpr auto loVal = std::numeric_limits<To>::min();
|
||||
constexpr auto hiVal = std::numeric_limits<To>::max();
|
||||
constexpr Tl range = (Tl)(hiVal) - (Tl)(loVal);
|
||||
|
||||
if (std::is_same<cl_half, Ti>::value)
|
||||
{
|
||||
return cl_half_from_float(genrand<float>(seed) * range, CL_HALF_RTE);
|
||||
}
|
||||
|
||||
return genrand<Ti>(seed) * range;
|
||||
}
|
||||
|
||||
template <typename Ti, typename Tl, typename To>
|
||||
static inline Ti generate_saturated_rhs_input(RandomSeed &seed)
|
||||
{
|
||||
constexpr auto hiVal = std::numeric_limits<To>::max();
|
||||
|
||||
Tl val = genrand<Tl>(seed) % hiVal;
|
||||
if (std::is_same<cl_half, Ti>::value)
|
||||
{
|
||||
if (val > 0 && val * 20 < hiVal)
|
||||
{
|
||||
return cl_half_from_float(NAN, CL_HALF_RTE);
|
||||
}
|
||||
return cl_half_from_float(val, CL_HALF_RTE);
|
||||
}
|
||||
|
||||
if (val > 0 && val * 20 < hiVal)
|
||||
{
|
||||
return (Ti)NAN;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
template <typename Ti, typename Tl, typename To>
|
||||
static inline To compute_saturated_output(Ti lhs, Ti rhs)
|
||||
{
|
||||
constexpr auto loVal = std::numeric_limits<To>::min();
|
||||
constexpr auto hiVal = std::numeric_limits<To>::max();
|
||||
|
||||
if (std::is_same<Ti, cl_half>::value)
|
||||
{
|
||||
cl_float f = cl_half_to_float(lhs) * cl_half_to_float(rhs);
|
||||
|
||||
// Quantize to fp16:
|
||||
f = cl_half_to_float(cl_half_from_float(f, CL_HALF_RTE));
|
||||
|
||||
To val = (To)std::min<float>(std::max<float>(f, loVal), hiVal);
|
||||
if (isnan(cl_half_from_float(rhs, CL_HALF_RTE)))
|
||||
{
|
||||
val = 0;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
Tl ival = (Tl)(lhs * rhs);
|
||||
To val = (To)std::min<Ti>(std::max<Ti>(ival, loVal), hiVal);
|
||||
|
||||
if (isnan(rhs))
|
||||
{
|
||||
val = 0;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
template <typename Ti, typename Tl, typename To>
|
||||
int verify_saturated_results(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue, const char *kname,
|
||||
const clProgramWrapper &prog)
|
||||
{
|
||||
cl_int err = 0;
|
||||
|
||||
const int num = 1 << 20;
|
||||
@@ -207,21 +264,11 @@ int verify_saturated_results(cl_device_id deviceID,
|
||||
std::vector<Ti> h_lhs(num);
|
||||
std::vector<Ti> h_rhs(num);
|
||||
|
||||
To loVal = std::numeric_limits<To>::min();
|
||||
To hiVal = std::numeric_limits<To>::max();
|
||||
|
||||
Tl range = (Tl)(hiVal) - (Tl)(loVal);
|
||||
|
||||
RandomSeed seed(gRandomSeed);
|
||||
for (int i = 0; i < num; i++) {
|
||||
h_lhs[i] = genrand<Ti>(seed) * range;
|
||||
Tl val = (genrand<Tl>(seed) % hiVal);
|
||||
// randomly set some values on rhs to NaN
|
||||
if (val * 20 < hiVal) {
|
||||
h_rhs[i] = NAN;
|
||||
} else {
|
||||
h_rhs[i] = (Ti)(val);
|
||||
}
|
||||
for (int i = 0; i < num; i++)
|
||||
{
|
||||
h_lhs[i] = generate_saturated_lhs_input<Ti, Tl, To>(seed);
|
||||
h_rhs[i] = generate_saturated_rhs_input<Ti, Tl, To>(seed);
|
||||
}
|
||||
|
||||
clMemWrapper lhs = clCreateBuffer(context, CL_MEM_READ_ONLY, in_bytes, NULL, &err);
|
||||
@@ -256,16 +303,13 @@ int verify_saturated_results(cl_device_id deviceID,
|
||||
err = clEnqueueReadBuffer(queue, res, CL_TRUE, 0, out_bytes, &h_res[0], 0, NULL, NULL);
|
||||
SPIRV_CHECK_ERROR(err, "Failed to read to output");
|
||||
|
||||
for (int i = 0; i < num; i++) {
|
||||
Tl ival = (Tl)(h_lhs[i] * h_rhs[i]);
|
||||
To val = (To)std::min<Ti>(std::max<Ti>(ival, loVal), hiVal);
|
||||
for (int i = 0; i < num; i++)
|
||||
{
|
||||
To val = compute_saturated_output<Ti, Tl, To>(h_lhs[i], h_rhs[i]);
|
||||
|
||||
if (isnan(h_rhs[i])) {
|
||||
val = 0;
|
||||
}
|
||||
|
||||
if (val != h_res[i]) {
|
||||
log_error("Value error at %d\n", i);
|
||||
if (val != h_res[i])
|
||||
{
|
||||
log_error("Value error at %d: got %d, want %d\n", i, val, h_res[i]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -281,31 +325,47 @@ int test_saturate_full(cl_device_id deviceID,
|
||||
const char *name,
|
||||
const char *types)
|
||||
{
|
||||
if(std::string(types).find("double") != std::string::npos) {
|
||||
if(!is_extension_available(deviceID, "cl_khr_fp64")) {
|
||||
log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
|
||||
if (std::string(types).find("double") != std::string::npos)
|
||||
{
|
||||
if (!is_extension_available(deviceID, "cl_khr_fp64"))
|
||||
{
|
||||
log_info("Extension cl_khr_fp64 not supported; skipping double "
|
||||
"tests.\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (std::string(types).find("half") != std::string::npos)
|
||||
{
|
||||
if (!is_extension_available(deviceID, "cl_khr_fp16"))
|
||||
{
|
||||
log_info(
|
||||
"Extension cl_khr_fp16 not supported; skipping half tests.\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
clProgramWrapper prog;
|
||||
cl_int err = 0;
|
||||
err = get_program_with_il(prog, deviceID, context, name);
|
||||
SPIRV_CHECK_ERROR(err, "Failed to build program");
|
||||
return verify_saturated_results<Ti, Tl, To>(deviceID, context, queue, name, prog);
|
||||
return verify_saturated_results<Ti, Tl, To>(deviceID, context, queue, name,
|
||||
prog);
|
||||
}
|
||||
|
||||
#define TEST_SATURATED_CONVERSION(Ti, Tl, To) \
|
||||
TEST_SPIRV_FUNC(decorate_saturated_conversion_##To) \
|
||||
{ \
|
||||
typedef cl_##Ti cl_Ti; \
|
||||
typedef cl_##Tl cl_Tl; \
|
||||
typedef cl_##To cl_To; \
|
||||
return test_saturate_full<cl_Ti, cl_Tl, cl_To> \
|
||||
(deviceID, context, queue, \
|
||||
"decorate_saturated_conversion_" #To, \
|
||||
#Ti #Tl #To); \
|
||||
} \
|
||||
// Defines the SPIR-V test decorate_saturated_conversion_<Ti>_to_<To>.
// The test forwards to test_saturate_full<cl_Ti, cl_Tl, cl_To>, passing the
// string "decorate_saturated_conversion_<Ti>_to_<To>" as the program name and
// the concatenated type names as the `types` string.
#define TEST_SATURATED_CONVERSION(Ti, Tl, To)                                  \
    TEST_SPIRV_FUNC(decorate_saturated_conversion_##Ti##_to_##To)              \
    {                                                                          \
        return test_saturate_full<cl_##Ti, cl_##Tl, cl_##To>(                  \
            deviceID, context, queue,                                          \
            "decorate_saturated_conversion_" #Ti "_to_" #To, #Ti #Tl #To);     \
    }
|
||||
|
||||
TEST_SATURATED_CONVERSION(half, short, char)
|
||||
TEST_SATURATED_CONVERSION(half, ushort, uchar)
|
||||
TEST_SATURATED_CONVERSION(float, int, char)
|
||||
TEST_SATURATED_CONVERSION(float, uint, uchar)
|
||||
TEST_SATURATED_CONVERSION(float, int, short)
|
||||
@@ -321,13 +381,26 @@ int test_fp_rounding(cl_device_id deviceID,
|
||||
std::vector<Ti> &h_in,
|
||||
std::vector<To> &h_out)
|
||||
{
|
||||
if(std::string(name).find("double") != std::string::npos) {
|
||||
if(!is_extension_available(deviceID, "cl_khr_fp64")) {
|
||||
log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
|
||||
if (std::string(name).find("double") != std::string::npos)
|
||||
{
|
||||
if (!is_extension_available(deviceID, "cl_khr_fp64"))
|
||||
{
|
||||
log_info("Extension cl_khr_fp64 not supported; skipping double "
|
||||
"tests.\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (std::string(name).find("half") != std::string::npos)
|
||||
{
|
||||
if (!is_extension_available(deviceID, "cl_khr_fp16"))
|
||||
{
|
||||
log_info(
|
||||
"Extension cl_khr_fp16 not supported; skipping half tests.\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
const int num = h_in.size();
|
||||
const size_t in_bytes = num * sizeof(Ti);
|
||||
const size_t out_bytes = num * sizeof(To);
|
||||
@@ -362,9 +435,12 @@ int test_fp_rounding(cl_device_id deviceID,
|
||||
err = clEnqueueReadBuffer(queue, out, CL_TRUE, 0, out_bytes, &h_res[0], 0, NULL, NULL);
|
||||
SPIRV_CHECK_ERROR(err, "Failed to read from output");
|
||||
|
||||
for (int i = 0; i < num; i++) {
|
||||
if (h_res[i] != h_out[i]) {
|
||||
log_error("Values do not match at location %d. Original :%lf, Expected: %ld, Found %ld\n",
|
||||
for (int i = 0; i < num; i++)
|
||||
{
|
||||
if (h_res[i] != h_out[i])
|
||||
{
|
||||
log_error("Values do not match at location %d. Original :%lf, "
|
||||
"Expected: %ld, Found %ld\n",
|
||||
i, h_in[i], h_out[i], h_res[i]);
|
||||
return -1;
|
||||
}
|
||||
@@ -373,60 +449,80 @@ int test_fp_rounding(cl_device_id deviceID,
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename Ti, typename To>
|
||||
inline To round_to_zero(Ti in)
|
||||
template <typename T> static inline double to_double(T in) { return in; }
|
||||
|
||||
template <> inline double to_double(cl_half in) { return cl_half_to_float(in); }
|
||||
|
||||
// Reference for round-toward-zero conversion: the input is widened with
// to_double (which decodes cl_half) and then truncated by the cast to To.
template <typename Ti, typename To> static inline To round_to_zero(Ti in)
{
    return (To)to_double(in);
}
|
||||
|
||||
template<typename T>
|
||||
int sign(T val)
|
||||
// Returns -1 for negative values, 1 for positive values and 0 otherwise.
template <typename T> static inline int sign(T val)
{
    return (val < 0) ? -1 : ((val > 0) ? 1 : 0);
}
|
||||
|
||||
template<typename Ti, typename To>
|
||||
inline To round_to_even(Ti in)
|
||||
template <typename Ti, typename To> static inline To round_to_even(Ti in)
|
||||
{
|
||||
// https://en.wikipedia.org/wiki/Rounding#Round_half_to_even
|
||||
return std::floor(in + 0.5) - 1 + std::abs(sign(reference_remainderl((long double)in, 2) - 0.5));
|
||||
double din = to_double(in);
|
||||
return std::floor(din + 0.5) - 1
|
||||
+ std::abs(sign(reference_remainderl((long double)din, 2) - 0.5));
|
||||
}
|
||||
|
||||
template<typename Ti, typename To>
|
||||
inline To round_to_posinf(Ti in)
|
||||
// Reference for round-toward-positive-infinity conversion: std::ceil applied
// to the input after widening it with to_double (which decodes cl_half).
template <typename Ti, typename To> static inline To round_to_posinf(Ti in)
{
    return std::ceil(to_double(in));
}
|
||||
|
||||
template<typename Ti, typename To>
|
||||
inline To round_to_neginf(Ti in)
|
||||
// Reference for round-toward-negative-infinity conversion: std::floor applied
// to the input after widening it with to_double (which decodes cl_half).
template <typename Ti, typename To> static inline To round_to_neginf(Ti in)
{
    return std::floor(to_double(in));
}
|
||||
|
||||
#define TEST_SPIRV_FP_ROUNDING_DECORATE(name, func, Ti, To) \
|
||||
TEST_SPIRV_FUNC(decorate_fp_rounding_mode_##name##_##Ti##_##To) \
|
||||
{ \
|
||||
typedef cl_##Ti clTi; \
|
||||
typedef cl_##To clTo; \
|
||||
const int num = 1 << 16; \
|
||||
std::vector<clTi> in(num); \
|
||||
std::vector<clTo> out(num); \
|
||||
RandomSeed seed(gRandomSeed); \
|
||||
\
|
||||
for (int i = 0; i < num; i++) { \
|
||||
in[i] = num * genrand<clTi>(seed) - num/2; \
|
||||
out[i] = func<clTi, clTo>(in[i]); \
|
||||
} \
|
||||
const char *name = "decorate_rounding_" #name "_" #Ti "_" #To; \
|
||||
return test_fp_rounding(deviceID, context, queue, \
|
||||
name, in, out); \
|
||||
} \
|
||||
template <typename Ti, typename To>
|
||||
static inline Ti generate_fprounding_input(RandomSeed &seed)
|
||||
{
|
||||
if (std::is_same<cl_half, Ti>::value)
|
||||
{
|
||||
constexpr auto minVal =
|
||||
static_cast<cl_float>(std::numeric_limits<To>::min() / 2);
|
||||
constexpr auto maxVal =
|
||||
static_cast<cl_float>(std::numeric_limits<To>::max() / 2);
|
||||
cl_float f = genrandReal_range<cl_float>(minVal, maxVal, seed);
|
||||
return cl_half_from_float(f, CL_HALF_RTE);
|
||||
}
|
||||
|
||||
constexpr auto minVal = static_cast<Ti>(std::numeric_limits<To>::min() / 2);
|
||||
constexpr auto maxVal = static_cast<Ti>(std::numeric_limits<To>::max() / 2);
|
||||
return genrandReal_range<Ti>(minVal, maxVal, seed);
|
||||
}
|
||||
|
||||
// Defines the SPIR-V test decorate_fp_rounding_mode_<name>_<Ti>_<To>.
// The test generates 1 << 16 inputs with generate_fprounding_input, computes
// the expected output of each one with the host reference `func`, and hands
// both vectors to test_fp_rounding together with the program name
// "decorate_rounding_<name>_<Ti>_<To>".
#define TEST_SPIRV_FP_ROUNDING_DECORATE(name, func, Ti, To)                    \
    TEST_SPIRV_FUNC(decorate_fp_rounding_mode_##name##_##Ti##_##To)            \
    {                                                                          \
        const int count = 1 << 16;                                             \
        std::vector<cl_##Ti> inputs(count);                                    \
        std::vector<cl_##To> expected(count);                                  \
        RandomSeed rng(gRandomSeed);                                           \
                                                                               \
        for (int idx = 0; idx < count; ++idx)                                  \
        {                                                                      \
            const cl_##Ti sample =                                             \
                generate_fprounding_input<cl_##Ti, cl_##To>(rng);              \
            inputs[idx] = sample;                                              \
            expected[idx] = func<cl_##Ti, cl_##To>(sample);                    \
        }                                                                      \
        return test_fp_rounding(deviceID, context, queue,                      \
                                "decorate_rounding_" #name "_" #Ti "_" #To,    \
                                inputs, expected);                             \
    }
|
||||
|
||||
TEST_SPIRV_FP_ROUNDING_DECORATE(rte, round_to_even, half, short);
|
||||
TEST_SPIRV_FP_ROUNDING_DECORATE(rtz, round_to_zero, half, short);
|
||||
TEST_SPIRV_FP_ROUNDING_DECORATE(rtp, round_to_posinf, half, short);
|
||||
TEST_SPIRV_FP_ROUNDING_DECORATE(rtn, round_to_neginf, half, short);
|
||||
|
||||
TEST_SPIRV_FP_ROUNDING_DECORATE(rte, round_to_even, float, int);
|
||||
TEST_SPIRV_FP_ROUNDING_DECORATE(rtz, round_to_zero, float, int);
|
||||
|
||||
@@ -94,6 +94,13 @@ T genrandReal(RandomSeed &seed)
|
||||
return genrand_real1(seed);
|
||||
}
|
||||
|
||||
// Longer-term this could be refactored out and replace random_float():
|
||||
template <typename T> T genrandReal_range(T low, T high, RandomSeed &seed)
|
||||
{
|
||||
T t = genrand_real1(seed);
|
||||
return (1.0 - t) * low + t * high;
|
||||
}
|
||||
|
||||
template<typename T, int N>
|
||||
T genrandRealVec(RandomSeed &seed)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user