23 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_FP16_H 24 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_FP16_H 27 #if ( __clang_major__ > 3) 28 typedef __fp16 __half;
29 typedef __fp16 __half1 __attribute__((ext_vector_type(1)));
30 typedef __fp16 __half2 __attribute__((ext_vector_type(2)));
36 __device__ __half __hadd(
const __half a,
const __half b);
37 __device__ __half __hadd_sat(__half a, __half b);
38 __device__ __half __hfma(__half a, __half b, __half c);
39 __device__ __half __hfma_sat(__half a, __half b, __half c);
40 __device__ __half __hmul(__half a, __half b);
41 __device__ __half __hmul_sat(__half a, __half b);
42 __device__ __half __hneg(__half a);
43 __device__ __half __hsub(__half a, __half b);
44 __device__ __half __hsub_sat(__half a, __half b);
45 __device__ __half hdiv(__half a, __half b);
51 __device__
static __half2 __hadd2(__half2 a, __half2 b);
52 __device__
static __half2 __hadd2_sat(__half2 a, __half2 b);
53 __device__
static __half2 __hfma2(__half2 a, __half2 b, __half2 c);
54 __device__
static __half2 __hfma2_sat(__half2 a, __half2 b, __half2 c);
55 __device__
static __half2 __hmul2(__half2 a, __half2 b);
56 __device__
static __half2 __hmul2_sat(__half2 a, __half2 b);
57 __device__
static __half2 __hsub2(__half2 a, __half2 b);
58 __device__
static __half2 __hneg2(__half2 a);
59 __device__
static __half2 __hsub2_sat(__half2 a, __half2 b);
60 __device__
static __half2 h2div(__half2 a, __half2 b);
66 __device__
bool __heq(__half a, __half b);
67 __device__
bool __hge(__half a, __half b);
68 __device__
bool __hgt(__half a, __half b);
69 __device__
bool __hisinf(__half a);
70 __device__
bool __hisnan(__half a);
71 __device__
bool __hle(__half a, __half b);
72 __device__
bool __hlt(__half a, __half b);
73 __device__
bool __hne(__half a, __half b);
79 __device__
bool __hbeq2(__half2 a, __half2 b);
80 __device__
bool __hbge2(__half2 a, __half2 b);
81 __device__
bool __hbgt2(__half2 a, __half2 b);
82 __device__
bool __hble2(__half2 a, __half2 b);
83 __device__
bool __hblt2(__half2 a, __half2 b);
84 __device__
bool __hbne2(__half2 a, __half2 b);
85 __device__ __half2 __heq2(__half2 a, __half2 b);
86 __device__ __half2 __hge2(__half2 a, __half2 b);
87 __device__ __half2 __hgt2(__half2 a, __half2 b);
88 __device__ __half2 __hisnan2(__half2 a);
89 __device__ __half2 __hle2(__half2 a, __half2 b);
90 __device__ __half2 __hlt2(__half2 a, __half2 b);
91 __device__ __half2 __hne2(__half2 a, __half2 b);
97 __device__
static __half hceil(
const __half h);
98 __device__
static __half hcos(
const __half h);
99 __device__
static __half hexp(
const __half h);
100 __device__
static __half hexp10(
const __half h);
101 __device__
static __half hexp2(
const __half h);
102 __device__
static __half hfloor(
const __half h);
103 __device__
static __half hlog(
const __half h);
104 __device__
static __half hlog10(
const __half h);
105 __device__
static __half hlog2(
const __half h);
107 __device__
static __half hrint(
const __half h);
108 __device__
static __half hsin(
const __half h);
109 __device__
static __half hsqrt(
const __half a);
110 __device__
static __half htrunc(
const __half a);
116 __device__
static __half2 h2ceil(
const __half2 h);
117 __device__
static __half2 h2exp(
const __half2 h);
118 __device__
static __half2 h2exp10(
const __half2 h);
119 __device__
static __half2 h2exp2(
const __half2 h);
120 __device__
static __half2 h2floor(
const __half2 h);
121 __device__
static __half2 h2log(
const __half2 h);
122 __device__
static __half2 h2log10(
const __half2 h);
123 __device__
static __half2 h2log2(
const __half2 h);
124 __device__
static __half2 h2rcp(
const __half2 h);
125 __device__
static __half2 h2rsqrt(
const __half2 h);
126 __device__
static __half2 h2sin(
const __half2 h);
127 __device__
static __half2 h2sqrt(
const __half2 h);
133 __device__ __half2 __float22half2_rn(
const float2 a);
134 __device__ __half __float2half(
const float a);
135 __device__ __half2 __float2half2_rn(
const float a);
136 __device__ __half __float2half_rd(
const float a);
137 __device__ __half __float2half_rn(
const float a);
138 __device__ __half __float2half_ru(
const float a);
139 __device__ __half __float2half_rz(
const float a);
140 __device__ __half2 __floats2half2_rn(
const float a,
const float b);
141 __device__
float2 __half22float2(
const __half2 a);
142 __device__
float __half2float(
const __half a);
143 __device__ __half2 half2half2(
const __half a);
144 __device__
int __half2int_rd(__half h);
145 __device__
int __half2int_rn(__half h);
146 __device__
int __half2int_ru(__half h);
147 __device__
int __half2int_rz(__half h);
148 __device__
long long int __half2ll_rd(__half h);
149 __device__
long long int __half2ll_rn(__half h);
150 __device__
long long int __half2ll_ru(__half h);
151 __device__
long long int __half2ll_rz(__half h);
152 __device__
short __half2short_rd(__half h);
153 __device__
short __half2short_rn(__half h);
154 __device__
short __half2short_ru(__half h);
155 __device__
short __half2short_rz(__half h);
156 __device__
unsigned int __half2uint_rd(__half h);
157 __device__
unsigned int __half2uint_rn(__half h);
158 __device__
unsigned int __half2uint_ru(__half h);
159 __device__
unsigned int __half2uint_rz(__half h);
160 __device__
unsigned long long int __half2ull_rd(__half h);
161 __device__
unsigned long long int __half2ull_rn(__half h);
162 __device__
unsigned long long int __half2ull_ru(__half h);
163 __device__
unsigned long long int __half2ull_rz(__half h);
164 __device__
unsigned short int __half2ushort_rd(__half h);
165 __device__
unsigned short int __half2ushort_rn(__half h);
166 __device__
unsigned short int __half2ushort_ru(__half h);
167 __device__
unsigned short int __half2ushort_rz(__half h);
168 __device__
short int __half_as_short(
const __half h);
169 __device__
unsigned short int __half_as_ushort(
const __half h);
170 __device__ __half2 __halves2half2(
const __half a,
const __half b);
171 __device__
float __high2float(
const __half2 a);
172 __device__ __half __high2half(
const __half2 a);
173 __device__ __half2 __high2half2(
const __half2 a);
174 __device__ __half2 __highs2half2(
const __half2 a,
const __half2 b);
175 __device__ __half __int2half_rd(
int i);
176 __device__ __half __int2half_rn(
int i);
177 __device__ __half __int2half_ru(
int i);
178 __device__ __half __int2half_rz(
int i);
179 __device__ __half __ll2half_rd(
long long int i);
180 __device__ __half __ll2half_rn(
long long int i);
181 __device__ __half __ll2half_ru(
long long int i);
182 __device__ __half __ll2half_rz(
long long int i);
183 __device__
float __low2float(
const __half2 a);
185 __device__ __half __low2half(
const __half2 a);
186 __device__ __half2 __low2half2(
const __half2 a,
const __half2 b);
187 __device__ __half2 __low2half2(
const __half2 a);
188 __device__ __half2 __lowhigh2highlow(
const __half2 a);
189 __device__ __half2 __lows2half2(
const __half2 a,
const __half2 b);
190 __device__ __half __short2half_rd(
short int i);
191 __device__ __half __short2half_rn(
short int i);
192 __device__ __half __short2half_ru(
short int i);
193 __device__ __half __short2half_rz(
short int i);
194 __device__ __half __uint2half_rd(
unsigned int i);
195 __device__ __half __uint2half_rn(
unsigned int i);
196 __device__ __half __uint2half_ru(
unsigned int i);
197 __device__ __half __uint2half_rz(
unsigned int i);
198 __device__ __half __ull2half_rd(
unsigned long long int i);
199 __device__ __half __ull2half_rn(
unsigned long long int i);
200 __device__ __half __ull2half_ru(
unsigned long long int i);
201 __device__ __half __ull2half_rz(
unsigned long long int i);
202 __device__ __half __ushort2half_rd(
unsigned short int i);
203 __device__ __half __ushort2half_rn(
unsigned short int i);
204 __device__ __half __ushort2half_ru(
unsigned short int i);
205 __device__ __half __ushort2half_rz(
unsigned short int i);
206 __device__ __half __ushort_as_half(
const unsigned short int i);
208 extern "C" __half2 __hip_hc_ir_hadd2_int(__half2, __half2);
209 extern "C" __half2 __hip_hc_ir_hfma2_int(__half2, __half2, __half2);
210 extern "C" __half2 __hip_hc_ir_hmul2_int(__half2, __half2);
211 extern "C" __half2 __hip_hc_ir_hsub2_int(__half2, __half2);
213 extern "C" __half __hip_hc_ir_hceil_half(__half) __asm("llvm.ceil.f16");
214 extern "C" __half __hip_hc_ir_hcos_half(__half) __asm("llvm.cos.f16");
215 extern "C" __half __hip_hc_ir_hexp2_half(__half) __asm("llvm.exp2.f16");
216 extern "C" __half __hip_hc_ir_hfloor_half(__half) __asm("llvm.floor.f16");
217 extern "C" __half __hip_hc_ir_hlog2_half(__half) __asm("llvm.log2.f16");
218 extern "C" __half __hip_hc_ir_hrcp_half(__half) __asm("llvm.amdgcn.rcp.f16");
219 extern "C" __half __hip_hc_ir_hrint_half(__half) __asm("llvm.rint.f16");
220 extern "C" __half __hip_hc_ir_hrsqrt_half(__half) __asm("llvm.sqrt.f16");
221 extern "C" __half __hip_hc_ir_hsin_half(__half) __asm("llvm.sin.f16");
222 extern "C" __half __hip_hc_ir_hsqrt_half(__half) __asm("llvm.sqrt.f16");
223 extern "C" __half __hip_hc_ir_htrunc_half(__half) __asm("llvm.trunc.f16");
225 extern "C" __half2 __hip_hc_ir_h2ceil_int(__half2);
226 extern "C" __half2 __hip_hc_ir_h2cos_int(__half2);
227 extern "C" __half2 __hip_hc_ir_h2exp2_int(__half2);
228 extern "C" __half2 __hip_hc_ir_h2floor_int(__half2);
229 extern "C" __half2 __hip_hc_ir_h2log2_int(__half2);
230 extern "C" __half2 __hip_hc_ir_h2rcp_int(__half2);
231 extern "C" __half2 __hip_hc_ir_h2rsqrt_int(__half2);
232 extern "C" __half2 __hip_hc_ir_h2sin_int(__half2);
233 extern "C" __half2 __hip_hc_ir_h2sqrt_int(__half2);
234 extern "C" __half2 __hip_hc_ir_h2trunc_int(__half2);
// Static inline definitions of the __half2 arithmetic functions.
// NOTE(review): this listing is incomplete - the leading integers are listing
// line numbers, and the local declarations, return statements and closing
// braces of every body below are not visible. Only what the visible lines show
// is described here.

// __hadd2: stores __hip_hc_ir_hadd2_int(a.xy, b.xy) into c.xy.
240 __device__ static inline __half2 __hadd2(__half2 a, __half2 b) {
242 c.xy = __hip_hc_ir_hadd2_int(a.xy, b.xy);
// __hadd2_sat: the visible line is the same call as in __hadd2.
// NOTE(review): no saturation step is visible - confirm against the full source.
246 __device__
static inline __half2 __hadd2_sat(__half2 a, __half2 b) {
248 c.xy = __hip_hc_ir_hadd2_int(a.xy, b.xy);
// __hfma2: stores __hip_hc_ir_hfma2_int(a.xy, b.xy, c.xy) into d.xy.
252 __device__
static inline __half2 __hfma2(__half2 a, __half2 b, __half2 c) {
254 d.xy = __hip_hc_ir_hfma2_int(a.xy, b.xy, c.xy);
// __hfma2_sat: the visible line is the same call as in __hfma2.
// NOTE(review): no saturation step is visible - confirm against the full source.
258 __device__
static inline __half2 __hfma2_sat(__half2 a, __half2 b, __half2 c) {
260 d.xy = __hip_hc_ir_hfma2_int(a.xy, b.xy, c.xy);
// __hmul2: stores __hip_hc_ir_hmul2_int(a.xy, b.xy) into c.xy.
264 __device__
static inline __half2 __hmul2(__half2 a, __half2 b) {
266 c.xy = __hip_hc_ir_hmul2_int(a.xy, b.xy);
// __hmul2_sat: the visible line is the same call as in __hmul2.
// NOTE(review): no saturation step is visible - confirm against the full source.
270 __device__
static inline __half2 __hmul2_sat(__half2 a, __half2 b) {
272 c.xy = __hip_hc_ir_hmul2_int(a.xy, b.xy);
// __hsub2: stores __hip_hc_ir_hsub2_int(a.xy, b.xy) into c.xy.
276 __device__
static inline __half2 __hsub2(__half2 a, __half2 b) {
278 c.xy = __hip_hc_ir_hsub2_int(a.xy, b.xy);
// __hneg2: only the signature is visible; the body is missing from this listing.
282 __device__
static inline __half2 __hneg2(__half2 a) {
// __hsub2_sat: the visible line is the same call as in __hsub2.
// NOTE(review): no saturation step is visible - confirm against the full source.
289 __device__
static inline __half2 __hsub2_sat(__half2 a, __half2 b) {
291 c.xy = __hip_hc_ir_hsub2_int(a.xy, b.xy);
// h2div: only the signature is visible; the body is missing from this listing.
295 __device__
static inline __half2 h2div(__half2 a, __half2 b) {
// Static inline definitions of the scalar __half math functions. Each one
// forwards its argument to a __hip_hc_ir_*_half primitive, optionally scaling
// with __hmul.
// NOTE(review): the leading integers are listing line numbers, and the closing
// brace of every body is not visible in this listing.

// hceil: returns __hip_hc_ir_hceil_half(h).
303 __device__
static inline __half hceil(
const __half h) {
304 return __hip_hc_ir_hceil_half(h);
// hcos: returns __hip_hc_ir_hcos_half(h).
307 __device__
static inline __half hcos(
const __half h) {
308 return __hip_hc_ir_hcos_half(h);
// hexp: scales h by 1.442694 (approximately log2(e)) with __hmul, then applies
// the exp2 primitive, i.e. e^h computed as 2^(h * log2(e)).
311 __device__
static inline __half hexp(
const __half h) {
312 return __hip_hc_ir_hexp2_half(__hmul(h, 1.442694));
// hexp10: scales h by 3.3219281 (approximately log2(10)) with __hmul, then
// applies the exp2 primitive, i.e. 10^h computed as 2^(h * log2(10)).
315 __device__
static inline __half hexp10(
const __half h) {
316 return __hip_hc_ir_hexp2_half(__hmul(h, 3.3219281));
// hexp2: returns __hip_hc_ir_hexp2_half(h).
319 __device__
static inline __half hexp2(
const __half h) {
320 return __hip_hc_ir_hexp2_half(h);
// hfloor: returns __hip_hc_ir_hfloor_half(h).
323 __device__
static inline __half hfloor(
const __half h) {
324 return __hip_hc_ir_hfloor_half(h);
// hlog: applies the log2 primitive, then scales by 0.693147 (approximately
// ln(2)) with __hmul, i.e. ln(h) computed as log2(h) * ln(2).
327 __device__
static inline __half hlog(
const __half h) {
328 return __hmul(__hip_hc_ir_hlog2_half(h), 0.693147);
// hlog10: applies the log2 primitive, then scales by 0.301029 (approximately
// log10(2)) with __hmul, i.e. log10(h) computed as log2(h) * log10(2).
331 __device__
static inline __half hlog10(
const __half h) {
332 return __hmul(__hip_hc_ir_hlog2_half(h), 0.301029);
// hlog2: returns __hip_hc_ir_hlog2_half(h).
335 __device__
static inline __half hlog2(
const __half h) {
336 return __hip_hc_ir_hlog2_half(h);
// hrint: returns __hip_hc_ir_hrint_half(h).
343 __device__
static inline __half hrint(
const __half h) {
344 return __hip_hc_ir_hrint_half(h);
// hrsqrt: returns __hip_hc_ir_hrsqrt_half(h).
// NOTE(review): the result is whatever intrinsic that primitive is bound to by
// its __asm label - verify the label names a reciprocal-square-root intrinsic
// and not plain sqrt. This function has no forward declaration in the
// prototype section.
347 __device__
static inline __half hrsqrt(
const __half h) {
348 return __hip_hc_ir_hrsqrt_half(h);
// hsin: returns __hip_hc_ir_hsin_half(h).
351 __device__
static inline __half hsin(
const __half h) {
352 return __hip_hc_ir_hsin_half(h);
// hsqrt: returns __hip_hc_ir_hsqrt_half(a).
355 __device__
static inline __half hsqrt(
const __half a) {
356 return __hip_hc_ir_hsqrt_half(a);
// htrunc: returns __hip_hc_ir_htrunc_half(a).
359 __device__
static inline __half htrunc(
const __half a) {
360 return __hip_hc_ir_htrunc_half(a);
// Static inline definitions of the __half2 math functions. Each one passes
// h.xy to a __hip_hc_ir_h2*_int primitive and stores the result in the .xy
// lanes of a local __half2.
// NOTE(review): the leading integers are listing line numbers, and the local
// declarations, return statements and closing braces of every body are not
// visible in this listing. Only what the visible lines show is described.

// h2ceil: stores __hip_hc_ir_h2ceil_int(h.xy) into a.xy.
367 __device__
static inline __half2 h2ceil(
const __half2 h) {
369 a.xy = __hip_hc_ir_h2ceil_int(h.xy);
// h2cos: stores __hip_hc_ir_h2cos_int(h.xy) into a.xy.
// This function has no forward declaration in the prototype section.
373 __device__
static inline __half2 h2cos(
const __half2 h) {
375 a.xy = __hip_hc_ir_h2cos_int(h.xy);
// h2exp: multiplies h.xy by factor.xy, applies the exp2 primitive, and stores
// the result back into factor.xy.
// NOTE(review): the initialisation of factor is not visible here - presumably
// log2(e) in both lanes, mirroring hexp; confirm against the full source.
379 __device__
static inline __half2 h2exp(
const __half2 h) {
383 factor.xy = __hip_hc_ir_h2exp2_int(__hip_hc_ir_hmul2_int(h.xy, factor.xy));
// h2exp10: sets both lanes of factor to 3.3219281 (approximately log2(10)),
// multiplies h.xy by it, applies the exp2 primitive, and stores the result
// back into factor.xy, i.e. 10^h computed as 2^(h * log2(10)) per lane.
387 __device__
static inline __half2 h2exp10(
const __half2 h) {
389 factor.x = 3.3219281;
390 factor.y = 3.3219281;
391 factor.xy = __hip_hc_ir_h2exp2_int(__hip_hc_ir_hmul2_int(h.xy, factor.xy));
// h2exp2: stores __hip_hc_ir_h2exp2_int(h.xy) into a.xy.
395 __device__
static inline __half2 h2exp2(
const __half2 h) {
397 a.xy = __hip_hc_ir_h2exp2_int(h.xy);
// h2floor: stores __hip_hc_ir_h2floor_int(h.xy) into a.xy.
401 __device__
static inline __half2 h2floor(
const __half2 h) {
403 a.xy = __hip_hc_ir_h2floor_int(h.xy);
// h2log: applies the log2 primitive to h.xy, multiplies by factor.xy, and
// stores the result back into factor.xy.
// NOTE(review): the initialisation of factor is not visible here - presumably
// ln(2) in both lanes, mirroring hlog; confirm against the full source.
407 __device__
static inline __half2 h2log(
const __half2 h) {
411 factor.xy = __hip_hc_ir_hmul2_int(__hip_hc_ir_h2log2_int(h.xy), factor.xy);
// h2log10: applies the log2 primitive to h.xy, multiplies by factor.xy, and
// stores the result back into factor.xy.
// NOTE(review): the initialisation of factor is not visible here - presumably
// log10(2) in both lanes, mirroring hlog10; confirm against the full source.
415 __device__
static inline __half2 h2log10(
const __half2 h) {
419 factor.xy = __hip_hc_ir_hmul2_int(__hip_hc_ir_h2log2_int(h.xy), factor.xy);
// h2log2: stores __hip_hc_ir_h2log2_int(h.xy) into a.xy.
422 __device__
static inline __half2 h2log2(
const __half2 h) {
424 a.xy = __hip_hc_ir_h2log2_int(h.xy);
// h2rcp: stores __hip_hc_ir_h2rcp_int(h.xy) into a.xy.
428 __device__
static inline __half2 h2rcp(
const __half2 h) {
430 a.xy = __hip_hc_ir_h2rcp_int(h.xy);
// h2rsqrt: stores __hip_hc_ir_h2rsqrt_int(h.xy) into a.xy.
434 __device__
static inline __half2 h2rsqrt(
const __half2 h) {
436 a.xy = __hip_hc_ir_h2rsqrt_int(h.xy);
// h2sin: stores __hip_hc_ir_h2sin_int(h.xy) into a.xy.
440 __device__
static inline __half2 h2sin(
const __half2 h) {
442 a.xy = __hip_hc_ir_h2sin_int(h.xy);
// h2sqrt: stores __hip_hc_ir_h2sqrt_int(h.xy) into a.xy.
446 __device__
static inline __half2 h2sqrt(
const __half2 h) {
448 a.xy = __hip_hc_ir_h2sqrt_int(h.xy);
// h2trunc: stores __hip_hc_ir_h2trunc_int(h.xy) into a.xy.
// This function has no forward declaration in the prototype section.
452 __device__
static inline __half2 h2trunc(
const __half2 h) {
454 a.xy = __hip_hc_ir_h2trunc_int(h.xy);
457 #endif //clang_major > 3 Definition: hip_vector_types.h:643
Defines the different new vector types for the HIP runtime.