HIP: Heterogenous-computing Interface for Portability
hip_fp16.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_FP16_H
24 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_FP16_H
25 
27 #if ( __clang_major__ > 3)
28 typedef __fp16 __half;
29 typedef __fp16 __half1 __attribute__((ext_vector_type(1)));
30 typedef __fp16 __half2 __attribute__((ext_vector_type(2)));
31 typedef __fp16 half;
32 
33 /*
34 Half Arithmetic Functions
35 */
36 __device__ __half __hadd(const __half a, const __half b);
37 __device__ __half __hadd_sat(__half a, __half b);
38 __device__ __half __hfma(__half a, __half b, __half c);
39 __device__ __half __hfma_sat(__half a, __half b, __half c);
40 __device__ __half __hmul(__half a, __half b);
41 __device__ __half __hmul_sat(__half a, __half b);
42 __device__ __half __hneg(__half a);
43 __device__ __half __hsub(__half a, __half b);
44 __device__ __half __hsub_sat(__half a, __half b);
45 __device__ __half hdiv(__half a, __half b);
46 
47 /*
48 Half2 Arithmetic Functions
49 */
50 
51 __device__ static __half2 __hadd2(__half2 a, __half2 b);
52 __device__ static __half2 __hadd2_sat(__half2 a, __half2 b);
53 __device__ static __half2 __hfma2(__half2 a, __half2 b, __half2 c);
54 __device__ static __half2 __hfma2_sat(__half2 a, __half2 b, __half2 c);
55 __device__ static __half2 __hmul2(__half2 a, __half2 b);
56 __device__ static __half2 __hmul2_sat(__half2 a, __half2 b);
57 __device__ static __half2 __hsub2(__half2 a, __half2 b);
58 __device__ static __half2 __hneg2(__half2 a);
59 __device__ static __half2 __hsub2_sat(__half2 a, __half2 b);
60 __device__ static __half2 h2div(__half2 a, __half2 b);
61 
62 /*
63 Half Comparision Functions
64 */
65 
66 __device__ bool __heq(__half a, __half b);
67 __device__ bool __hge(__half a, __half b);
68 __device__ bool __hgt(__half a, __half b);
69 __device__ bool __hisinf(__half a);
70 __device__ bool __hisnan(__half a);
71 __device__ bool __hle(__half a, __half b);
72 __device__ bool __hlt(__half a, __half b);
73 __device__ bool __hne(__half a, __half b);
74 
75 /*
76 Half2 Comparision Functions
77 */
78 
79 __device__ bool __hbeq2(__half2 a, __half2 b);
80 __device__ bool __hbge2(__half2 a, __half2 b);
81 __device__ bool __hbgt2(__half2 a, __half2 b);
82 __device__ bool __hble2(__half2 a, __half2 b);
83 __device__ bool __hblt2(__half2 a, __half2 b);
84 __device__ bool __hbne2(__half2 a, __half2 b);
85 __device__ __half2 __heq2(__half2 a, __half2 b);
86 __device__ __half2 __hge2(__half2 a, __half2 b);
87 __device__ __half2 __hgt2(__half2 a, __half2 b);
88 __device__ __half2 __hisnan2(__half2 a);
89 __device__ __half2 __hle2(__half2 a, __half2 b);
90 __device__ __half2 __hlt2(__half2 a, __half2 b);
91 __device__ __half2 __hne2(__half2 a, __half2 b);
92 
93 /*
94 Half Math Functions
95 */
96 
97 __device__ static __half hceil(const __half h);
98 __device__ static __half hcos(const __half h);
99 __device__ static __half hexp(const __half h);
100 __device__ static __half hexp10(const __half h);
101 __device__ static __half hexp2(const __half h);
102 __device__ static __half hfloor(const __half h);
103 __device__ static __half hlog(const __half h);
104 __device__ static __half hlog10(const __half h);
105 __device__ static __half hlog2(const __half h);
106 //__device__ static __half hrcp(const __half h);
107 __device__ static __half hrint(const __half h);
108 __device__ static __half hsin(const __half h);
109 __device__ static __half hsqrt(const __half a);
110 __device__ static __half htrunc(const __half a);
111 
112 /*
113 Half2 Math Functions
114 */
115 
116 __device__ static __half2 h2ceil(const __half2 h);
117 __device__ static __half2 h2exp(const __half2 h);
118 __device__ static __half2 h2exp10(const __half2 h);
119 __device__ static __half2 h2exp2(const __half2 h);
120 __device__ static __half2 h2floor(const __half2 h);
121 __device__ static __half2 h2log(const __half2 h);
122 __device__ static __half2 h2log10(const __half2 h);
123 __device__ static __half2 h2log2(const __half2 h);
124 __device__ static __half2 h2rcp(const __half2 h);
125 __device__ static __half2 h2rsqrt(const __half2 h);
126 __device__ static __half2 h2sin(const __half2 h);
127 __device__ static __half2 h2sqrt(const __half2 h);
128 
129 /*
130 Half Conversion And Data Movement
131 */
132 
133 __device__ __half2 __float22half2_rn(const float2 a);
134 __device__ __half __float2half(const float a);
135 __device__ __half2 __float2half2_rn(const float a);
136 __device__ __half __float2half_rd(const float a);
137 __device__ __half __float2half_rn(const float a);
138 __device__ __half __float2half_ru(const float a);
139 __device__ __half __float2half_rz(const float a);
140 __device__ __half2 __floats2half2_rn(const float a, const float b);
141 __device__ float2 __half22float2(const __half2 a);
142 __device__ float __half2float(const __half a);
143 __device__ __half2 half2half2(const __half a);
144 __device__ int __half2int_rd(__half h);
145 __device__ int __half2int_rn(__half h);
146 __device__ int __half2int_ru(__half h);
147 __device__ int __half2int_rz(__half h);
148 __device__ long long int __half2ll_rd(__half h);
149 __device__ long long int __half2ll_rn(__half h);
150 __device__ long long int __half2ll_ru(__half h);
151 __device__ long long int __half2ll_rz(__half h);
152 __device__ short __half2short_rd(__half h);
153 __device__ short __half2short_rn(__half h);
154 __device__ short __half2short_ru(__half h);
155 __device__ short __half2short_rz(__half h);
156 __device__ unsigned int __half2uint_rd(__half h);
157 __device__ unsigned int __half2uint_rn(__half h);
158 __device__ unsigned int __half2uint_ru(__half h);
159 __device__ unsigned int __half2uint_rz(__half h);
160 __device__ unsigned long long int __half2ull_rd(__half h);
161 __device__ unsigned long long int __half2ull_rn(__half h);
162 __device__ unsigned long long int __half2ull_ru(__half h);
163 __device__ unsigned long long int __half2ull_rz(__half h);
164 __device__ unsigned short int __half2ushort_rd(__half h);
165 __device__ unsigned short int __half2ushort_rn(__half h);
166 __device__ unsigned short int __half2ushort_ru(__half h);
167 __device__ unsigned short int __half2ushort_rz(__half h);
168 __device__ short int __half_as_short(const __half h);
169 __device__ unsigned short int __half_as_ushort(const __half h);
170 __device__ __half2 __halves2half2(const __half a, const __half b);
171 __device__ float __high2float(const __half2 a);
172 __device__ __half __high2half(const __half2 a);
173 __device__ __half2 __high2half2(const __half2 a);
174 __device__ __half2 __highs2half2(const __half2 a, const __half2 b);
175 __device__ __half __int2half_rd(int i);
176 __device__ __half __int2half_rn(int i);
177 __device__ __half __int2half_ru(int i);
178 __device__ __half __int2half_rz(int i);
179 __device__ __half __ll2half_rd(long long int i);
180 __device__ __half __ll2half_rn(long long int i);
181 __device__ __half __ll2half_ru(long long int i);
182 __device__ __half __ll2half_rz(long long int i);
183 __device__ float __low2float(const __half2 a);
184 
185 __device__ __half __low2half(const __half2 a);
186 __device__ __half2 __low2half2(const __half2 a, const __half2 b);
187 __device__ __half2 __low2half2(const __half2 a);
188 __device__ __half2 __lowhigh2highlow(const __half2 a);
189 __device__ __half2 __lows2half2(const __half2 a, const __half2 b);
190 __device__ __half __short2half_rd(short int i);
191 __device__ __half __short2half_rn(short int i);
192 __device__ __half __short2half_ru(short int i);
193 __device__ __half __short2half_rz(short int i);
194 __device__ __half __uint2half_rd(unsigned int i);
195 __device__ __half __uint2half_rn(unsigned int i);
196 __device__ __half __uint2half_ru(unsigned int i);
197 __device__ __half __uint2half_rz(unsigned int i);
198 __device__ __half __ull2half_rd(unsigned long long int i);
199 __device__ __half __ull2half_rn(unsigned long long int i);
200 __device__ __half __ull2half_ru(unsigned long long int i);
201 __device__ __half __ull2half_rz(unsigned long long int i);
202 __device__ __half __ushort2half_rd(unsigned short int i);
203 __device__ __half __ushort2half_rn(unsigned short int i);
204 __device__ __half __ushort2half_ru(unsigned short int i);
205 __device__ __half __ushort2half_rz(unsigned short int i);
206 __device__ __half __ushort_as_half(const unsigned short int i);
207 
208 extern "C" __half2 __hip_hc_ir_hadd2_int(__half2, __half2);
209 extern "C" __half2 __hip_hc_ir_hfma2_int(__half2, __half2, __half2);
210 extern "C" __half2 __hip_hc_ir_hmul2_int(__half2, __half2);
211 extern "C" __half2 __hip_hc_ir_hsub2_int(__half2, __half2);
212 
213 extern "C" __half __hip_hc_ir_hceil_half(__half) __asm("llvm.ceil.f16");
214 extern "C" __half __hip_hc_ir_hcos_half(__half) __asm("llvm.cos.f16");
215 extern "C" __half __hip_hc_ir_hexp2_half(__half) __asm("llvm.exp2.f16");
216 extern "C" __half __hip_hc_ir_hfloor_half(__half) __asm("llvm.floor.f16");
217 extern "C" __half __hip_hc_ir_hlog2_half(__half) __asm("llvm.log2.f16");
218 extern "C" __half __hip_hc_ir_hrcp_half(__half) __asm("llvm.amdgcn.rcp.f16");
219 extern "C" __half __hip_hc_ir_hrint_half(__half) __asm("llvm.rint.f16");
220 extern "C" __half __hip_hc_ir_hrsqrt_half(__half) __asm("llvm.sqrt.f16");
221 extern "C" __half __hip_hc_ir_hsin_half(__half) __asm("llvm.sin.f16");
222 extern "C" __half __hip_hc_ir_hsqrt_half(__half) __asm("llvm.sqrt.f16");
223 extern "C" __half __hip_hc_ir_htrunc_half(__half) __asm("llvm.trunc.f16");
224 
225 extern "C" __half2 __hip_hc_ir_h2ceil_int(__half2);
226 extern "C" __half2 __hip_hc_ir_h2cos_int(__half2);
227 extern "C" __half2 __hip_hc_ir_h2exp2_int(__half2);
228 extern "C" __half2 __hip_hc_ir_h2floor_int(__half2);
229 extern "C" __half2 __hip_hc_ir_h2log2_int(__half2);
230 extern "C" __half2 __hip_hc_ir_h2rcp_int(__half2);
231 extern "C" __half2 __hip_hc_ir_h2rsqrt_int(__half2);
232 extern "C" __half2 __hip_hc_ir_h2sin_int(__half2);
233 extern "C" __half2 __hip_hc_ir_h2sqrt_int(__half2);
234 extern "C" __half2 __hip_hc_ir_h2trunc_int(__half2);
235 
236 /*
237  Half2 Arithmetic Functions
238 */
239 
240 __device__ static inline __half2 __hadd2(__half2 a, __half2 b) {
241  __half2 c;
242  c.xy = __hip_hc_ir_hadd2_int(a.xy, b.xy);
243  return c;
244 }
245 
246 __device__ static inline __half2 __hadd2_sat(__half2 a, __half2 b) {
247  __half2 c;
248  c.xy = __hip_hc_ir_hadd2_int(a.xy, b.xy);
249  return c;
250 }
251 
252 __device__ static inline __half2 __hfma2(__half2 a, __half2 b, __half2 c) {
253  __half2 d;
254  d.xy = __hip_hc_ir_hfma2_int(a.xy, b.xy, c.xy);
255  return d;
256 }
257 
258 __device__ static inline __half2 __hfma2_sat(__half2 a, __half2 b, __half2 c) {
259  __half2 d;
260  d.xy = __hip_hc_ir_hfma2_int(a.xy, b.xy, c.xy);
261  return d;
262 }
263 
264 __device__ static inline __half2 __hmul2(__half2 a, __half2 b) {
265  __half2 c;
266  c.xy = __hip_hc_ir_hmul2_int(a.xy, b.xy);
267  return c;
268 }
269 
270 __device__ static inline __half2 __hmul2_sat(__half2 a, __half2 b) {
271  __half2 c;
272  c.xy = __hip_hc_ir_hmul2_int(a.xy, b.xy);
273  return c;
274 }
275 
276 __device__ static inline __half2 __hsub2(__half2 a, __half2 b) {
277  __half2 c;
278  c.xy = __hip_hc_ir_hsub2_int(a.xy, b.xy);
279  return c;
280 }
281 
282 __device__ static inline __half2 __hneg2(__half2 a) {
283  __half2 c;
284  c.x = - a.x;
285  c.y = - a.y;
286  return c;
287 }
288 
289 __device__ static inline __half2 __hsub2_sat(__half2 a, __half2 b) {
290  __half2 c;
291  c.xy = __hip_hc_ir_hsub2_int(a.xy, b.xy);
292  return c;
293 }
294 
295 __device__ static inline __half2 h2div(__half2 a, __half2 b) {
296  __half2 c;
297  c.x = a.x / b.x;
298  c.y = a.y / b.y;
299  return c;
300 }
301 
302 
303 __device__ static inline __half hceil(const __half h) {
304  return __hip_hc_ir_hceil_half(h);
305 }
306 
307 __device__ static inline __half hcos(const __half h) {
308  return __hip_hc_ir_hcos_half(h);
309 }
310 
311 __device__ static inline __half hexp(const __half h) {
312  return __hip_hc_ir_hexp2_half(__hmul(h, 1.442694));
313 }
314 
315 __device__ static inline __half hexp10(const __half h) {
316  return __hip_hc_ir_hexp2_half(__hmul(h, 3.3219281));
317 }
318 
319 __device__ static inline __half hexp2(const __half h) {
320  return __hip_hc_ir_hexp2_half(h);
321 }
322 
323 __device__ static inline __half hfloor(const __half h) {
324  return __hip_hc_ir_hfloor_half(h);
325 }
326 
327 __device__ static inline __half hlog(const __half h) {
328  return __hmul(__hip_hc_ir_hlog2_half(h), 0.693147);
329 }
330 
331 __device__ static inline __half hlog10(const __half h) {
332  return __hmul(__hip_hc_ir_hlog2_half(h), 0.301029);
333 }
334 
335 __device__ static inline __half hlog2(const __half h) {
336  return __hip_hc_ir_hlog2_half(h);
337 }
338 /*
339 __device__ static inline __half hrcp(const __half h) {
340  return __hip_hc_ir_hrcp_half(h);
341 }
342 */
343 __device__ static inline __half hrint(const __half h) {
344  return __hip_hc_ir_hrint_half(h);
345 }
346 
347 __device__ static inline __half hrsqrt(const __half h) {
348  return __hip_hc_ir_hrsqrt_half(h);
349 }
350 
351 __device__ static inline __half hsin(const __half h) {
352  return __hip_hc_ir_hsin_half(h);
353 }
354 
355 __device__ static inline __half hsqrt(const __half a) {
356  return __hip_hc_ir_hsqrt_half(a);
357 }
358 
359 __device__ static inline __half htrunc(const __half a) {
360  return __hip_hc_ir_htrunc_half(a);
361 }
362 
363 /*
364 Half2 Math Operations
365 */
366 
367 __device__ static inline __half2 h2ceil(const __half2 h) {
368  __half2 a;
369  a.xy = __hip_hc_ir_h2ceil_int(h.xy);
370  return a;
371 }
372 
373 __device__ static inline __half2 h2cos(const __half2 h) {
374  __half2 a;
375  a.xy = __hip_hc_ir_h2cos_int(h.xy);
376  return a;
377 }
378 
379 __device__ static inline __half2 h2exp(const __half2 h) {
380  __half2 factor;
381  factor.x = 1.442694;
382  factor.y = 1.442694;
383  factor.xy = __hip_hc_ir_h2exp2_int(__hip_hc_ir_hmul2_int(h.xy, factor.xy));
384  return factor;
385 }
386 
387 __device__ static inline __half2 h2exp10(const __half2 h) {
388  __half2 factor;
389  factor.x = 3.3219281;
390  factor.y = 3.3219281;
391  factor.xy = __hip_hc_ir_h2exp2_int(__hip_hc_ir_hmul2_int(h.xy, factor.xy));
392  return factor;
393 }
394 
395 __device__ static inline __half2 h2exp2(const __half2 h) {
396  __half2 a;
397  a.xy = __hip_hc_ir_h2exp2_int(h.xy);
398  return a;
399 }
400 
401 __device__ static inline __half2 h2floor(const __half2 h) {
402  __half2 a;
403  a.xy = __hip_hc_ir_h2floor_int(h.xy);
404  return a;
405 }
406 
407 __device__ static inline __half2 h2log(const __half2 h) {
408  __half2 factor;
409  factor.x = 0.693147;
410  factor.y = 0.693147;
411  factor.xy = __hip_hc_ir_hmul2_int(__hip_hc_ir_h2log2_int(h.xy), factor.xy);
412  return factor;
413 }
414 
415 __device__ static inline __half2 h2log10(const __half2 h) {
416  __half2 factor;
417  factor.x = 0.301029;
418  factor.y = 0.301029;
419  factor.xy = __hip_hc_ir_hmul2_int(__hip_hc_ir_h2log2_int(h.xy), factor.xy);
420  return factor;
421 }
422 __device__ static inline __half2 h2log2(const __half2 h) {
423  __half2 a;
424  a.xy = __hip_hc_ir_h2log2_int(h.xy);
425  return a;
426 }
427 
428 __device__ static inline __half2 h2rcp(const __half2 h) {
429  __half2 a;
430  a.xy = __hip_hc_ir_h2rcp_int(h.xy);
431  return a;
432 }
433 
434 __device__ static inline __half2 h2rsqrt(const __half2 h) {
435  __half2 a;
436  a.xy = __hip_hc_ir_h2rsqrt_int(h.xy);
437  return a;
438 }
439 
440 __device__ static inline __half2 h2sin(const __half2 h) {
441  __half2 a;
442  a.xy = __hip_hc_ir_h2sin_int(h.xy);
443  return a;
444 }
445 
446 __device__ static inline __half2 h2sqrt(const __half2 h) {
447  __half2 a;
448  a.xy = __hip_hc_ir_h2sqrt_int(h.xy);
449  return a;
450 }
451 
452 __device__ static inline __half2 h2trunc(const __half2 h) {
453  __half2 a;
454  a.xy = __hip_hc_ir_h2trunc_int(h.xy);
455  return a;
456 }
457 #endif //clang_major > 3
458 
459 #endif
Definition: hip_vector_types.h:643
Defines the different newt vector types for HIP runtime.