HIP: Heterogenous-computing Interface for Portability
device_functions.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_FUNCTIONS_H
24 #define HIP_INCLUDE_HIP_HCC_DETAIL_DEVICE_FUNCTIONS_H
25 
26 #include <hip/hip_runtime.h>
27 #include <hip/hip_vector_types.h>
28 
29 
30 
31 
32 
33 // Single Precision Fast Math
34 __device__ float __cosf(float x);
35 __device__ float __exp10f(float x);
36 __device__ float __expf(float x);
37 __device__ static float __fadd_rd(float x, float y);
38 __device__ static float __fadd_rn(float x, float y);
39 __device__ static float __fadd_ru(float x, float y);
40 __device__ static float __fadd_rz(float x, float y);
41 __device__ static float __fdiv_rd(float x, float y);
42 __device__ static float __fdiv_rn(float x, float y);
43 __device__ static float __fdiv_ru(float x, float y);
44 __device__ static float __fdiv_rz(float x, float y);
45 __device__ static float __fdividef(float x, float y);
46 __device__ float __fmaf_rd(float x, float y, float z);
47 __device__ float __fmaf_rn(float x, float y, float z);
48 __device__ float __fmaf_ru(float x, float y, float z);
49 __device__ float __fmaf_rz(float x, float y, float z);
50 __device__ static float __fmul_rd(float x, float y);
51 __device__ static float __fmul_rn(float x, float y);
52 __device__ static float __fmul_ru(float x, float y);
53 __device__ static float __fmul_rz(float x, float y);
54 __device__ float __frcp_rd(float x);
55 __device__ float __frcp_rn(float x);
56 __device__ float __frcp_ru(float x);
57 __device__ float __frcp_rz(float x);
58 __device__ float __frsqrt_rn(float x);
59 __device__ float __fsqrt_rd(float x);
60 __device__ float __fsqrt_rn(float x);
61 __device__ float __fsqrt_ru(float x);
62 __device__ float __fsqrt_rz(float x);
63 __device__ static float __fsub_rd(float x, float y);
64 __device__ static float __fsub_rn(float x, float y);
65 __device__ static float __fsub_ru(float x, float y);
66 __device__ float __log10f(float x);
67 __device__ float __log2f(float x);
68 __device__ float __logf(float x);
69 __device__ float __powf(float base, float exponent);
70 __device__ static float __saturatef(float x);
71 __device__ void __sincosf(float x, float *s, float *c);
72 __device__ float __sinf(float x);
73 __device__ float __tanf(float x);
74 
75 
76 /*
77 Double Precision Intrinsics
78 */
79 
80 __device__ static double __dadd_rd(double x, double y);
81 __device__ static double __dadd_rn(double x, double y);
82 __device__ static double __dadd_ru(double x, double y);
83 __device__ static double __dadd_rz(double x, double y);
84 __device__ static double __ddiv_rd(double x, double y);
85 __device__ static double __ddiv_rn(double x, double y);
86 __device__ static double __ddiv_ru(double x, double y);
87 __device__ static double __ddiv_rz(double x, double y);
88 __device__ static double __dmul_rd(double x, double y);
89 __device__ static double __dmul_rn(double x, double y);
90 __device__ static double __dmul_ru(double x, double y);
91 __device__ static double __dmul_rz(double x, double y);
92 __device__ double __drcp_rd(double x);
93 __device__ double __drcp_rn(double x);
94 __device__ double __drcp_ru(double x);
95 __device__ double __drcp_rz(double x);
96 __device__ double __dsqrt_rd(double x);
97 __device__ double __dsqrt_rn(double x);
98 __device__ double __dsqrt_ru(double x);
99 __device__ double __dsqrt_rz(double x);
100 __device__ static double __dsub_rd(double x, double y);
101 __device__ static double __dsub_rn(double x, double y);
102 __device__ static double __dsub_ru(double x, double y);
103 __device__ static double __dsub_rz(double x, double y);
104 __device__ double __fma_rd(double x, double y, double z);
105 __device__ double __fma_rn(double x, double y, double z);
106 __device__ double __fma_ru(double x, double y, double z);
107 __device__ double __fma_rz(double x, double y, double z);
108 
109 // Single Precision Fast Math
110 extern __attribute__((const)) float __hip_fast_cosf(float) __asm("llvm.cos.f32");
111 extern __attribute__((const)) float __hip_fast_exp2f(float) __asm("llvm.exp2.f32");
112 __device__ float __hip_fast_exp10f(float);
113 __device__ float __hip_fast_expf(float);
114 __device__ float __hip_fast_frsqrt_rn(float);
115 extern __attribute__((const)) float __hip_fast_fsqrt_rd(float) __asm("llvm.sqrt.f32");
116 __device__ float __hip_fast_fsqrt_rn(float);
117 __device__ float __hip_fast_fsqrt_ru(float);
118 __device__ float __hip_fast_fsqrt_rz(float);
119 __device__ float __hip_fast_log10f(float);
120 extern __attribute__((const)) float __hip_fast_log2f(float) __asm("llvm.log2.f32");
121 __device__ float __hip_fast_logf(float);
122 __device__ float __hip_fast_powf(float, float);
123 __device__ void __hip_fast_sincosf(float,float*,float*);
124 extern __attribute__((const)) float __hip_fast_sinf(float) __asm("llvm.sin.f32");
125 __device__ float __hip_fast_tanf(float);
126 extern __attribute__((const)) float __hip_fast_fmaf(float,float,float) __asm("llvm.fma.f32");
127 extern __attribute__((const)) float __hip_fast_frcp(float) __asm("llvm.amdgcn.rcp.f32");
128 
129 extern __attribute__((const)) double __hip_fast_dsqrt(double) __asm("llvm.sqrt.f64");
130 extern __attribute__((const)) double __hip_fast_fma(double,double,double) __asm("llvm.fma.f64");
131 extern __attribute__((const)) double __hip_fast_drcp(double) __asm("llvm.amdgcn.rcp.f64");
132 
133 
134 // Single Precision Fast Math
135 __device__ inline float __cosf(float x) {
136  return __hip_fast_cosf(x);
137 }
138 
139 __device__ inline float __exp10f(float x) {
140  return __hip_fast_exp10f(x);
141 }
142 
143 __device__ inline float __expf(float x) {
144  return __hip_fast_expf(x);
145 }
146 
147 __device__ static inline float __fadd_rd(float x, float y) {
148  return x + y;
149 }
150 
151 __device__ static inline float __fadd_rn(float x, float y) {
152  return x + y;
153 }
154 
155 __device__ static inline float __fadd_ru(float x, float y) {
156  return x + y;
157 }
158 
159 __device__ static inline float __fadd_rz(float x, float y) {
160  return x + y;
161 }
162 
163 __device__ static inline float __fdiv_rd(float x, float y) {
164  return x / y;
165 }
166 
167 __device__ static inline float __fdiv_rn(float x, float y) {
168  return x / y;
169 }
170 
171 __device__ static inline float __fdiv_ru(float x, float y) {
172  return x / y;
173 }
174 
175 __device__ static inline float __fdiv_rz(float x, float y) {
176  return x / y;
177 }
178 
179 __device__ static inline float __fdividef(float x, float y) {
180  return x / y;
181 }
182 
183 __device__ inline float __fmaf_rd(float x, float y, float z) {
184  return __hip_fast_fmaf(x, y, z);
185 }
186 
187 __device__ inline float __fmaf_rn(float x, float y, float z) {
188  return __hip_fast_fmaf(x, y, z);
189 }
190 
191 __device__ inline float __fmaf_ru(float x, float y, float z) {
192  return __hip_fast_fmaf(x, y, z);
193 }
194 
195 __device__ inline float __fmaf_rz(float x, float y, float z) {
196  return __hip_fast_fmaf(x, y, z);
197 }
198 
199 __device__ static inline float __fmul_rd(float x, float y) {
200  return x * y;
201 }
202 
203 __device__ static inline float __fmul_rn(float x, float y) {
204  return x * y;
205 }
206 
207 __device__ static inline float __fmul_ru(float x, float y) {
208  return x * y;
209 }
210 
211 __device__ static inline float __fmul_rz(float x, float y) {
212  return x * y;
213 }
214 
215 __device__ inline float __frcp_rd(float x) {
216  return __hip_fast_frcp(x);
217 }
218 
219 __device__ inline float __frcp_rn(float x) {
220  return __hip_fast_frcp(x);
221 }
222 
223 __device__ inline float __frcp_ru(float x) {
224  return __hip_fast_frcp(x);
225 }
226 
227 __device__ inline float __frcp_rz(float x) {
228  return __hip_fast_frcp(x);
229 }
230 
231 __device__ inline float __frsqrt_rn(float x) {
232  return __hip_fast_frsqrt_rn(x);
233 }
234 
235 __device__ inline float __fsqrt_rd(float x) {
236  return __hip_fast_fsqrt_rd(x);
237 }
238 
239 __device__ inline float __fsqrt_rn(float x) {
240  return __hip_fast_fsqrt_rn(x);
241 }
242 
243 __device__ inline float __fsqrt_ru(float x) {
244  return __hip_fast_fsqrt_ru(x);
245 }
246 
247 __device__ inline float __fsqrt_rz(float x) {
248  return __hip_fast_fsqrt_rz(x);
249 }
250 
251 __device__ static inline float __fsub_rd(float x, float y) {
252  return x - y;
253 }
254 
255 __device__ static inline float __fsub_rn(float x, float y) {
256  return x - y;
257 }
258 
259 __device__ static inline float __fsub_ru(float x, float y) {
260  return x - y;
261 }
262 
263 __device__ static inline float __fsub_rz(float x, float y) {
264  return x - y;
265 }
266 
267 
268 __device__ inline float __log10f(float x) {
269  return __hip_fast_log10f(x);
270 }
271 
272 __device__ inline float __log2f(float x) {
273  return __hip_fast_log2f(x);
274 }
275 
276 __device__ inline float __logf(float x) {
277  return __hip_fast_logf(x);
278 }
279 
280 __device__ inline float __powf(float base, float exponent) {
281  return __hip_fast_powf(base, exponent);
282 }
283 
284 __device__ static inline float __saturatef(float x) {
285  x = x > 1.0f ? 1.0f : x;
286  x = x < 0.0f ? 0.0f : x;
287  return x;
288 }
289 
290 __device__ inline void __sincosf(float x, float *s, float *c) {
291  return __hip_fast_sincosf(x, s, c);
292 }
293 
294 __device__ inline float __sinf(float x) {
295  return __hip_fast_sinf(x);
296 }
297 
298 __device__ inline float __tanf(float x) {
299  return __hip_fast_tanf(x);
300 }
301 
302 
303 /*
304 Double Precision Intrinsics
305 */
306 
307 __device__ static inline double __dadd_rd(double x, double y) {
308  return x + y;
309 }
310 
311 __device__ static inline double __dadd_rn(double x, double y) {
312  return x + y;
313 }
314 
315 __device__ static inline double __dadd_ru(double x, double y) {
316  return x + y;
317 }
318 
319 __device__ static inline double __dadd_rz(double x, double y) {
320  return x + y;
321 }
322 
323 __device__ static inline double __ddiv_rd(double x, double y) {
324  return x / y;
325 }
326 
327 __device__ static inline double __ddiv_rn(double x, double y) {
328  return x / y;
329 }
330 
331 __device__ static inline double __ddiv_ru(double x, double y) {
332  return x / y;
333 }
334 
335 __device__ static inline double __ddiv_rz(double x, double y) {
336  return x / y;
337 }
338 
339 __device__ static inline double __dmul_rd(double x, double y) {
340  return x * y;
341 }
342 
343 __device__ static inline double __dmul_rn(double x, double y) {
344  return x * y;
345 }
346 
347 __device__ static inline double __dmul_ru(double x, double y) {
348  return x * y;
349 }
350 
351 __device__ static inline double __dmul_rz(double x, double y) {
352  return x * y;
353 }
354 
355 __device__ inline double __drcp_rd(double x) {
356  return __hip_fast_drcp(x);
357 }
358 
359 __device__ inline double __drcp_rn(double x) {
360  return __hip_fast_drcp(x);
361 }
362 
363 __device__ inline double __drcp_ru(double x) {
364  return __hip_fast_drcp(x);
365 }
366 
367 __device__ inline double __drcp_rz(double x) {
368  return __hip_fast_drcp(x);
369 }
370 
371 
372 __device__ inline double __dsqrt_rd(double x) {
373  return __hip_fast_dsqrt(x);
374 }
375 
376 __device__ inline double __dsqrt_rn(double x) {
377  return __hip_fast_dsqrt(x);
378 }
379 
380 __device__ inline double __dsqrt_ru(double x) {
381  return __hip_fast_dsqrt(x);
382 }
383 
384 __device__ inline double __dsqrt_rz(double x) {
385  return __hip_fast_dsqrt(x);
386 }
387 
388 __device__ static inline double __dsub_rd(double x, double y) {
389  return x - y;
390 }
391 
392 __device__ static inline double __dsub_rn(double x, double y) {
393  return x - y;
394 }
395 
396 __device__ static inline double __dsub_ru(double x, double y) {
397  return x - y;
398 }
399 
400 __device__ static inline double __dsub_rz(double x, double y) {
401  return x - y;
402 }
403 
404 __device__ inline double __fma_rd(double x, double y, double z) {
405  return __hip_fast_fma(x, y, z);
406 }
407 
408 __device__ inline double __fma_rn(double x, double y, double z) {
409  return __hip_fast_fma(x, y, z);
410 }
411 
412 __device__ inline double __fma_ru(double x, double y, double z) {
413  return __hip_fast_fma(x, y, z);
414 }
415 
416 __device__ inline double __fma_rz(double x, double y, double z) {
417  return __hip_fast_fma(x, y, z);
418 }
419 
420 
421 extern "C" unsigned int __hip_hc_ir_umul24_int(unsigned int, unsigned int);
422 extern "C" signed int __hip_hc_ir_mul24_int(signed int, signed int);
423 extern "C" signed int __hip_hc_ir_mulhi_int(signed int, signed int);
424 extern "C" unsigned int __hip_hc_ir_umulhi_int(unsigned int, unsigned int);
425 extern "C" unsigned int __hip_hc_ir_usad_int(unsigned int, unsigned int, unsigned int);
426 
427 // integer intrinsic function __poc __clz __ffs __brev
428 __device__ unsigned int __brev( unsigned int x);
429 __device__ unsigned long long int __brevll( unsigned long long int x);
430 __device__ unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s);
431 __device__ unsigned int __clz(int x);
432 __device__ unsigned int __clzll(long long int x);
433 __device__ unsigned int __ffs(int x);
434 __device__ unsigned int __ffsll(long long int x);
435 __device__ static unsigned int __hadd(int x, int y);
436 __device__ static int __mul24(int x, int y);
437 __device__ long long int __mul64hi(long long int x, long long int y);
438 __device__ static int __mulhi(int x, int y);
439 __device__ unsigned int __popc(unsigned int x);
440 __device__ unsigned int __popcll(unsigned long long int x);
441 __device__ static int __rhadd(int x, int y);
442 __device__ static unsigned int __sad(int x, int y, int z);
443 __device__ static unsigned int __uhadd(unsigned int x, unsigned int y);
444 __device__ static int __umul24(unsigned int x, unsigned int y);
445 __device__ unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y);
446 __device__ static unsigned int __umulhi(unsigned int x, unsigned int y);
447 __device__ static unsigned int __urhadd(unsigned int x, unsigned int y);
448 __device__ static unsigned int __usad(unsigned int x, unsigned int y, unsigned int z);
449 
450 __device__ static inline unsigned int __hadd(int x, int y) {
451  int z = x + y;
452  int sign = z & 0x8000000;
453  int value = z & 0x7FFFFFFF;
454  return ((value) >> 1 || sign);
455 }
456 __device__ static inline int __mul24(int x, int y) {
457  return __hip_hc_ir_mul24_int(x, y);
458 }
459 __device__ static inline int __mulhi(int x, int y) {
460  return __hip_hc_ir_mulhi_int(x, y);
461 }
462 __device__ static inline int __rhadd(int x, int y) {
463  int z = x + y + 1;
464  int sign = z & 0x8000000;
465  int value = z & 0x7FFFFFFF;
466  return ((value) >> 1 || sign);
467 }
468 __device__ static inline unsigned int __sad(int x, int y, int z) {
469  return x > y ? x - y + z : y - x + z;
470 }
471 __device__ static inline unsigned int __uhadd(unsigned int x, unsigned int y) {
472  return (x + y) >> 1;
473 }
474 __device__ static inline int __umul24(unsigned int x, unsigned int y) {
475  return __hip_hc_ir_umul24_int(x, y);
476 }
477 __device__ static inline unsigned int __umulhi(unsigned int x, unsigned int y) {
478  return __hip_hc_ir_umulhi_int(x, y);
479 }
480 __device__ static inline unsigned int __urhadd(unsigned int x, unsigned int y) {
481  return (x + y + 1) >> 1;
482 }
483 __device__ static inline unsigned int __usad(unsigned int x, unsigned int y, unsigned int z)
484 {
485  return __hip_hc_ir_usad_int(x, y, z);
486 }
487 
488 /*
489 Rounding modes are not yet supported in HIP
490 */
491 
492 __device__ float __double2float_rd(double x);
493 __device__ float __double2float_rn(double x);
494 __device__ float __double2float_ru(double x);
495 __device__ float __double2float_rz(double x);
496 
497 __device__ int __double2hiint(double x);
498 
499 __device__ int __double2int_rd(double x);
500 __device__ int __double2int_rn(double x);
501 __device__ int __double2int_ru(double x);
502 __device__ int __double2int_rz(double x);
503 
504 __device__ long long int __double2ll_rd(double x);
505 __device__ long long int __double2ll_rn(double x);
506 __device__ long long int __double2ll_ru(double x);
507 __device__ long long int __double2ll_rz(double x);
508 
509 __device__ int __double2loint(double x);
510 
511 __device__ unsigned int __double2uint_rd(double x);
512 __device__ unsigned int __double2uint_rn(double x);
513 __device__ unsigned int __double2uint_ru(double x);
514 __device__ unsigned int __double2uint_rz(double x);
515 
516 __device__ unsigned long long int __double2ull_rd(double x);
517 __device__ unsigned long long int __double2ull_rn(double x);
518 __device__ unsigned long long int __double2ull_ru(double x);
519 __device__ unsigned long long int __double2ull_rz(double x);
520 
521 __device__ long long int __double_as_longlong(double x);
522 /*
523 __device__ unsigned short __float2half_rn(float x);
524 __device__ float __half2float(unsigned short);
525 
526 The above device function are not a valid .
527 Use
528 __device__ __half __float2half_rn(float x);
529 __device__ float __half2float(__half);
530 from hip_fp16.h
531 
532 CUDA implements half as unsigned short whereas, HIP doesn't.
533 
534 */
535 
536 __device__ int __float2int_rd(float x);
537 __device__ int __float2int_rn(float x);
538 __device__ int __float2int_ru(float x);
539 __device__ int __float2int_rz(float x);
540 
541 __device__ long long int __float2ll_rd(float x);
542 __device__ long long int __float2ll_rn(float x);
543 __device__ long long int __float2ll_ru(float x);
544 __device__ long long int __float2ll_rz(float x);
545 
546 __device__ unsigned int __float2uint_rd(float x);
547 __device__ unsigned int __float2uint_rn(float x);
548 __device__ unsigned int __float2uint_ru(float x);
549 __device__ unsigned int __float2uint_rz(float x);
550 
551 __device__ unsigned long long int __float2ull_rd(float x);
552 __device__ unsigned long long int __float2ull_rn(float x);
553 __device__ unsigned long long int __float2ull_ru(float x);
554 __device__ unsigned long long int __float2ull_rz(float x);
555 
556 __device__ int __float_as_int(float x);
557 __device__ unsigned int __float_as_uint(float x);
558 __device__ double __hiloint2double(int hi, int lo);
559 __device__ double __int2double_rn(int x);
560 
561 __device__ float __int2float_rd(int x);
562 __device__ float __int2float_rn(int x);
563 __device__ float __int2float_ru(int x);
564 __device__ float __int2float_rz(int x);
565 
566 __device__ float __int_as_float(int x);
567 
568 __device__ double __ll2double_rd(long long int x);
569 __device__ double __ll2double_rn(long long int x);
570 __device__ double __ll2double_ru(long long int x);
571 __device__ double __ll2double_rz(long long int x);
572 
573 __device__ float __ll2float_rd(long long int x);
574 __device__ float __ll2float_rn(long long int x);
575 __device__ float __ll2float_ru(long long int x);
576 __device__ float __ll2float_rz(long long int x);
577 
578 __device__ double __longlong_as_double(long long int x);
579 
580 __device__ double __uint2double_rn(int x);
581 
582 __device__ float __uint2float_rd(unsigned int x);
583 __device__ float __uint2float_rn(unsigned int x);
584 __device__ float __uint2float_ru(unsigned int x);
585 __device__ float __uint2float_rz(unsigned int x);
586 
587 __device__ float __uint_as_float(unsigned int x);
588 
589 __device__ double __ull2double_rd(unsigned long long int x);
590 __device__ double __ull2double_rn(unsigned long long int x);
591 __device__ double __ull2double_ru(unsigned long long int x);
592 __device__ double __ull2double_rz(unsigned long long int x);
593 
594 __device__ float __ull2float_rd(unsigned long long int x);
595 __device__ float __ull2float_rn(unsigned long long int x);
596 __device__ float __ull2float_ru(unsigned long long int x);
597 __device__ float __ull2float_rz(unsigned long long int x);
598 
599 __device__ char4 __hip_hc_add8pk(char4, char4);
600 __device__ char4 __hip_hc_sub8pk(char4, char4);
601 __device__ char4 __hip_hc_mul8pk(char4, char4);
602 
603 
604 #endif
Definition: hip_vector_types.h:235