HIP: Heterogeneous-computing Interface for Portability
device_util.h
1 /*
2 Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #ifndef DEVICE_UTIL_H
24 #define DEVICE_UTIL_H
25 
27 
/*
 Heap size computation for malloc and free device functions.

 The heap is sized as
   NUM_PAGES_PER_THREAD * NUM_THREADS_PER_CU * NUM_CUS_PER_GPU pages,
 each SIZE_OF_PAGE units large (presumably bytes -- confirm against the
 allocator that consumes SIZE_OF_HEAP).

 Every derived macro is wrapped in parentheses so that it expands as a single
 value inside a larger expression. Without them, `n / SIZE_OF_HEAP` or
 `n % NUM_PAGES` would bind to the first factor only.
*/

#define NUM_PAGES_PER_THREAD 16
#define SIZE_OF_PAGE 64
#define NUM_THREADS_PER_CU 64
#define NUM_CUS_PER_GPU 64 // Specific for r9 Nano
#define NUM_PAGES (NUM_PAGES_PER_THREAD * NUM_THREADS_PER_CU * NUM_CUS_PER_GPU)
#define SIZE_MALLOC (NUM_PAGES * SIZE_OF_PAGE)
#define SIZE_OF_HEAP (SIZE_MALLOC)
39 
// Mathematical constants. The literals carry no suffix, so they have type
// double wherever they are used.
#define HIP_SQRT_2 1.41421356237  // sqrt(2)
#define HIP_SQRT_PI 1.77245385091 // sqrt(pi)

// Coefficient tables (families a, b, c, d) named after erfinv. Presumably
// they feed the inverse-error-function helpers declared in this header; the
// formula that consumes them is not visible here -- confirm in the
// implementation file before changing any value.
// Negative coefficients are parenthesized so the sign always stays attached
// to the literal, whatever expression the macro is expanded into.
#define __hip_erfinva3 (-0.140543331)
#define __hip_erfinva2 0.914624893
#define __hip_erfinva1 (-1.645349621)
#define __hip_erfinva0 0.886226899

#define __hip_erfinvb4 0.012229801
#define __hip_erfinvb3 (-0.329097515)
#define __hip_erfinvb2 1.442710462
#define __hip_erfinvb1 (-2.118377725)
#define __hip_erfinvb0 1

#define __hip_erfinvc3 1.641345311
#define __hip_erfinvc2 3.429567803
#define __hip_erfinvc1 (-1.62490649)
#define __hip_erfinvc0 (-1.970840454)

#define __hip_erfinvd2 1.637067800
#define __hip_erfinvd1 3.543889200
#define __hip_erfinvd0 1

#define HIP_PI 3.14159265358979323846
64 
65 __device__ void* __hip_hc_malloc(size_t size);
66 __device__ void* __hip_hc_free(void* ptr);
67 
68 __device__ float __hip_erfinvf(float x);
69 __device__ double __hip_erfinv(double x);
70 
71 __device__ float __hip_j0f(float x);
72 __device__ double __hip_j0(double x);
73 
74 __device__ float __hip_j1f(float x);
75 __device__ double __hip_j1(double x);
76 
77 __device__ float __hip_y0f(float x);
78 __device__ double __hip_y0(double x);
79 
80 __device__ float __hip_y1f(float x);
81 __device__ double __hip_y1(double x);
82 
83 __device__ float __hip_jnf(int n, float x);
84 __device__ double __hip_jn(int n, double x);
85 
86 __device__ float __hip_ynf(int n, float x);
87 __device__ double __hip_yn(int n, double x);
88 
89 __device__ float __hip_precise_cosf(float x);
90 __device__ float __hip_precise_exp10f(float x);
91 __device__ float __hip_precise_expf(float x);
92 __device__ float __hip_precise_frsqrt_rn(float x);
93 __device__ float __hip_precise_fsqrt_rd(float x);
94 __device__ float __hip_precise_fsqrt_rn(float x);
95 __device__ float __hip_precise_fsqrt_ru(float x);
96 __device__ float __hip_precise_fsqrt_rz(float x);
97 __device__ float __hip_precise_log10f(float x);
98 __device__ float __hip_precise_log2f(float x);
99 __device__ float __hip_precise_logf(float x);
100 __device__ float __hip_precise_powf(float base, float exponent);
101 __device__ void __hip_precise_sincosf(float x, float *s, float *c);
102 __device__ float __hip_precise_sinf(float x);
103 __device__ float __hip_precise_tanf(float x);
104 // Double Precision Math
105 __device__ double __hip_precise_dsqrt_rd(double x);
106 __device__ double __hip_precise_dsqrt_rn(double x);
107 __device__ double __hip_precise_dsqrt_ru(double x);
108 __device__ double __hip_precise_dsqrt_rz(double x);
109 
110 
111 
112 // Float Fast Math
113 __device__ float __hip_fast_exp10f(float x);
114 __device__ float __hip_fast_expf(float x);
115 __device__ float __hip_fast_frsqrt_rn(float x);
116 __device__ float __hip_fast_fsqrt_rn(float x);
117 __device__ float __hip_fast_fsqrt_ru(float x);
118 __device__ float __hip_fast_fsqrt_rz(float x);
119 __device__ float __hip_fast_log10f(float x);
120 __device__ float __hip_fast_logf(float x);
121 __device__ float __hip_fast_powf(float base, float exponent);
122 __device__ void __hip_fast_sincosf(float x, float *s, float *c);
123 __device__ float __hip_fast_tanf(float x);
124 // Double Precision Math
125 __device__ double __hip_fast_dsqrt_rd(double x);
126 __device__ double __hip_fast_dsqrt_rn(double x);
127 __device__ double __hip_fast_dsqrt_ru(double x);
128 __device__ double __hip_fast_dsqrt_rz(double x);
129 __device__ void __threadfence_system(void);
130 
// ---- Host-side special functions ----
// Host counterparts of the device special-function prototypes. Each function
// comes as a float ('f' suffix) / double pair. The names follow the usual
// math-library spelling (erfinv, erfcinv, j0/j1/jn, y0/y1/yn); only the
// prototypes are visible in this header.

float __hip_host_erfinvf(float x);
double __hip_host_erfinv(double x);

float __hip_host_erfcinvf(float y);
double __hip_host_erfcinv(double y);

float __hip_host_j0f(float x);
double __hip_host_j0(double x);

float __hip_host_j1f(float x);
double __hip_host_j1(double x);

// The double partner of __hip_host_y0f used to be spelled __hip_host_y1,
// which left __hip_host_y0 undeclared and declared __hip_host_y1 twice.
float __hip_host_y0f(float x);
double __hip_host_y0(double x);

float __hip_host_y1f(float x);
double __hip_host_y1(double x);

float __hip_host_jnf(int n, float x);
double __hip_host_jn(int n, double x);

float __hip_host_ynf(int n, float x);
double __hip_host_yn(int n, double x);
154 
155 #endif
__device__ void __threadfence_system(void)
__threadfence_system makes writes to pinned system memory visible to the host CPU.
Definition: device_util.cpp:1266
Contains definitions of APIs for HIP runtime.