23 #ifndef HIP_SRC_HIP_HCC_INTERNAL_H 24 #define HIP_SRC_HIP_HCC_INTERNAL_H 28 #include "hsa/hsa_ext_amd.h" 30 #include "hip/hip_runtime.h" 35 #if defined(__HCC__) && (__hcc_workweek__ < 16354) 36 #error("This version of HIP requires a newer version of HCC."); 46 extern const int release;
49 extern int HIP_LAUNCH_BLOCKING;
50 extern int HIP_API_BLOCKING;
52 extern int HIP_PRINT_ENV;
53 extern int HIP_PROFILE_API;
57 extern int HIP_STAGING_SIZE;
58 extern int HIP_STREAM_SIGNALS;
59 extern int HIP_VISIBLE_DEVICES;
60 extern int HIP_FORCE_P2P_HOST;
62 extern int HIP_COHERENT_HOST_ALLOC;
64 extern int HIP_HIDDEN_FREE_MEM;
67 extern int HIP_SYNC_HOST_ALLOC;
69 extern int HIP_SYNC_NULL_STREAM;
72 extern int HCC_OPT_FLUSH;
81 int tid()
const {
return _shortTid; };
82 uint64_t incApiSeqNum() {
return ++_apiSeqNum; };
83 uint64_t apiSeqNum()
const {
return _apiSeqNum; };
94 static const uint64_t MAX_TRIGGER = std::numeric_limits<uint64_t>::max();
96 void print (
int tid) {
97 std::cout <<
"Enabling tracing for ";
98 for (
auto iter=_profTrigger.begin(); iter != _profTrigger.end(); iter++) {
99 std::cout <<
"tid:" << tid <<
"." << *iter <<
",";
104 uint64_t nextTrigger() {
return _profTrigger.empty() ? MAX_TRIGGER : _profTrigger.back(); };
105 void add(uint64_t trigger) { _profTrigger.push_back(trigger); };
106 void sort() { std::sort (_profTrigger.begin(), _profTrigger.end(), std::greater<int>()); };
108 std::vector<uint64_t> _profTrigger;
115 extern thread_local
hipError_t tls_lastHipError;
116 extern thread_local
TidInfo tls_tidInfo;
118 extern std::vector<ProfTrigger> g_dbStartTriggers;
119 extern std::vector<ProfTrigger> g_dbStopTriggers;
128 #define KNRM "\x1B[0m" 129 #define KRED "\x1B[31m" 130 #define KGRN "\x1B[32m" 131 #define KYEL "\x1B[33m" 132 #define KBLU "\x1B[34m" 133 #define KMAG "\x1B[35m" 134 #define KCYN "\x1B[36m" 135 #define KWHT "\x1B[37m" 137 extern const char *API_COLOR;
138 extern const char *API_COLOR_END;
143 #define STREAM_THREAD_SAFE 1 146 #define CTX_THREAD_SAFE 1 148 #define DEVICE_THREAD_SAFE 1 153 #define COMPILE_HIP_DB 1 161 #define COMPILE_HIP_TRACE_API 0x3 166 #ifndef COMPILE_HIP_ATP_MARKER 167 #define COMPILE_HIP_ATP_MARKER 0 176 #if COMPILE_HIP_ATP_MARKER 177 #include "CXLActivityLogger.h" 178 #define MARKER_BEGIN(markerName,group) amdtBeginMarker(markerName, group, nullptr); 179 #define MARKER_END() amdtEndMarker(); 180 #define RESUME_PROFILING amdtResumeProfiling(AMDT_ALL_PROFILING); 181 #define STOP_PROFILING amdtStopProfiling(AMDT_ALL_PROFILING); 184 #define MARKER_BEGIN(markerName,group) 186 #define RESUME_PROFILING 187 #define STOP_PROFILING 193 #define TRACE_ALL 0 // 0x1 194 #define TRACE_KCMD 1 // 0x2, kernel command 195 #define TRACE_MCMD 2 // 0x4, memory command 196 #define TRACE_MEM 3 // 0x8, memory allocation or deallocation. 205 #define DB_MAX_FLAG 4 212 const char *_shortName;
216 static const DbName dbName [] =
227 #define tprintf(trace_level, ...) {\ 228 if (HIP_DB & (1<<(trace_level))) {\ 230 snprintf(msgStr, 2000, __VA_ARGS__);\ 231 fprintf (stderr, " %ship-%s tid:%d:%s%s", dbName[trace_level]._color, dbName[trace_level]._shortName, tls_tidInfo.tid(), msgStr, KNRM); \ 236 #define tprintf(trace_level, ...) 243 extern void recordApiTrace(std::string *fullStr,
const std::string &apiStr);
245 #if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1) 246 #define API_TRACE(forceTrace, ...)\ 248 tls_tidInfo.incApiSeqNum();\ 249 if (forceTrace || (HIP_PROFILE_API || (COMPILE_HIP_DB && (HIP_TRACE_API & (1<<TRACE_ALL))))) {\ 250 std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\ 251 std::string fullStr;\ 252 recordApiTrace(&fullStr, apiStr);\ 253 if (HIP_PROFILE_API == 0x1) {MARKER_BEGIN(__func__, "HIP") }\ 254 else if (HIP_PROFILE_API == 0x2) {MARKER_BEGIN(fullStr.c_str(), "HIP"); }\ 259 #define API_TRACE(IS_CMD, ...)\ 260 tls_tidInfo.incApiSeqNum(); 266 std::call_once(hip_initialized, ihipInit);\ 267 ihipCtxStackUpdate(); 268 #define HIP_SET_DEVICE()\ 269 ihipDeviceSetState(); 276 #define HIP_INIT_API(...) \ 278 API_TRACE(0, __VA_ARGS__); 284 #define HIP_INIT_SPECIAL_API(tbit, ...) \ 286 API_TRACE((HIP_TRACE_API&(1<<tbit)), __VA_ARGS__); 292 #define ihipLogStatus(hipStatus) \ 294 hipError_t localHipStatus = hipStatus; \ 295 tls_lastHipError = localHipStatus;\ 297 if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API & (1<<TRACE_ALL)) {\ 298 fprintf(stderr, " %ship-api tid:%d.%lu %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(),tls_tidInfo.apiSeqNum(), __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\ 300 if (HIP_PROFILE_API) { MARKER_END(); }\ 333 #define HIP_IPC_RESERVED_SIZE 24 341 char reserved[HIP_IPC_RESERVED_SIZE];
347 hsa_executable_t executable;
348 hsa_code_object_t object;
349 std::string fileName;
352 std::list<hipFunction_t> funcTrack;
353 ihipModule_t() : executable(), object(), fileName(), ptr(
nullptr), size(0) {}
// Always reports success; nothing is actually acquired.
bool try_lock()
{
    const bool acquired = true;
    return acquired;
}
368 #if STREAM_THREAD_SAFE 369 typedef std::mutex StreamMutex;
371 #warning "Stream thread-safe disabled" 377 typedef std::mutex CtxMutex;
380 #warning "Ctx thread-safe disabled" 383 #if DEVICE_THREAD_SAFE 384 typedef std::mutex DeviceMutex;
387 #warning "Device thread-safe disabled" 399 _criticalData(&criticalData),
400 _autoUnlock(autoUnlock)
403 tprintf(DB_SYNC,
"locking criticalData=%p for %s..\n", _criticalData, ToString(_criticalData->_parent).c_str());
404 _criticalData->_mutex.lock();
410 tprintf(DB_SYNC,
"auto-unlocking criticalData=%p for %s...\n", _criticalData, ToString(_criticalData->_parent).c_str());
411 _criticalData->_mutex.unlock();
417 tprintf(DB_SYNC,
"unlocking criticalData=%p for %s...\n", _criticalData, ToString(_criticalData->_parent).c_str());
418 _criticalData->_mutex.unlock();
422 T *operator->() {
return _criticalData; };
430 template <
typename MUTEX_TYPE>
435 void lock() { _mutex.lock(); }
436 void unlock() { _mutex.unlock(); }
437 bool try_lock() {
return _mutex.try_lock(); }
443 template <
typename MUTEX_TYPE>
450 _parent(parentStream)
460 tprintf(DB_SYNC,
"munlocking criticalData=%p for %s...\n",
this, ToString(this->_parent).c_str());
466 tprintf(DB_SYNC,
"mtry_locking=%d criticalData=%p for %s...\n", gotLock,
this, ToString(this->_parent).c_str());
467 return gotLock ?
this:
nullptr;
474 hc::accelerator_view _av;
493 enum ScheduleMode {Auto, Spin, Yield};
494 typedef uint64_t SeqNum_t ;
501 void locked_copySync (
void* dst,
const void* src,
size_t sizeBytes,
unsigned kind,
bool resolveOn =
true);
502 void locked_copyAsync(
void* dst,
const void* src,
size_t sizeBytes,
unsigned kind);
504 void lockedSymbolCopySync(hc::accelerator &acc,
void *dst,
void* src,
size_t sizeBytes,
size_t offset,
unsigned kind);
505 void lockedSymbolCopyAsync(hc::accelerator &acc,
void *dst,
void* src,
size_t sizeBytes,
size_t offset,
unsigned kind);
510 void lockclose_postKernelCommand(
const char *kernelName, hc::accelerator_view *av);
523 hc::hcWaitMode waitMode()
const;
528 void launchModuleKernel(hc::accelerator_view av, hsa_signal_t signal,
529 uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
530 uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
531 uint32_t groupSegmentSize, uint32_t sharedMemBytes,
532 void *kernarg,
size_t kernSize, uint64_t kernel);
542 bool isDefaultStream()
const {
return _id == 0; };
555 unsigned resolveMemcpyDirection(
bool srcInDeviceMem,
bool dstInDeviceMem);
556 void resolveHcMemcpyDirection(
unsigned hipMemKind,
557 const hc::AmPointerInfo *dstPtrInfo,
const hc::AmPointerInfo *srcPtrInfo,
558 hc::hcCommandKind *hcCopyDir,
560 bool *forceUnpinnedCopy);
562 bool canSeeMemory(
const ihipCtx_t *thisCtx,
const hc::AmPointerInfo *dstInfo,
const hc::AmPointerInfo *srcInfo);
564 void addSymbolPtrToTracker(hc::accelerator& acc,
void* ptr,
size_t sizeBytes);
573 std::mutex _hasQueueLock;
578 friend std::ostream& operator<<(std::ostream& os,
const ihipStream_t& s);
581 ScheduleMode _scheduleMode;
588 enum hipEventStatus_t {
589 hipEventStatusUnitialized = 0,
590 hipEventStatusCreated = 1,
591 hipEventStatusRecording = 2,
592 hipEventStatusComplete = 3,
596 enum ihipEventType_t {
597 hipEventTypeIndependent,
598 hipEventTypeStartCommand,
599 hipEventTypeStopCommand,
606 void attachToCompletionFuture(
const hc::completion_future *cf,
hipStream_t stream, ihipEventType_t eventType);
607 void refereshEventStatus();
608 uint64_t timestamp()
const {
return _timestamp; } ;
609 ihipEventType_t type()
const {
return _type; };
612 hipEventStatus_t _state;
617 hc::completion_future _marker;
620 ihipEventType_t _type;
629 template <
typename MUTEX_TYPE>
634 _parent(parentDevice)
645 std::list<ihipCtx_t*> &ctxs() {
return _ctxs; };
646 const std::list<ihipCtx_t*> &const_ctxs()
const {
return _ctxs; };
647 int getcount() {
return _ctxCount;};
653 std::list< ihipCtx_t* > _ctxs;
668 ihipDevice_t(
unsigned deviceId,
unsigned deviceCnt, hc::accelerator &acc);
672 ihipCtx_t *getPrimaryCtx()
const {
return _primaryCtx; };
679 hc::accelerator _acc;
680 hsa_agent_t _hsaAgent;
704 template <
typename MUTEX_TYPE>
712 _peerAgents =
new hsa_agent_t[deviceCnt];
716 if (_peerAgents !=
nullptr) {
718 _peerAgents =
nullptr;
725 std::list<ihipStream_t*> &streams() {
return _streams; };
726 const std::list<ihipStream_t*> &const_streams()
const {
return _streams; };
731 bool isPeerWatcher(
const ihipCtx_t *peer);
734 void resetPeerWatchers(
ihipCtx_t *thisDevice);
735 void printPeerWatchers(FILE *f)
const;
737 uint32_t peerCnt()
const {
return _peerCnt; };
738 hsa_agent_t *peerAgents()
const {
return _peerAgents; };
742 std::list<ihipCtx_t*> _peers;
749 std::list< ihipStream_t* > _streams;
757 hsa_agent_t *_peerAgents;
759 void recomputePeerAgents();
788 void locked_waitAllStreams();
789 void locked_syncDefaultStream(
bool waitOnSelf,
bool syncHost);
793 const ihipDevice_t *getDevice()
const {
return _device; };
794 int getDeviceNum()
const {
return _device->_deviceId; };
797 ihipDevice_t *getWriteableDevice()
const {
return _device; };
799 std::string toString()
const;
824 extern std::once_flag hip_initialized;
825 extern unsigned g_deviceCnt;
826 extern hsa_agent_t g_cpu_agent ;
827 extern hsa_agent_t *g_allAgents;
831 extern void ihipInit();
832 extern const char *ihipErrorString(
hipError_t);
833 extern ihipCtx_t *ihipGetTlsDefaultCtx();
834 extern void ihipSetTlsDefaultCtx(
ihipCtx_t *ctx);
836 extern void ihipCtxStackUpdate();
840 ihipCtx_t * ihipGetPrimaryCtx(
unsigned deviceIndex);
846 inline std::ostream& operator<<(std::ostream& os,
const ihipStream_t& s)
849 os << s.getDevice()->_deviceId;;
855 inline std::ostream & operator<<(std::ostream& os,
const dim3& s)
867 inline std::ostream & operator<<(std::ostream& os,
const gl_dim3& s)
880 inline std::ostream& operator<<(std::ostream& os,
const hipEvent_t& e)
882 os <<
"event:" << std::hex << static_cast<void*> (e);
886 inline std::ostream& operator<<(std::ostream& os,
const ihipCtx_t* c)
888 os <<
"ctx:" <<
static_cast<const void*
> (c)
889 <<
".dev:" << c->getDevice()->_deviceId;
Definition: hip_hcc_internal.h:210
Definition: hip_hcc_internal.h:665
Definition: hip_hcc_internal.h:431
Definition: hip_hcc_internal.h:92
Definition: hip_hcc_internal.h:359
Definition: hip_hcc_internal.h:334
uint32_t x
x
Definition: hip_runtime_api.h:194
Definition: hip_hcc_internal.h:775
Definition: hip_runtime_api.h:193
uint32_t y
y
Definition: hip_runtime_api.h:195
hipError_t
Definition: hip_runtime_api.h:154
hipMemcpyKind
Definition: hip_runtime_api.h:207
Definition: hip_hcc_internal.h:345
Definition: hip_hcc_internal.h:630
unsigned _computeUnits
Number of compute units supported by the device:
Definition: hip_hcc_internal.h:683
uint32_t z
z
Definition: hip_runtime_api.h:196
Definition: hip_runtime_api.h:83
Definition: hip_hcc_internal.h:895
Definition: hip_hcc_internal.h:309
Definition: hip_hcc_internal.h:603
Definition: hip_hcc_internal.h:705
Definition: hip_hcc_internal.h:491
Definition: hip_hcc_internal.h:444
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
Record an event in the specified stream.
Definition: hip_event.cpp:128
hsa_amd_ipc_memory_t ipc_handle
ipc memory handle on ROCr
Definition: hip_hcc_internal.h:338
Definition: hip_hcc_internal.h:395
hipError_t hipStreamQuery(hipStream_t stream)
Return hipSuccess if all of the operations in the specified stream have completed, or hipErrorNotReady if not.
Definition: hip_stream.cpp:115
Definition: hip_hcc_internal.h:76