HIP: Heterogeneous-computing Interface for Portability
hip_hcc_internal.h
1 /*
2 Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #ifndef HIP_SRC_HIP_HCC_INTERNAL_H
24 #define HIP_SRC_HIP_HCC_INTERNAL_H
25 
26 #include <hc.hpp>
27 #include <hsa/hsa.h>
28 #include "hsa/hsa_ext_amd.h"
29 
30 #include "hip/hip_runtime.h"
31 #include "hip_util.h"
32 #include "env.h"
33 
34 
35 #if defined(__HCC__) && (__hcc_workweek__ < 16354)
36 #error("This version of HIP requires a newer version of HCC.");
37 #endif
38 
39 #define USE_IPC 1
40 
41 //---
42 // Environment variables:
43 
44 // Intended to distinguish whether an environment variable should be visible only in debug mode, or in debug+release.
45 //static const int debug = 0;
46 extern const int release;
47 
48 // TODO - this blocks both kernels and memory ops. Perhaps should have separate env var for kernels?
49 extern int HIP_LAUNCH_BLOCKING;
50 extern int HIP_API_BLOCKING;
51 
52 extern int HIP_PRINT_ENV;
53 extern int HIP_PROFILE_API;
54 //extern int HIP_TRACE_API;
55 extern int HIP_ATP;
56 extern int HIP_DB;
57 extern int HIP_STAGING_SIZE; /* size of staging buffers, in KB */
58 extern int HIP_STREAM_SIGNALS; /* number of signals to allocate at stream creation */
59 extern int HIP_VISIBLE_DEVICES; /* Contains a comma-separated sequence of GPU identifiers */
60 extern int HIP_FORCE_P2P_HOST;
61 
62 extern int HIP_COHERENT_HOST_ALLOC;
63 
64 extern int HIP_HIDDEN_FREE_MEM;
65 //---
66 // Chicken bits for disabling functionality to work around potential issues:
67 extern int HIP_SYNC_HOST_ALLOC;
68 
69 extern int HIP_SYNC_NULL_STREAM;
70 
71 // TODO - remove when this is standard behavior.
72 extern int HCC_OPT_FLUSH;
73 
74 
// Per-thread debugging record: holds the short TID assigned to the thread and a
// running sequence number for the HIP API calls made on it.
class TidInfo {
public:
    // Defined out of line.
    TidInfo();

    // Short thread id used in debug output.
    int tid() const
    {
        return _shortTid;
    }

    // Advances the API sequence number by one and returns the new value.
    uint64_t incApiSeqNum()
    {
        _apiSeqNum += 1;
        return _apiSeqNum;
    }

    // Current API sequence number, without modifying it.
    uint64_t apiSeqNum() const
    {
        return _apiSeqNum;
    }

private:
    int _shortTid;

    // Monotonically increasing API sequence number for this thread.
    uint64_t _apiSeqNum;
};
91 
// Holds a list of 64-bit trigger values. After sort() the list is in descending
// order, so nextTrigger() (which reads the back of the list) yields the smallest one.
struct ProfTrigger {

    // Value reported by nextTrigger() when the list is empty.
    static const uint64_t MAX_TRIGGER = std::numeric_limits<uint64_t>::max();

    // Writes every stored trigger to stdout as "tid:<tid>.<trigger>,".
    void print(int tid) {
        std::cout << "Enabling tracing for ";
        for (const uint64_t trigger : _profTrigger) {
            std::cout << "tid:" << tid << "." << trigger << ",";
        }
        std::cout << "\n";
    }

    // Returns the last element of the list, or MAX_TRIGGER if there is none.
    uint64_t nextTrigger() {
        if (_profTrigger.empty()) {
            return MAX_TRIGGER;
        }
        return _profTrigger.back();
    }

    // Appends a trigger; call sort() afterwards to restore the ordering.
    void add(uint64_t trigger) { _profTrigger.push_back(trigger); }

    // Sorts in descending order. The comparator must use the element type:
    // comparing as int would truncate values above INT_MAX and misorder them.
    void sort() { std::sort(_profTrigger.begin(), _profTrigger.end(), std::greater<uint64_t>()); }
private:
    std::vector<uint64_t> _profTrigger;
};
110 
111 
112 
113 //---
114 //Extern tls
115 extern thread_local hipError_t tls_lastHipError;
116 extern thread_local TidInfo tls_tidInfo;
117 
118 extern std::vector<ProfTrigger> g_dbStartTriggers;
119 extern std::vector<ProfTrigger> g_dbStopTriggers;
120 
121 //---
122 //Forward defs:
123 class ihipStream_t;
124 class ihipDevice_t;
125 class ihipCtx_t;
126 
127 // Color defs for debug messages:
128 #define KNRM "\x1B[0m"
129 #define KRED "\x1B[31m"
130 #define KGRN "\x1B[32m"
131 #define KYEL "\x1B[33m"
132 #define KBLU "\x1B[34m"
133 #define KMAG "\x1B[35m"
134 #define KCYN "\x1B[36m"
135 #define KWHT "\x1B[37m"
136 
137 extern const char *API_COLOR;
138 extern const char *API_COLOR_END;
139 
140 
141 // If set, thread-safety is enforced on all stream functions.
142 // Stream functions will acquire a mutex before entering critical sections.
143 #define STREAM_THREAD_SAFE 1
144 
145 
146 #define CTX_THREAD_SAFE 1
147 
148 #define DEVICE_THREAD_SAFE 1
149 
150 
151 // Compile debug trace mode - this prints debug messages to stderr when env var HIP_DB is set.
152 // May be set to 0 to remove debug if checks - possible code size and performance difference?
153 #define COMPILE_HIP_DB 1
154 
155 
156 // Compile HIP tracing capability.
157 // 0x1 = print a string at function entry with arguments.
158 // 0x2 = prints a simple message with function name + return code when function exits.
159 // 0x3 = print both.
160 // Must be enabled at runtime with HIP_TRACE_API
161 #define COMPILE_HIP_TRACE_API 0x3
162 
163 
164 // Compile code that generates trace markers for CodeXL ATP at HIP function begin/end.
165 // ATP is standard CodeXL format that includes timestamps for kernels, HSA RT APIs, and HIP APIs.
166 #ifndef COMPILE_HIP_ATP_MARKER
167 #define COMPILE_HIP_ATP_MARKER 0
168 #endif
169 
170 
171 
172 
173 // Compile support for trace markers that are displayed on CodeXL GUI at start/stop of each function boundary.
// TODO - currently we print the trace message at the beginning. If we waited, we could also include return codes and any values returned
// through ptr-to-args (i.e. the pointers allocated by hipMalloc).
176 #if COMPILE_HIP_ATP_MARKER
177 #include "CXLActivityLogger.h"
178 #define MARKER_BEGIN(markerName,group) amdtBeginMarker(markerName, group, nullptr);
179 #define MARKER_END() amdtEndMarker();
180 #define RESUME_PROFILING amdtResumeProfiling(AMDT_ALL_PROFILING);
181 #define STOP_PROFILING amdtStopProfiling(AMDT_ALL_PROFILING);
182 #else
183 // Swallow scoped markers:
184 #define MARKER_BEGIN(markerName,group)
185 #define MARKER_END()
186 #define RESUME_PROFILING
187 #define STOP_PROFILING
188 #endif
189 
190 
191 //---
192 //HIP Trace modes - use with HIP_TRACE_API=...
193 #define TRACE_ALL 0 // 0x1
194 #define TRACE_KCMD 1 // 0x2, kernel command
195 #define TRACE_MCMD 2 // 0x4, memory command
196 #define TRACE_MEM 3 // 0x8, memory allocation or deallocation.
197 
198 
199 //---
200 //HIP_DB Debug flags:
201 #define DB_API 0 /* 0x01 - shortcut to enable HIP_TRACE_API on single switch */
202 #define DB_SYNC 1 /* 0x02 - trace synchronization pieces */
203 #define DB_MEM 2 /* 0x04 - trace memory allocation / deallocation */
204 #define DB_COPY 3 /* 0x08 - trace memory copy and peer commands. . */
205 #define DB_MAX_FLAG 4
206 // When adding a new debug flag, also add to the char name table below.
207 //
208 //
209 
210 struct DbName {
211  const char *_color;
212  const char *_shortName;
213 };
214 
215 // This table must be kept in-sync with the defines above.
216 static const DbName dbName [] =
217 {
218  {KGRN, "api"}, // not used,
219  {KYEL, "sync"},
220  {KCYN, "mem"},
221  {KMAG, "copy"},
222 };
223 
224 
225 
#if COMPILE_HIP_DB
// Prints a debug message to stderr when bit `trace_level` (one of the DB_* flags) is set in HIP_DB.
// The remaining arguments are a printf-style format and its values; the formatted message is
// truncated if it does not fit in the local buffer.
#define tprintf(trace_level, ...) {\
    if (HIP_DB & (1<<(trace_level))) {\
        char msgStr[1000];\
        /* Bound the write by the real buffer size; a larger limit lets snprintf overrun msgStr. */\
        snprintf(msgStr, sizeof(msgStr), __VA_ARGS__);\
        fprintf (stderr, " %ship-%s tid:%d:%s%s", dbName[trace_level]._color, dbName[trace_level]._shortName, tls_tidInfo.tid(), msgStr, KNRM); \
    }\
}
#else
/* Compile to empty code */
#define tprintf(trace_level, ...)
#endif
238 
239 
240 
241 
242 //---
243 extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
244 
#if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1)
// Increments the calling thread's API sequence number. Then, if forceTrace is non-zero, or
// HIP_PROFILE_API is set, or (with COMPILE_HIP_DB) the TRACE_ALL bit of HIP_TRACE_API is set,
// builds the "<function> (<args>)" string, passes it to recordApiTrace() and, for
// HIP_PROFILE_API values 1 and 2, begins a marker.
// forceTrace is parenthesized so that any expression can be passed safely.
#define API_TRACE(forceTrace, ...)\
{\
    tls_tidInfo.incApiSeqNum();\
    if ((forceTrace) || (HIP_PROFILE_API || (COMPILE_HIP_DB && (HIP_TRACE_API & (1<<TRACE_ALL))))) {\
        std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\
        std::string fullStr;\
        recordApiTrace(&fullStr, apiStr);\
        if (HIP_PROFILE_API == 0x1) {MARKER_BEGIN(__func__, "HIP") }\
        else if (HIP_PROFILE_API == 0x2) {MARKER_BEGIN(fullStr.c_str(), "HIP"); }\
    }\
}
#else
// Swallow API_TRACE: only the sequence number is advanced.
#define API_TRACE(forceTrace, ...)\
    tls_tidInfo.incApiSeqNum();
#endif
262 
263 
264 // Just initialize the HIP runtime, but don't log any trace information.
265 #define HIP_INIT()\
266  std::call_once(hip_initialized, ihipInit);\
267  ihipCtxStackUpdate();
268 #define HIP_SET_DEVICE()\
269  ihipDeviceSetState();
270 
271 
272 
273 // This macro should be called at the beginning of every HIP API.
274 // It initializes the hip runtime (exactly once), and
275 // generates a trace string that can be output to stderr or to ATP file.
276 #define HIP_INIT_API(...) \
277  HIP_INIT()\
278  API_TRACE(0, __VA_ARGS__);
279 
280 
// Initializes the runtime via HIP_INIT() and then traces the call, forcing the trace when
// bit `tbit` of HIP_TRACE_API is set.
// Use this instead of HIP_INIT_API inside HIP APIs that launch work on the GPU:
// kernel launches, copy commands, memory sets, etc.
// `tbit` is parenthesized so that an expression argument shifts as a whole.
#define HIP_INIT_SPECIAL_API(tbit, ...) \
    HIP_INIT()\
    API_TRACE((HIP_TRACE_API&(1<<(tbit))), __VA_ARGS__);
287 
288 
// This macro should be called at the end of every HIP API, and only at the end of top-level HIP APIs (not internal hip).
// It stores the status in tls_lastHipError for use by hipGetLastError, prints the closing trace
// line to stderr when API tracing is enabled, ends the marker when HIP_PROFILE_API is set,
// and evaluates to the status.
#define ihipLogStatus(hipStatus) \
    ({\
        hipError_t localHipStatus = hipStatus; /*local copy so hipStatus only evaluated once*/ \
        tls_lastHipError = localHipStatus;\
        if ((COMPILE_HIP_TRACE_API & 0x2) && (HIP_TRACE_API & (1<<TRACE_ALL))) {\
            /* apiSeqNum() returns uint64_t; widen explicitly so the argument matches the conversion specifier. */\
            fprintf(stderr, " %ship-api tid:%d.%llu %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(), static_cast<unsigned long long>(tls_tidInfo.apiSeqNum()), __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\
        }\
        if (HIP_PROFILE_API) { MARKER_END(); }\
        localHipStatus;\
    })
303 
304 
305 
306 
307 
308 
309 class ihipException : public std::exception
310 {
311 public:
312  ihipException(hipError_t e) : _code(e) {};
313 
314  hipError_t _code;
315 };
316 
317 
318 #ifdef __cplusplus
319 extern "C" {
320 #endif
321 
322 
323 #ifdef __cplusplus
324 }
325 #endif
326 
327 const hipStream_t hipStreamNull = 0x0;
328 
329 
333 #define HIP_IPC_RESERVED_SIZE 24
335 {
336 public:
337 #if USE_IPC
338  hsa_amd_ipc_memory_t ipc_handle;
339 #endif
340  size_t psize;
341  char reserved[HIP_IPC_RESERVED_SIZE];
342 };
343 
344 
346 public:
347  hsa_executable_t executable;
348  hsa_code_object_t object;
349  std::string fileName;
350  void *ptr;
351  size_t size;
352  std::list<hipFunction_t> funcTrack;
353  ihipModule_t() : executable(), object(), fileName(), ptr(nullptr), size(0) {}
354 };
355 
356 
357 //---
358 // Used to remove lock, for performance or stimulating bugs.
360 {
361  public:
362  void lock() { }
363  bool try_lock() {return true; }
364  void unlock() { }
365 };
366 
367 
368 #if STREAM_THREAD_SAFE
369 typedef std::mutex StreamMutex;
370 #else
371 #warning "Stream thread-safe disabled"
372 typedef FakeMutex StreamMutex;
373 #endif
374 
375 // Pair Device and Ctx together, these could also be toggled separately if desired.
376 #if CTX_THREAD_SAFE
377 typedef std::mutex CtxMutex;
378 #else
379 typedef FakeMutex CtxMutex;
380 #warning "Ctx thread-safe disabled"
381 #endif
382 
383 #if DEVICE_THREAD_SAFE
384 typedef std::mutex DeviceMutex;
385 #else
386 typedef FakeMutex DeviceMutex;
387 #warning "Device thread-safe disabled"
388 #endif
389 
390 //
391 //---
// Protects access to the member _data with a lock acquired on construction and (when autoUnlock is set) released on destruction.
393 // T must contain a _mutex field which meets the BasicLockable requirements (lock/unlock)
394 template<typename T>
396 {
397 public:
398  LockedAccessor(T &criticalData, bool autoUnlock=true) :
399  _criticalData(&criticalData),
400  _autoUnlock(autoUnlock)
401 
402  {
403  tprintf(DB_SYNC, "locking criticalData=%p for %s..\n", _criticalData, ToString(_criticalData->_parent).c_str());
404  _criticalData->_mutex.lock();
405  };
406 
407  ~LockedAccessor()
408  {
409  if (_autoUnlock) {
410  tprintf(DB_SYNC, "auto-unlocking criticalData=%p for %s...\n", _criticalData, ToString(_criticalData->_parent).c_str());
411  _criticalData->_mutex.unlock();
412  }
413  }
414 
415  void unlock()
416  {
417  tprintf(DB_SYNC, "unlocking criticalData=%p for %s...\n", _criticalData, ToString(_criticalData->_parent).c_str());
418  _criticalData->_mutex.unlock();
419  }
420 
421  // Syntactic sugar so -> can be used to get the underlying type.
422  T *operator->() { return _criticalData; };
423 
424 private:
425  T *_criticalData;
426  bool _autoUnlock;
427 };
428 
429 
// Base that owns a mutex of type MUTEX_TYPE and forwards the locking calls to it.
template <typename MUTEX_TYPE>
struct LockedBase {

    MUTEX_TYPE _mutex;

    // Explicit locking interface, for experts only.
    // Most code should go through the lock-accessor instead.
    void lock()
    {
        _mutex.lock();
    }

    void unlock()
    {
        _mutex.unlock();
    }

    // Returns whatever the underlying mutex's try_lock() reports.
    bool try_lock()
    {
        const bool acquired = _mutex.try_lock();
        return acquired;
    }
};
441 
442 
443 template <typename MUTEX_TYPE>
444 class ihipStreamCriticalBase_t : public LockedBase<MUTEX_TYPE>
445 {
446 public:
447  ihipStreamCriticalBase_t(ihipStream_t *parentStream, hc::accelerator_view av) :
448  _kernelCnt(0),
449  _av(av),
450  _parent(parentStream)
451  {
452  };
453 
455  }
456 
458 
459  void munlock() {
460  tprintf(DB_SYNC, "munlocking criticalData=%p for %s...\n", this, ToString(this->_parent).c_str());
462  };
463 
465  bool gotLock = LockedBase<MUTEX_TYPE>::try_lock() ;
466  tprintf(DB_SYNC, "mtry_locking=%d criticalData=%p for %s...\n", gotLock, this, ToString(this->_parent).c_str());
467  return gotLock ? this: nullptr;
468  };
469 
470 public:
471  ihipStream_t * _parent;
472  uint32_t _kernelCnt; // Count of inflight kernels in this stream. Reset at ::wait().
473 
474  hc::accelerator_view _av;
475 private:
476 };
477 
478 
479 // if HIP code needs to acquire locks for both ihipCtx_t and ihipStream_t, it should first acquire the lock
480 // for the ihipCtx_t and then for the individual streams. The locks should not be acquired in reverse order
481 // or deadlock may occur. In some cases, it may be possible to reduce the range where the locks must be held.
482 // HIP routines should avoid acquiring and releasing the same lock during the execution of a single HIP API.
483 // Another option is to use try_lock in the innermost lock query.
484 
485 
488 
489 //---
490 // Internal stream structure.
492 public:
493  enum ScheduleMode {Auto, Spin, Yield};
494  typedef uint64_t SeqNum_t ;
495 
    // TODO - make av a reference to avoid shared_ptr overhead?
497  ihipStream_t(ihipCtx_t *ctx, hc::accelerator_view av, unsigned int flags);
498  ~ihipStream_t();
499 
500  // kind is hipMemcpyKind
501  void locked_copySync (void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn = true);
502  void locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind);
503 
504  void lockedSymbolCopySync(hc::accelerator &acc, void *dst, void* src, size_t sizeBytes, size_t offset, unsigned kind);
505  void lockedSymbolCopyAsync(hc::accelerator &acc, void *dst, void* src, size_t sizeBytes, size_t offset, unsigned kind);
506 
507  //---
508  // Member functions that begin with locked_ are thread-safe accessors - these acquire / release the critical mutex.
509  LockedAccessor_StreamCrit_t lockopen_preKernelCommand();
510  void lockclose_postKernelCommand(const char *kernelName, hc::accelerator_view *av);
511 
512 
513  void locked_wait();
514 
515  hc::accelerator_view* locked_getAv() { LockedAccessor_StreamCrit_t crit(_criticalData); return &(crit->_av); };
516 
517  void locked_waitEvent(hipEvent_t event);
518  void locked_recordEvent(hipEvent_t event);
519 
520  ihipStreamCritical_t &criticalData() { return _criticalData; };
521 
522  //---
523  hc::hcWaitMode waitMode() const;
524 
525  // Use this if we already have the stream critical data mutex:
526  void wait(LockedAccessor_StreamCrit_t &crit);
527 
528  void launchModuleKernel(hc::accelerator_view av, hsa_signal_t signal,
529  uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
530  uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
531  uint32_t groupSegmentSize, uint32_t sharedMemBytes,
532  void *kernarg, size_t kernSize, uint64_t kernel);
533 
534 
535 
536  //-- Non-racy accessors:
537  // These functions access fields set at initialization time and are non-racy (so do not acquire mutex)
538  const ihipDevice_t * getDevice() const;
539  ihipCtx_t * getCtx() const;
540 
541  // Before calling this function, stream must be resolved from "0" to the actual stream:
542  bool isDefaultStream() const { return _id == 0; };
543 
544 public:
545  //---
546  //Public member vars - these are set at initialization and never change:
547  SeqNum_t _id; // monotonic sequence ID. 0 is the default stream.
548  unsigned _flags;
549 
550 
551 private:
552 
553 
554  // The unsigned return is hipMemcpyKind
555  unsigned resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDeviceMem);
556  void resolveHcMemcpyDirection(unsigned hipMemKind,
557  const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo,
558  hc::hcCommandKind *hcCopyDir,
559  ihipCtx_t **copyDevice,
560  bool *forceUnpinnedCopy);
561 
562  bool canSeeMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstInfo, const hc::AmPointerInfo *srcInfo);
563 
564  void addSymbolPtrToTracker(hc::accelerator& acc, void* ptr, size_t sizeBytes);
565 
566 
567 public: // TODO - move private
568  // Critical Data - MUST be accessed through LockedAccessor_StreamCrit_t
569  ihipStreamCritical_t _criticalData;
570 
571 private: // Data
572 
573  std::mutex _hasQueueLock;
574 
575  ihipCtx_t *_ctx; // parent context that owns this stream.
576 
577  // Friends:
578  friend std::ostream& operator<<(std::ostream& os, const ihipStream_t& s);
580 
581  ScheduleMode _scheduleMode;
582 };
583 
584 
585 
586 //----
587 // Internal event structure:
// States an internal HIP event can be in.
enum hipEventStatus_t {
    // Not initialized; the event must be "Created" before use.
    hipEventStatusUnitialized = 0,
    // Created, but not yet recorded.
    hipEventStatusCreated = 1,
    // Recorded into a stream, but not completed yet.
    hipEventStatusRecording = 2,
    // Recorded and complete - timestamps are valid.
    hipEventStatusComplete = 3,
};
594 
// Kinds of internal event. The enumerator values are spelled out explicitly and match the
// implicit numbering (0, 1, 2).
// TODO - rename to ihip type of some kind
enum ihipEventType_t {
    hipEventTypeIndependent  = 0,
    hipEventTypeStartCommand = 1,
    hipEventTypeStopCommand  = 2,
};
601 
602 // internal hip event structure.
603 class ihipEvent_t {
604 public:
605  ihipEvent_t(unsigned flags);
606  void attachToCompletionFuture(const hc::completion_future *cf, hipStream_t stream, ihipEventType_t eventType);
607  void refereshEventStatus();
608  uint64_t timestamp() const { return _timestamp; } ;
609  ihipEventType_t type() const { return _type; };
610 
611 public:
612  hipEventStatus_t _state;
613 
614  hipStream_t _stream; // Stream where the event is recorded, or NULL if all streams.
615  unsigned _flags;
616 
617  hc::completion_future _marker;
618 
619 private:
620  ihipEventType_t _type;
621  uint64_t _timestamp; // store timestamp, may be set on host or by marker.
622 friend hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream);
623 } ;
624 
625 
626 
627 //=============================================================================
628 //class ihipDeviceCriticalBase_t
629 template <typename MUTEX_TYPE>
631 {
632 public:
633  ihipDeviceCriticalBase_t(ihipDevice_t *parentDevice) :
634  _parent(parentDevice)
635  {
636  };
637 
639 
640  }
641 
642  // Contexts:
643  void addContext(ihipCtx_t *ctx);
644  void removeContext(ihipCtx_t *ctx);
645  std::list<ihipCtx_t*> &ctxs() { return _ctxs; };
646  const std::list<ihipCtx_t*> &const_ctxs() const { return _ctxs; };
647  int getcount() {return _ctxCount;};
648  friend class LockedAccessor<ihipDeviceCriticalBase_t>;
649 private:
650  ihipDevice_t *_parent;
651 
652  //--- Context Tracker:
653  std::list< ihipCtx_t* > _ctxs; // contexts associated with this device across all threads.
654 
655  int _ctxCount;
656 };
657 
659 
661 
662 //----
663 // Properties of the HIP device.
664 // Multiple contexts can point to same device.
666 {
667 public:
668  ihipDevice_t(unsigned deviceId, unsigned deviceCnt, hc::accelerator &acc);
669  ~ihipDevice_t();
670 
671  // Accessors:
672  ihipCtx_t *getPrimaryCtx() const { return _primaryCtx; };
673  void locked_removeContext(ihipCtx_t *c);
674  void locked_reset();
675  ihipDeviceCritical_t &criticalData() { return _criticalData; };
676 public:
677  unsigned _deviceId; // device ID
678 
679  hc::accelerator _acc;
680  hsa_agent_t _hsaAgent; // hsa agent handle
681 
683  unsigned _computeUnits;
684  hipDeviceProp_t _props; // saved device properties.
685 
686  // TODO - report this through device properties, base on HCC API call.
687  int _isLargeBar;
688 
689  ihipCtx_t *_primaryCtx;
690 
691  int _state; //1 if device is set otherwise 0
692 
693 private:
694  hipError_t initProperties(hipDeviceProp_t* prop);
695 private:
696  ihipDeviceCritical_t _criticalData;
697 };
698 //=============================================================================
699 
700 
701 
702 //=============================================================================
703 //class ihipCtxCriticalBase_t
704 template <typename MUTEX_TYPE>
706 {
707 public:
708  ihipCtxCriticalBase_t(ihipCtx_t *parentCtx, unsigned deviceCnt) :
709  _parent(parentCtx),
710  _peerCnt(0)
711  {
712  _peerAgents = new hsa_agent_t[deviceCnt];
713  };
714 
716  if (_peerAgents != nullptr) {
717  delete _peerAgents;
718  _peerAgents = nullptr;
719  }
720  _peerCnt = 0;
721  }
722 
723  // Streams:
724  void addStream(ihipStream_t *stream);
725  std::list<ihipStream_t*> &streams() { return _streams; };
726  const std::list<ihipStream_t*> &const_streams() const { return _streams; };
727 
728 
729 
730  // Peer Accessor classes:
731  bool isPeerWatcher(const ihipCtx_t *peer); // returns True if peer has access to memory physically located on this device.
732  bool addPeerWatcher(const ihipCtx_t *thisCtx, ihipCtx_t *peer);
733  bool removePeerWatcher(const ihipCtx_t *thisCtx, ihipCtx_t *peer);
734  void resetPeerWatchers(ihipCtx_t *thisDevice);
735  void printPeerWatchers(FILE *f) const;
736 
737  uint32_t peerCnt() const { return _peerCnt; };
738  hsa_agent_t *peerAgents() const { return _peerAgents; };
739 
740 
741  // TODO - move private
742  std::list<ihipCtx_t*> _peers; // list of enabled peer devices.
743 
744  friend class LockedAccessor<ihipCtxCriticalBase_t>;
745 private:
746  ihipCtx_t * _parent;
747 
748  //--- Stream Tracker:
749  std::list< ihipStream_t* > _streams; // streams associated with this device.
750 
751 
752  //--- Peer Tracker:
753  // These reflect the currently Enabled set of peers for this GPU:
754  // Enabled peers have permissions to access the memory physically allocated on this device.
755  // Note the peers always contain the self agent for easy interfacing with HSA APIs.
756  uint32_t _peerCnt; // number of enabled peers
757  hsa_agent_t *_peerAgents; // efficient packed array of enabled agents (to use for allocations.)
758 private:
759  void recomputePeerAgents();
760 };
761 // Note Mutex type Real/Fake selected based on CtxMutex
763 
764 // This type is used by functions that need access to the critical device structures.
766 //=============================================================================
767 
768 
769 //=============================================================================
770 //class ihipCtx_t:
771 // A HIP CTX (context) points at one of the existing devices and contains the streams,
772 // peer-to-peer mappings, creation flags. Multiple contexts can point to the same
773 // device.
774 //
776 {
777 public: // Functions:
778  ihipCtx_t(ihipDevice_t *device, unsigned deviceCnt, unsigned flags); // note: calls constructor for _criticalData
779  ~ihipCtx_t();
780 
781  // Functions which read or write the critical data are named locked_.
782  // (might be better called "locking_"
783  // ihipCtx_t does not use recursive locks so the ihip implementation must avoid calling a locked_ function from within a locked_ function.
784  // External functions which call several locked_ functions will acquire and release the lock for each function. if this occurs in
785  // performance-sensitive code we may want to refactor by adding non-locked functions and creating a new locked_ member function to call them all.
786  void locked_removeStream(ihipStream_t *s);
787  void locked_reset();
788  void locked_waitAllStreams();
789  void locked_syncDefaultStream(bool waitOnSelf, bool syncHost);
790 
791  ihipCtxCritical_t &criticalData() { return _criticalData; };
792 
793  const ihipDevice_t *getDevice() const { return _device; };
794  int getDeviceNum() const { return _device->_deviceId; };
795 
796  // TODO - review uses of getWriteableDevice(), can these be converted to getDevice()
797  ihipDevice_t *getWriteableDevice() const { return _device; };
798 
799  std::string toString() const;
800 
801 public: // Data
802  // The NULL stream is used if no other stream is specified.
803  // Default stream has special synchronization properties with other streams.
804  ihipStream_t *_defaultStream;
805 
806  // Flags specified when the context is created:
807  unsigned _ctxFlags;
808 
809 private:
810  ihipDevice_t *_device;
811 
812 
813 private: // Critical data, protected with locked access:
814  // Members of _protected data MUST be accessed through the LockedAccessor.
815  // Search for LockedAccessor<ihipCtxCritical_t> for examples; do not access _criticalData directly.
816  ihipCtxCritical_t _criticalData;
817 
818 };
819 
820 
821 
822 //=================================================================================================
823 // Global variable definition:
824 extern std::once_flag hip_initialized;
825 extern unsigned g_deviceCnt;
826 extern hsa_agent_t g_cpu_agent ; // the CPU agent.
827 extern hsa_agent_t *g_allAgents; // CPU agents + all the visible GPU agents.
828 
829 //=================================================================================================
830 // Extern functions:
831 extern void ihipInit();
832 extern const char *ihipErrorString(hipError_t);
833 extern ihipCtx_t *ihipGetTlsDefaultCtx();
834 extern void ihipSetTlsDefaultCtx(ihipCtx_t *ctx);
835 extern hipError_t ihipSynchronize(void);
836 extern void ihipCtxStackUpdate();
837 extern hipError_t ihipDeviceSetState();
838 
839 extern ihipDevice_t *ihipGetDevice(int);
840 ihipCtx_t * ihipGetPrimaryCtx(unsigned deviceIndex);
841 
842 
843 hipStream_t ihipSyncAndResolveStream(hipStream_t);
844 
845 // Stream printf functions:
846 inline std::ostream& operator<<(std::ostream& os, const ihipStream_t& s)
847 {
848  os << "stream:";
849  os << s.getDevice()->_deviceId;;
850  os << '.';
851  os << s._id;
852  return os;
853 }
854 
855 inline std::ostream & operator<<(std::ostream& os, const dim3& s)
856 {
857  os << '{';
858  os << s.x;
859  os << ',';
860  os << s.y;
861  os << ',';
862  os << s.z;
863  os << '}';
864  return os;
865 }
866 
867 inline std::ostream & operator<<(std::ostream& os, const gl_dim3& s)
868 {
869  os << '{';
870  os << s.x;
871  os << ',';
872  os << s.y;
873  os << ',';
874  os << s.z;
875  os << '}';
876  return os;
877 }
878 
879 // Stream printf functions:
880 inline std::ostream& operator<<(std::ostream& os, const hipEvent_t& e)
881 {
882  os << "event:" << std::hex << static_cast<void*> (e);
883  return os;
884 }
885 
886 inline std::ostream& operator<<(std::ostream& os, const ihipCtx_t* c)
887 {
888  os << "ctx:" << static_cast<const void*> (c)
889  << ".dev:" << c->getDevice()->_deviceId;
890  return os;
891 }
892 
893 
894 // Helper functions that are used across src files:
895 namespace hip_internal {
896  hipError_t memcpyAsync (void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream);
897 };
898 
899 
900 #endif
Definition: hip_hcc_internal.h:210
Definition: hip_hcc_internal.h:665
Definition: hip_hcc_internal.h:431
Definition: hip_hcc_internal.h:92
Definition: hip_hcc_internal.h:359
Definition: hip_hcc_internal.h:334
uint32_t x
x
Definition: hip_runtime_api.h:194
Definition: hip_hcc_internal.h:775
Definition: hip_runtime_api.h:193
uint32_t y
y
Definition: hip_runtime_api.h:195
hipError_t
Definition: hip_runtime_api.h:154
hipMemcpyKind
Definition: hip_runtime_api.h:207
Definition: hip_hcc_internal.h:345
Definition: hip_hcc_internal.h:630
unsigned _computeUnits
Number of compute units supported by the device:
Definition: hip_hcc_internal.h:683
uint32_t z
z
Definition: hip_runtime_api.h:196
Definition: hip_runtime_api.h:83
Definition: hip_hcc_internal.h:895
Definition: hip_hcc_internal.h:309
Definition: hip_hcc_internal.h:603
Definition: hip_hcc_internal.h:705
Definition: hip_hcc_internal.h:491
Definition: hip_hcc_internal.h:444
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
Record an event in the specified stream.
Definition: hip_event.cpp:128
hsa_amd_ipc_memory_t ipc_handle
ipc memory handle on ROCr
Definition: hip_hcc_internal.h:338
Definition: hip_hcc_internal.h:395
hipError_t hipStreamQuery(hipStream_t stream)
Return hipSuccess if all of the operations in the specified stream have completed, or hipErrorNotReady if not.
Definition: hip_stream.cpp:115
Definition: hip_hcc_internal.h:76