.. _program_listing_file_include_embers_amdgpu_hwid.h: Program Listing for File hwid.h =============================== |exhale_lsh| :ref:`Return to documentation for file ` (``include/embers/amdgpu/hwid.h``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp /* Copyright © 2020 Advanced Micro Devices, Inc. All rights reserved */ #ifndef HWID_H #define HWID_H #include #include #include #include #include #include #include "embers/status.h" #include "embers/amdgpu/arch.h" #include "embers/helpers/bit_helpers.cuh" namespace embers { namespace amdgpu { class HwRegIDs { public: static constexpr int MAX_NUM_HWID_REGS = 2; private: static constexpr unsigned short GFX9_ME_MSB = 31; static constexpr unsigned short GFX9_STATE_MSB = 29; static constexpr unsigned short GFX9_ME_LSB = 30; static constexpr unsigned short GFX9_STATE_LSB = 27; static constexpr unsigned short GFX9_QUEUE_MSB = 26; static constexpr unsigned short GFX9_QUEUE_LSB = 24; static constexpr unsigned short GFX9_VM_MSB = 23; static constexpr unsigned short GFX9_VM_LSB = 20; static constexpr unsigned short GFX9_TG_MSB = 19; static constexpr unsigned short GFX9_TG_LSB = 16; static constexpr unsigned short GFX9_SE_MSB = 14; static constexpr unsigned short GFX9_SE_LSB = 13; static constexpr unsigned short GFX9_SH_MSB = 12; static constexpr unsigned short GFX9_SH_LSB = 12; static constexpr unsigned short GFX9_CU_MSB = 11; static constexpr unsigned short GFX9_CU_LSB = 8; static constexpr unsigned short GFX9_PIPE_MSB = 7; static constexpr unsigned short GFX9_PIPE_LSB = 6; static constexpr unsigned short GFX9_SIMD_MSB = 5; static constexpr unsigned short GFX9_SIMD_LSB = 4; static constexpr unsigned short GFX9_WAVE_MSB = 3; static constexpr unsigned short GFX9_WAVE_LSB = 0; // GFX940Plus static constexpr unsigned short GFX940P_XCC_MSB = 3; static constexpr unsigned short GFX940P_XCC_LSB = 0; // GFX10Plus SQ_WAVE_HW_ID1 static constexpr unsigned short GFX10P_SE_MSB = 19; static constexpr unsigned short GFX10P_SE_LSB = 18; static constexpr unsigned short GFX10P_SA_MSB = 16; static constexpr unsigned short GFX10P_SA_LSB = 16; static constexpr unsigned short GFX10P_WGP_MSB = 13; static constexpr unsigned short GFX10P_WGP_LSB = 10; static constexpr unsigned short GFX10P_SIMD_MSB = 9; static constexpr unsigned short GFX10P_SIMD_LSB = 8; static constexpr unsigned short GFX10P_WAVE_MSB = 4; static constexpr unsigned short GFX10P_WAVE_LSB = 0; // GFX10Plus SQ_WAVE_HW_ID2 static constexpr unsigned short GFX10P_COMPAT_LEVEL_MSB = 30; static constexpr unsigned short GFX10P_COMPAT_LEVEL_LSB = 29; static constexpr unsigned short GFX10P_VM_MSB = 27; static constexpr unsigned short GFX10P_VM_LSB = 24; static constexpr unsigned short GFX10P_WG_MSB = 20; static constexpr unsigned short GFX10P_WG_LSB = 16; static constexpr unsigned short GFX10P_STATE_MSB = 14; static constexpr unsigned short GFX10P_STATE_LSB = 12; static constexpr unsigned short GFX10P_ME_MSB = 9; static constexpr unsigned short GFX10P_ME_LSB = 8; static constexpr unsigned short GFX10P_PIPE_MSB = 5; static constexpr unsigned short GFX10P_PIPE_LSB = 4; static constexpr unsigned short GFX10P_QUEUE_MSB = 3; static constexpr unsigned short GFX10P_QUEUE_LSB = 0; // GFX11Plus HW_ID1 static constexpr unsigned short GFX11P_DP_RATE_MSB = 31; static constexpr unsigned short GFX11P_DP_RATE_LSB = 29; static constexpr unsigned short GFX11P_SE_MSB = 20; static constexpr unsigned short GFX11P_SE_LSB = 18; static constexpr unsigned short GFX11P_SA_MSB = 16; static constexpr unsigned short GFX11P_SA_LSB = 16; static constexpr unsigned short GFX11P_WGP_MSB = 13; static constexpr unsigned short GFX11P_WGP_LSB = 10; static constexpr unsigned short GFX11P_SIMD_MSB = 9; static constexpr unsigned short GFX11P_SIMD_LSB = 8; static constexpr unsigned short GFX11P_WAVE_MSB = 4; static constexpr unsigned short GFX11P_WAVE_LSB = 0; // GFX11Plus HW_ID2 static constexpr unsigned short GFX11P_VM_MSB = 27; static constexpr unsigned short GFX11P_VM_LSB = 24; static constexpr unsigned short GFX11P_WG_MSB = 20; static constexpr unsigned short GFX11P_WG_LSB = 16; static constexpr unsigned short GFX11P_STATE_MSB = 14; static constexpr unsigned short GFX11P_STATE_LSB = 12; static constexpr unsigned short GFX11P_ME_MSB = 9; static constexpr unsigned short GFX11P_ME_LSB = 8; static constexpr unsigned short GFX11P_PIPE_MSB = 5; static constexpr unsigned short GFX11P_PIPE_LSB = 4; static constexpr unsigned short GFX11P_QUEUE_MSB = 3; static constexpr unsigned short GFX11P_QUEUE_LSB = 0; std::array raw_; GFXArch gfx_arch_; // raw[1] used for xcc ID in MI300 and for HWID reg 2 for GFX10PLus // GFX9 __device__ inline void SetArch() noexcept { #if defined(__gfx900__) || defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) gfx_arch_ = GFXArch(ArchFamily::GFX9); #elif defined(__gfx1010__) || defined(__gfx1011__) || defined(__gfx1012__) || \ defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1032__) || \ defined(__gfx1033__) || defined(__gfx1034__) || defined(__gfx1035__) || defined(__gfx1036__) gfx_arch_ = GFXArch(ArchFamily::GFX10Plus); #elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) gfx_arch_ = GFXArch(ArchFamily::GFX11Plus); #elif defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) gfx_arch_ = GFXArch(ArchFamily::GFX940Plus); #elif defined(__HIP_DEVICE_COMPILE__) static_assert(false, "This GFX_ARCH is not supported"); #endif } __host__ inline void SetArch() noexcept { gfx_arch_ = GFXArch(ArchFamily::INVALID); } __device__ inline void ReadHwIDRegs() noexcept { #if defined(__gfx900__) || defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) asm volatile("s_getreg_b32 %0, hwreg(HW_REG_HW_ID, 0, 32)" : "=r"(raw_[0])::); raw_[1] = 0; #elif defined(__gfx1010__) || defined(__gfx1011__) || defined(__gfx1012__) || \ defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1032__) || \ defined(__gfx1033__) || defined(__gfx1034__) || defined(__gfx1035__) || defined(__gfx1036__) asm volatile("s_getreg_b32 %0, hwreg(HW_REG_HW_ID, 0, 32)" : "=r"(raw_[0])::); asm volatile("s_getreg_b32 %0, hwreg(HW_REG_HW_ID2, 0, 32)" : "=r"(raw_[1])::); #elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) asm volatile("s_getreg_b32 %0, hwreg(HW_REG_HW_ID1, 0, 32)" : "=r"(raw_[0])::); asm volatile("s_getreg_b32 %0, hwreg(HW_REG_HW_ID2, 0, 32)" : "=r"(raw_[1])::); #elif defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) gfx_arch_ = GFXArch(ArchFamily::GFX940Plus); asm volatile("s_getreg_b32 %0, hwreg(HW_REG_HW_ID, 0, 32)" : "=r"(raw_[0])::); asm volatile("s_getreg_b32 %0, hwreg(HW_REG_XCC_ID, 0, 32)" : "=r"(raw_[1])::); #elif defined(__HIP_DEVICE_COMPILE__) static_assert(false, "This GFX_ARCH is not supported"); #endif } __host__ inline void ReadHwIDRegs() volatile noexcept {} __host__ void InitState() { SetArch(); raw_.fill(0); } __device__ void InitState() { SetArch(); ReadHwIDRegs(); } __host__ void FeatureNotSupported() { throw StatusError(Status::Code::ERROR, "HwRegIDs feature not supported for this arch"); } __device__ void FeatureNotSupported() { abort(); } public: __host__ __device__ HwRegIDs() { InitState(); } __host__ HwRegIDs(ArchFamily family, std::array regvals) : raw_(regvals), gfx_arch_(GFXArch(family)) { } __host__ __device__ uint32_t HwID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX9: case ArchFamily::GFX940Plus: return raw_.at(0); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint32_t HwID1() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX10Plus: return raw_.at(0); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint32_t HwID2() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX10Plus: return raw_.at(1); default: FeatureNotSupported(); return 0; } } __host__ __device__ GFXArch GfxArch() const noexcept { return gfx_arch_; } __host__ __device__ uint8_t WaveID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX9: case ArchFamily::GFX940Plus: return get_bits(raw_[0], GFX9_WAVE_MSB, GFX9_WAVE_LSB); case ArchFamily::GFX10Plus: return get_bits(raw_[0], GFX10P_WAVE_MSB, GFX10P_WAVE_LSB); case ArchFamily::GFX11Plus: return get_bits(raw_[0], GFX11P_WAVE_MSB, GFX11P_WAVE_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t SimdID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX9: case ArchFamily::GFX940Plus: return get_bits(raw_[0], GFX9_SIMD_MSB, GFX9_SIMD_LSB); case ArchFamily::GFX10Plus: return get_bits(raw_[0], GFX10P_SIMD_MSB, GFX10P_SIMD_LSB); case ArchFamily::GFX11Plus: return get_bits(raw_[0], GFX11P_SIMD_MSB, GFX11P_SIMD_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t PipeID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX9: case ArchFamily::GFX940Plus: return get_bits(raw_[0], GFX9_PIPE_MSB, GFX9_PIPE_LSB); case ArchFamily::GFX10Plus: return get_bits(raw_[1], GFX10P_PIPE_MSB, GFX10P_PIPE_LSB); case ArchFamily::GFX11Plus: return get_bits(raw_[1], GFX11P_PIPE_MSB, GFX11P_PIPE_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t CuID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX9: case ArchFamily::GFX940Plus: return get_bits(raw_[0], GFX9_CU_MSB, GFX9_CU_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t ShID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX9: case ArchFamily::GFX940Plus: return get_bits(raw_[0], GFX9_SH_MSB, GFX9_SH_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t SeID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX9: case ArchFamily::GFX940Plus: return get_bits(raw_[0], GFX9_SE_MSB, GFX9_SE_LSB); case ArchFamily::GFX10Plus: return get_bits(raw_[0], GFX10P_SE_MSB, GFX10P_SE_LSB); case ArchFamily::GFX11Plus: return get_bits(raw_[0], GFX11P_SE_MSB, GFX11P_SE_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t DPRate() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX11Plus: return get_bits(raw_[0], GFX11P_DP_RATE_MSB, GFX11P_DP_RATE_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t TgID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX9: case ArchFamily::GFX940Plus: return get_bits(raw_[0], GFX9_TG_MSB, GFX9_TG_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t VmID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX9: case ArchFamily::GFX940Plus: return get_bits(raw_[0], GFX9_VM_MSB, GFX9_VM_LSB); case ArchFamily::GFX10Plus: return get_bits(raw_[1], GFX10P_VM_MSB, GFX10P_VM_LSB); case ArchFamily::GFX11Plus: return get_bits(raw_[1], GFX11P_VM_MSB, GFX11P_VM_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t QueueID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX9: case ArchFamily::GFX940Plus: return get_bits(raw_[0], GFX9_QUEUE_MSB, GFX9_QUEUE_LSB); case ArchFamily::GFX10Plus: return get_bits(raw_[1], GFX10P_QUEUE_MSB, GFX10P_QUEUE_LSB); case ArchFamily::GFX11Plus: return get_bits(raw_[1], GFX11P_QUEUE_MSB, GFX11P_QUEUE_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t StateID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX9: case ArchFamily::GFX940Plus: return get_bits(raw_[0], GFX9_STATE_MSB, GFX9_STATE_LSB); case ArchFamily::GFX10Plus: return get_bits(raw_[1], GFX10P_STATE_MSB, GFX10P_STATE_LSB); case ArchFamily::GFX11Plus: return get_bits(raw_[1], GFX11P_STATE_MSB, GFX11P_STATE_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t MeID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX9: case ArchFamily::GFX940Plus: return get_bits(raw_[0], GFX9_ME_MSB, GFX9_ME_LSB); case ArchFamily::GFX10Plus: return get_bits(raw_[1], GFX10P_ME_MSB, GFX10P_ME_LSB); case ArchFamily::GFX11Plus: return get_bits(raw_[1], GFX11P_ME_MSB, GFX11P_ME_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t WgID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX10Plus: return get_bits(raw_[1], GFX10P_WG_MSB, GFX10P_WG_LSB); case ArchFamily::GFX11Plus: return get_bits(raw_[1], GFX11P_WG_MSB, GFX11P_WG_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t CompatLevelID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX10Plus: return get_bits(raw_[1], GFX10P_COMPAT_LEVEL_MSB, GFX10P_COMPAT_LEVEL_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t WgpID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX10Plus: return get_bits(raw_[0], GFX10P_WGP_MSB, GFX10P_WGP_LSB); case ArchFamily::GFX11Plus: return get_bits(raw_[0], GFX11P_WGP_MSB, GFX11P_WGP_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t SaID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX10Plus: return get_bits(raw_[0], GFX10P_SA_MSB, GFX10P_SA_LSB); case ArchFamily::GFX11Plus: return get_bits(raw_[0], GFX11P_SA_MSB, GFX11P_SA_LSB); default: FeatureNotSupported(); return 0; } } __host__ __device__ uint8_t XccID() { ReadHwIDRegs(); switch (gfx_arch_.Family()) { case ArchFamily::GFX940Plus: return get_bits(raw_[1], GFX940P_XCC_MSB, GFX940P_XCC_LSB); default: FeatureNotSupported(); return 0; } } operator std::string() { ReadHwIDRegs(); std::stringstream ss; switch (gfx_arch_.Family()) { case (ArchFamily::GFX9): ss << "family:" << std::string(gfx_arch_) << " hwid:0x" << std::setfill('0') << std::setw(8) << std::hex << raw_[0]; ss << std::dec << " wave:" << std::to_string(WaveID()) << " simd:" << std::to_string(SimdID()) << " pipe:"; ss << std::to_string(PipeID()) << " cu:" << std::to_string(CuID()) << " sh:" << std::to_string(ShID()); ss << " se:" << std::to_string(SeID()) << " tg:" << std::to_string(TgID()) << " vm:" << std::to_string(VmID()); ss << " queue:" << std::to_string(QueueID()) << " state:" << std::to_string(StateID()) << " me:0x" << std::hex << static_cast(MeID()); return ss.str(); case (ArchFamily::GFX940Plus): ss << "family:" << std::string(gfx_arch_) << " hwid:0x" << std::setfill('0') << std::setw(8) << std::hex << raw_[0]; ss << std::dec << " wave:" << std::to_string(WaveID()) << " simd:" << std::to_string(SimdID()) << " pipe:"; ss << std::to_string(PipeID()) << " cu:" << std::to_string(CuID()) << " sh:" << std::to_string(ShID()); ss << " se:" << std::to_string(SeID()) << " tg:" << std::to_string(TgID()) << " vm:" << std::to_string(VmID()); ss << " queue:" << std::to_string(QueueID()) << " state:" << std::to_string(StateID()) << " me:0x" << std::hex << static_cast(MeID()); ss << std::dec << " xcc:" << std::to_string(XccID()); return ss.str(); case (ArchFamily::GFX10Plus): ss << "family:" << std::string(gfx_arch_) << " hwid:0x" << std::setfill('0') << std::setw(8) << std::hex << raw_[0]; ss << " hwid1:0x" << std::setfill('0') << std::setw(8) << std::hex << raw_[1]; ss << std::dec << " wave:" << std::to_string(WaveID()) << " simd:" << std::to_string(SimdID()) << " wgp:" << std::to_string(WgpID()); ss << " sa:" << std::to_string(SaID()) << " se:" << std::to_string(SeID()) << " queue:" << std::to_string(QueueID()); ss << " pipe:" << std::to_string(PipeID()) << " me:" << std::to_string(MeID()) << " state:" << std::to_string(StateID()); ss << " wg:" << std::to_string(WgID()) << " vm:" << std::to_string(VmID()) << " compat_level:" << std::to_string(CompatLevelID()); return ss.str(); case (ArchFamily::GFX11Plus): ss << "family:" << std::string(gfx_arch_) << " hwid:0x" << std::setfill('0') << std::setw(8) << std::hex << raw_[0]; ss << " hwid1:0x" << std::setfill('0') << std::setw(8) << std::hex << raw_[1]; ss << std::dec << " wave:" << std::to_string(WaveID()) << " simd:" << std::to_string(SimdID()) << " wgp:" << std::to_string(WgpID()); ss << " sa:" << std::to_string(SaID()) << " se:" << std::to_string(SeID()) << " queue:" << std::to_string(QueueID()); ss << " pipe:" << std::to_string(PipeID()) << " me:" << std::to_string(MeID()) << " state:" << std::to_string(StateID()); ss << " wg:" << std::to_string(WgID()) << " vm:" << std::to_string(VmID()) << " dprate:" << std::to_string(DPRate()); return ss.str(); default: return "gfx HwID parsing not available for this gfx arch"; } } }; } // namespace amdgpu } // namespace embers #endif