Program Listing for File hwid.h
↰ Return to documentation for file (include/embers/amdgpu/hwid.h
)
/* Copyright © 2020 Advanced Micro Devices, Inc. All rights reserved */
#ifndef HWID_H
#define HWID_H
#include <array>
#include <cstdint>
#include <iomanip>
#include <sstream>
#include <string>
#include <hip/hip_runtime.h>
#include "embers/status.h"
#include "embers/amdgpu/arch.h"
#include "embers/helpers/bit_helpers.cuh"
namespace embers
{
namespace amdgpu
{
class HwRegIDs
{
public:
static constexpr int MAX_NUM_HWID_REGS = 2;
private:
static constexpr unsigned short GFX9_ME_MSB = 31;
static constexpr unsigned short GFX9_STATE_MSB = 29;
static constexpr unsigned short GFX9_ME_LSB = 30;
static constexpr unsigned short GFX9_STATE_LSB = 27;
static constexpr unsigned short GFX9_QUEUE_MSB = 26;
static constexpr unsigned short GFX9_QUEUE_LSB = 24;
static constexpr unsigned short GFX9_VM_MSB = 23;
static constexpr unsigned short GFX9_VM_LSB = 20;
static constexpr unsigned short GFX9_TG_MSB = 19;
static constexpr unsigned short GFX9_TG_LSB = 16;
static constexpr unsigned short GFX9_SE_MSB = 14;
static constexpr unsigned short GFX9_SE_LSB = 13;
static constexpr unsigned short GFX9_SH_MSB = 12;
static constexpr unsigned short GFX9_SH_LSB = 12;
static constexpr unsigned short GFX9_CU_MSB = 11;
static constexpr unsigned short GFX9_CU_LSB = 8;
static constexpr unsigned short GFX9_PIPE_MSB = 7;
static constexpr unsigned short GFX9_PIPE_LSB = 6;
static constexpr unsigned short GFX9_SIMD_MSB = 5;
static constexpr unsigned short GFX9_SIMD_LSB = 4;
static constexpr unsigned short GFX9_WAVE_MSB = 3;
static constexpr unsigned short GFX9_WAVE_LSB = 0;
// GFX940Plus
static constexpr unsigned short GFX940P_XCC_MSB = 3;
static constexpr unsigned short GFX940P_XCC_LSB = 0;
// GFX10Plus SQ_WAVE_HW_ID1
static constexpr unsigned short GFX10P_SE_MSB = 19;
static constexpr unsigned short GFX10P_SE_LSB = 18;
static constexpr unsigned short GFX10P_SA_MSB = 16;
static constexpr unsigned short GFX10P_SA_LSB = 16;
static constexpr unsigned short GFX10P_WGP_MSB = 13;
static constexpr unsigned short GFX10P_WGP_LSB = 10;
static constexpr unsigned short GFX10P_SIMD_MSB = 9;
static constexpr unsigned short GFX10P_SIMD_LSB = 8;
static constexpr unsigned short GFX10P_WAVE_MSB = 4;
static constexpr unsigned short GFX10P_WAVE_LSB = 0;
// GFX10Plus SQ_WAVE_HW_ID2
static constexpr unsigned short GFX10P_COMPAT_LEVEL_MSB = 30;
static constexpr unsigned short GFX10P_COMPAT_LEVEL_LSB = 29;
static constexpr unsigned short GFX10P_VM_MSB = 27;
static constexpr unsigned short GFX10P_VM_LSB = 24;
static constexpr unsigned short GFX10P_WG_MSB = 20;
static constexpr unsigned short GFX10P_WG_LSB = 16;
static constexpr unsigned short GFX10P_STATE_MSB = 14;
static constexpr unsigned short GFX10P_STATE_LSB = 12;
static constexpr unsigned short GFX10P_ME_MSB = 9;
static constexpr unsigned short GFX10P_ME_LSB = 8;
static constexpr unsigned short GFX10P_PIPE_MSB = 5;
static constexpr unsigned short GFX10P_PIPE_LSB = 4;
static constexpr unsigned short GFX10P_QUEUE_MSB = 3;
static constexpr unsigned short GFX10P_QUEUE_LSB = 0;
// GFX11Plus HW_ID1
static constexpr unsigned short GFX11P_DP_RATE_MSB = 31;
static constexpr unsigned short GFX11P_DP_RATE_LSB = 29;
static constexpr unsigned short GFX11P_SE_MSB = 20;
static constexpr unsigned short GFX11P_SE_LSB = 18;
static constexpr unsigned short GFX11P_SA_MSB = 16;
static constexpr unsigned short GFX11P_SA_LSB = 16;
static constexpr unsigned short GFX11P_WGP_MSB = 13;
static constexpr unsigned short GFX11P_WGP_LSB = 10;
static constexpr unsigned short GFX11P_SIMD_MSB = 9;
static constexpr unsigned short GFX11P_SIMD_LSB = 8;
static constexpr unsigned short GFX11P_WAVE_MSB = 4;
static constexpr unsigned short GFX11P_WAVE_LSB = 0;
// GFX11Plus HW_ID2
static constexpr unsigned short GFX11P_VM_MSB = 27;
static constexpr unsigned short GFX11P_VM_LSB = 24;
static constexpr unsigned short GFX11P_WG_MSB = 20;
static constexpr unsigned short GFX11P_WG_LSB = 16;
static constexpr unsigned short GFX11P_STATE_MSB = 14;
static constexpr unsigned short GFX11P_STATE_LSB = 12;
static constexpr unsigned short GFX11P_ME_MSB = 9;
static constexpr unsigned short GFX11P_ME_LSB = 8;
static constexpr unsigned short GFX11P_PIPE_MSB = 5;
static constexpr unsigned short GFX11P_PIPE_LSB = 4;
static constexpr unsigned short GFX11P_QUEUE_MSB = 3;
static constexpr unsigned short GFX11P_QUEUE_LSB = 0;
std::array<uint32_t, MAX_NUM_HWID_REGS> raw_;
GFXArch gfx_arch_;
// raw[1] used for xcc ID in MI300 and for HWID reg 2 for GFX10PLus
// GFX9
__device__ inline void SetArch() noexcept
{
#if defined(__gfx900__) || defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__)
gfx_arch_ = GFXArch(ArchFamily::GFX9);
#elif defined(__gfx1010__) || defined(__gfx1011__) || defined(__gfx1012__) || \
defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1032__) || \
defined(__gfx1033__) || defined(__gfx1034__) || defined(__gfx1035__) || defined(__gfx1036__)
gfx_arch_ = GFXArch(ArchFamily::GFX10Plus);
#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__)
gfx_arch_ = GFXArch(ArchFamily::GFX11Plus);
#elif defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
gfx_arch_ = GFXArch(ArchFamily::GFX940Plus);
#elif defined(__HIP_DEVICE_COMPILE__)
static_assert(false, "This GFX_ARCH is not supported");
#endif
}
__host__ inline void SetArch() noexcept { gfx_arch_ = GFXArch(ArchFamily::INVALID); }
__device__ inline void ReadHwIDRegs() noexcept
{
#if defined(__gfx900__) || defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__)
asm volatile("s_getreg_b32 %0, hwreg(HW_REG_HW_ID, 0, 32)" : "=r"(raw_[0])::);
raw_[1] = 0;
#elif defined(__gfx1010__) || defined(__gfx1011__) || defined(__gfx1012__) || \
defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1032__) || \
defined(__gfx1033__) || defined(__gfx1034__) || defined(__gfx1035__) || defined(__gfx1036__)
asm volatile("s_getreg_b32 %0, hwreg(HW_REG_HW_ID, 0, 32)" : "=r"(raw_[0])::);
asm volatile("s_getreg_b32 %0, hwreg(HW_REG_HW_ID2, 0, 32)" : "=r"(raw_[1])::);
#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__)
asm volatile("s_getreg_b32 %0, hwreg(HW_REG_HW_ID1, 0, 32)" : "=r"(raw_[0])::);
asm volatile("s_getreg_b32 %0, hwreg(HW_REG_HW_ID2, 0, 32)" : "=r"(raw_[1])::);
#elif defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
gfx_arch_ = GFXArch(ArchFamily::GFX940Plus);
asm volatile("s_getreg_b32 %0, hwreg(HW_REG_HW_ID, 0, 32)" : "=r"(raw_[0])::);
asm volatile("s_getreg_b32 %0, hwreg(HW_REG_XCC_ID, 0, 32)" : "=r"(raw_[1])::);
#elif defined(__HIP_DEVICE_COMPILE__)
static_assert(false, "This GFX_ARCH is not supported");
#endif
}
__host__ inline void ReadHwIDRegs() volatile noexcept {}
__host__ void InitState()
{
SetArch();
raw_.fill(0);
}
__device__ void InitState()
{
SetArch();
ReadHwIDRegs();
}
__host__ void FeatureNotSupported()
{
throw StatusError(Status::Code::ERROR, "HwRegIDs feature not supported for this arch");
}
__device__ void FeatureNotSupported() { abort(); }
public:
__host__ __device__ HwRegIDs() { InitState(); }
__host__ HwRegIDs(ArchFamily family, std::array<uint32_t, MAX_NUM_HWID_REGS> regvals)
: raw_(regvals), gfx_arch_(GFXArch(family))
{
}
__host__ __device__ uint32_t HwID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX9:
case ArchFamily::GFX940Plus:
return raw_.at(0);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint32_t HwID1()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX10Plus:
return raw_.at(0);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint32_t HwID2()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX10Plus:
return raw_.at(1);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ GFXArch GfxArch() const noexcept { return gfx_arch_; }
__host__ __device__ uint8_t WaveID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX9:
case ArchFamily::GFX940Plus:
return get_bits(raw_[0], GFX9_WAVE_MSB, GFX9_WAVE_LSB);
case ArchFamily::GFX10Plus:
return get_bits(raw_[0], GFX10P_WAVE_MSB, GFX10P_WAVE_LSB);
case ArchFamily::GFX11Plus:
return get_bits(raw_[0], GFX11P_WAVE_MSB, GFX11P_WAVE_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t SimdID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX9:
case ArchFamily::GFX940Plus:
return get_bits(raw_[0], GFX9_SIMD_MSB, GFX9_SIMD_LSB);
case ArchFamily::GFX10Plus:
return get_bits(raw_[0], GFX10P_SIMD_MSB, GFX10P_SIMD_LSB);
case ArchFamily::GFX11Plus:
return get_bits(raw_[0], GFX11P_SIMD_MSB, GFX11P_SIMD_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t PipeID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX9:
case ArchFamily::GFX940Plus:
return get_bits(raw_[0], GFX9_PIPE_MSB, GFX9_PIPE_LSB);
case ArchFamily::GFX10Plus:
return get_bits(raw_[1], GFX10P_PIPE_MSB, GFX10P_PIPE_LSB);
case ArchFamily::GFX11Plus:
return get_bits(raw_[1], GFX11P_PIPE_MSB, GFX11P_PIPE_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t CuID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX9:
case ArchFamily::GFX940Plus:
return get_bits(raw_[0], GFX9_CU_MSB, GFX9_CU_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t ShID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX9:
case ArchFamily::GFX940Plus:
return get_bits(raw_[0], GFX9_SH_MSB, GFX9_SH_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t SeID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX9:
case ArchFamily::GFX940Plus:
return get_bits(raw_[0], GFX9_SE_MSB, GFX9_SE_LSB);
case ArchFamily::GFX10Plus:
return get_bits(raw_[0], GFX10P_SE_MSB, GFX10P_SE_LSB);
case ArchFamily::GFX11Plus:
return get_bits(raw_[0], GFX11P_SE_MSB, GFX11P_SE_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t DPRate()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX11Plus:
return get_bits(raw_[0], GFX11P_DP_RATE_MSB, GFX11P_DP_RATE_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t TgID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX9:
case ArchFamily::GFX940Plus:
return get_bits(raw_[0], GFX9_TG_MSB, GFX9_TG_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t VmID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX9:
case ArchFamily::GFX940Plus:
return get_bits(raw_[0], GFX9_VM_MSB, GFX9_VM_LSB);
case ArchFamily::GFX10Plus:
return get_bits(raw_[1], GFX10P_VM_MSB, GFX10P_VM_LSB);
case ArchFamily::GFX11Plus:
return get_bits(raw_[1], GFX11P_VM_MSB, GFX11P_VM_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t QueueID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX9:
case ArchFamily::GFX940Plus:
return get_bits(raw_[0], GFX9_QUEUE_MSB, GFX9_QUEUE_LSB);
case ArchFamily::GFX10Plus:
return get_bits(raw_[1], GFX10P_QUEUE_MSB, GFX10P_QUEUE_LSB);
case ArchFamily::GFX11Plus:
return get_bits(raw_[1], GFX11P_QUEUE_MSB, GFX11P_QUEUE_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t StateID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX9:
case ArchFamily::GFX940Plus:
return get_bits(raw_[0], GFX9_STATE_MSB, GFX9_STATE_LSB);
case ArchFamily::GFX10Plus:
return get_bits(raw_[1], GFX10P_STATE_MSB, GFX10P_STATE_LSB);
case ArchFamily::GFX11Plus:
return get_bits(raw_[1], GFX11P_STATE_MSB, GFX11P_STATE_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t MeID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX9:
case ArchFamily::GFX940Plus:
return get_bits(raw_[0], GFX9_ME_MSB, GFX9_ME_LSB);
case ArchFamily::GFX10Plus:
return get_bits(raw_[1], GFX10P_ME_MSB, GFX10P_ME_LSB);
case ArchFamily::GFX11Plus:
return get_bits(raw_[1], GFX11P_ME_MSB, GFX11P_ME_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t WgID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX10Plus:
return get_bits(raw_[1], GFX10P_WG_MSB, GFX10P_WG_LSB);
case ArchFamily::GFX11Plus:
return get_bits(raw_[1], GFX11P_WG_MSB, GFX11P_WG_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t CompatLevelID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX10Plus:
return get_bits(raw_[1], GFX10P_COMPAT_LEVEL_MSB, GFX10P_COMPAT_LEVEL_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t WgpID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX10Plus:
return get_bits(raw_[0], GFX10P_WGP_MSB, GFX10P_WGP_LSB);
case ArchFamily::GFX11Plus:
return get_bits(raw_[0], GFX11P_WGP_MSB, GFX11P_WGP_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t SaID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX10Plus:
return get_bits(raw_[0], GFX10P_SA_MSB, GFX10P_SA_LSB);
case ArchFamily::GFX11Plus:
return get_bits(raw_[0], GFX11P_SA_MSB, GFX11P_SA_LSB);
default:
FeatureNotSupported();
return 0;
}
}
__host__ __device__ uint8_t XccID()
{
ReadHwIDRegs();
switch (gfx_arch_.Family()) {
case ArchFamily::GFX940Plus:
return get_bits(raw_[1], GFX940P_XCC_MSB, GFX940P_XCC_LSB);
default:
FeatureNotSupported();
return 0;
}
}
operator std::string()
{
ReadHwIDRegs();
std::stringstream ss;
switch (gfx_arch_.Family()) {
case (ArchFamily::GFX9):
ss << "family:" << std::string(gfx_arch_) << " hwid:0x" << std::setfill('0') << std::setw(8)
<< std::hex << raw_[0];
ss << std::dec << " wave:" << std::to_string(WaveID())
<< " simd:" << std::to_string(SimdID()) << " pipe:";
ss << std::to_string(PipeID()) << " cu:" << std::to_string(CuID())
<< " sh:" << std::to_string(ShID());
ss << " se:" << std::to_string(SeID()) << " tg:" << std::to_string(TgID())
<< " vm:" << std::to_string(VmID());
ss << " queue:" << std::to_string(QueueID()) << " state:" << std::to_string(StateID())
<< " me:0x" << std::hex << static_cast<unsigned int>(MeID());
return ss.str();
case (ArchFamily::GFX940Plus):
ss << "family:" << std::string(gfx_arch_) << " hwid:0x" << std::setfill('0') << std::setw(8)
<< std::hex << raw_[0];
ss << std::dec << " wave:" << std::to_string(WaveID())
<< " simd:" << std::to_string(SimdID()) << " pipe:";
ss << std::to_string(PipeID()) << " cu:" << std::to_string(CuID())
<< " sh:" << std::to_string(ShID());
ss << " se:" << std::to_string(SeID()) << " tg:" << std::to_string(TgID())
<< " vm:" << std::to_string(VmID());
ss << " queue:" << std::to_string(QueueID()) << " state:" << std::to_string(StateID())
<< " me:0x" << std::hex << static_cast<unsigned int>(MeID());
ss << std::dec << " xcc:" << std::to_string(XccID());
return ss.str();
case (ArchFamily::GFX10Plus):
ss << "family:" << std::string(gfx_arch_) << " hwid:0x" << std::setfill('0') << std::setw(8)
<< std::hex << raw_[0];
ss << " hwid1:0x" << std::setfill('0') << std::setw(8) << std::hex << raw_[1];
ss << std::dec << " wave:" << std::to_string(WaveID())
<< " simd:" << std::to_string(SimdID()) << " wgp:" << std::to_string(WgpID());
ss << " sa:" << std::to_string(SaID()) << " se:" << std::to_string(SeID())
<< " queue:" << std::to_string(QueueID());
ss << " pipe:" << std::to_string(PipeID()) << " me:" << std::to_string(MeID())
<< " state:" << std::to_string(StateID());
ss << " wg:" << std::to_string(WgID()) << " vm:" << std::to_string(VmID())
<< " compat_level:" << std::to_string(CompatLevelID());
return ss.str();
case (ArchFamily::GFX11Plus):
ss << "family:" << std::string(gfx_arch_) << " hwid:0x" << std::setfill('0') << std::setw(8)
<< std::hex << raw_[0];
ss << " hwid1:0x" << std::setfill('0') << std::setw(8) << std::hex << raw_[1];
ss << std::dec << " wave:" << std::to_string(WaveID())
<< " simd:" << std::to_string(SimdID()) << " wgp:" << std::to_string(WgpID());
ss << " sa:" << std::to_string(SaID()) << " se:" << std::to_string(SeID())
<< " queue:" << std::to_string(QueueID());
ss << " pipe:" << std::to_string(PipeID()) << " me:" << std::to_string(MeID())
<< " state:" << std::to_string(StateID());
ss << " wg:" << std::to_string(WgID()) << " vm:" << std::to_string(VmID())
<< " dprate:" << std::to_string(DPRate());
return ss.str();
default:
return "gfx HwID parsing not available for this gfx arch";
}
}
};
} // namespace amdgpu
} // namespace embers
#endif