Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 47 additions & 3 deletions taichi/rhi/amdgpu/amdgpu_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,54 @@ AMDGPUContext::AMDGPUContext()

void *hip_device_prop = std::malloc(HIP_DEVICE_PROPERTIES_STRUCT_SIZE);
driver_.device_get_prop(hip_device_prop, device_);
compute_capability_ = *((int *)hip_device_prop + HIP_DEVICE_GCN_ARCH);
std::free(hip_device_prop);

mcpu_ = fmt::format("gfx{}", compute_capability_);
// Obtain compute capability and arch name using hip_device_prop.
int runtime_version;
driver_.runtime_get_version(&runtime_version);

// Future-proof way of getting compute_capability_ and mcpu_.
//
// hipGetDeviceProperties has two versions due to an ABI-breaking change in
// ROCm 6: hipGetDevicePropertiesR0000 and hipGetDevicePropertiesR0600. The
// former is for ROCm 5 and the latter is for ROCm 6. However, even in ROCm
// 6, the ABI symbol hipGetDeviceProperties in libamdhip64.so actually maps
// to hipGetDevicePropertiesR0000, the ROCm 5 version! See this commit for
// more details:
// https://github.com/ROCm/clr/commit/3e72b8d1e12347914974d4e7124cb205796f39f6
//
// IMO this is a bug, so in case of this behavior getting changed, let's first
// treat hipGetDeviceProperties as hipGetDevicePropertiesR0000, and if we're
// not getting a proper mcpu_, then we treat it as
// hipGetDevicePropertiesR0600.
//
// We can do this safely because hipDeviceProp_t is larger in R0600 than
// R0000, so using the field offset values in ROCm 5 on a ROCm 6 struct can
// never cause an out-of-bounds access.
compute_capability_ =
(*((int *)(hip_device_prop) + int(HIP_DEVICE_MAJOR))) * 100;
compute_capability_ +=
(*((int *)(hip_device_prop) + int(HIP_DEVICE_MINOR))) * 10;
mcpu_ =
std::string((char *)((int *)hip_device_prop + HIP_DEVICE_GCN_ARCH_NAME));
// Basic sanity check on mcpu_ to ensure we're calling R0000 instead of R0600
if (mcpu_.empty() || mcpu_.substr(0, 3) != "gfx") {
// HIP runtime version numbers for ROCm 6 start at 60000000
if (runtime_version < 60000000) {
TI_ERROR(
"hipGetDevicePropertiesR0000 returned an invalid mcpu_ but HIP "
"version {} is not ROCm 6",
runtime_version);
}
compute_capability_ =
(*((int *)(hip_device_prop) + int(HIP_DEVICE_MAJOR_6))) * 100;
compute_capability_ +=
(*((int *)(hip_device_prop) + int(HIP_DEVICE_MINOR_6))) * 10;
mcpu_ = std::string(
(char *)((int *)(hip_device_prop) + int(HIP_DEVICE_GCN_ARCH_NAME_6)));
}
// Strip out xnack/ecc from name
mcpu_ = mcpu_.substr(0, mcpu_.find(":"));
std::free(hip_device_prop);

TI_TRACE("Emitting AMDGPU code for {}", mcpu_);
}
Expand Down
1 change: 1 addition & 0 deletions taichi/rhi/amdgpu/amdgpu_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ AMDGPUDriver::AMDGPUDriver() {
loader_->load_function("hipGetErrorName", get_error_name);
loader_->load_function("hipGetErrorString", get_error_string);
loader_->load_function("hipDriverGetVersion", driver_get_version);
loader_->load_function("hipRuntimeGetVersion", runtime_get_version);

int version;
driver_get_version(&version);
Expand Down
20 changes: 18 additions & 2 deletions taichi/rhi/amdgpu/amdgpu_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,22 @@ constexpr uint32 HIP_MEM_ATTACH_GLOBAL = 0x1;
constexpr uint32 HIP_MEM_ADVISE_SET_PREFERRED_LOCATION = 3;
constexpr uint32 HIP_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 26;
constexpr uint32 HIP_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 63;
constexpr uint32 HIP_DEVICE_PROPERTIES_STRUCT_SIZE = 792;
constexpr uint32 HIP_DEVICE_GCN_ARCH = 98;
// Size in bytes of the buffer allocated to receive hipDeviceProp_t.
// sizeof(hipDeviceProp_t) is 792 in ROCm 5.7.1 and 1472 in ROCm 6, so to make
// both work we use whichever is larger.
constexpr uint32 HIP_DEVICE_PROPERTIES_STRUCT_SIZE = 1472;
// The field offsets below are expressed in units of 4 bytes (sizeof(int))
// because the property buffer is indexed as `(int *)buffer + OFFSET`.
// Constants without a suffix describe the ROCm 5 layout
// (hipGetDevicePropertiesR0000); constants with the `_6` suffix describe the
// ROCm 6 layout (hipGetDevicePropertiesR0600).
//
// ROCm 5: offsetof(hipDeviceProp_t, gcnArchName) / 4
constexpr uint32 HIP_DEVICE_GCN_ARCH_NAME = 396 / 4;
// ROCm 6: offsetof(hipDeviceProp_t, gcnArchName) / 4
constexpr uint32 HIP_DEVICE_GCN_ARCH_NAME_6 = 1160 / 4;
// ROCm 5: offsetof(hipDeviceProp_t, major) / 4
constexpr uint32 HIP_DEVICE_MAJOR = 328 / 4;
// ROCm 6: offsetof(hipDeviceProp_t, major) / 4
constexpr uint32 HIP_DEVICE_MAJOR_6 = 360 / 4;
// ROCm 5: offsetof(hipDeviceProp_t, minor) / 4
constexpr uint32 HIP_DEVICE_MINOR = 332 / 4;
// ROCm 6: offsetof(hipDeviceProp_t, minor) / 4
constexpr uint32 HIP_DEVICE_MINOR_6 = 364 / 4;
constexpr uint32 HIP_ERROR_ASSERT = 710;
constexpr uint32 HIP_JIT_MAX_REGISTERS = 0;
constexpr uint32 HIP_POINTER_ATTRIBUTE_MEMORY_TYPE = 2;
Expand Down Expand Up @@ -101,6 +115,8 @@ class AMDGPUDriver : protected AMDGPUDriverBase {

void (*driver_get_version)(int *);

void (*runtime_get_version)(int *);

bool detected();

static AMDGPUDriver &get_instance();
Expand Down