Skip to content

Commit

Permalink
WA for rocm-6.2+ s constrait for buffer resource (#1346)
Browse files Browse the repository at this point in the history
* WA for rocm-6.2+ s constrait for buffer resource

* add missing memory clobber
  • Loading branch information
carlushuang authored and junliume committed Jun 27, 2024
1 parent 406baa5 commit c658982
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 3 deletions.
3 changes: 2 additions & 1 deletion include/ck/utility/amd_buffer_addressing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -991,7 +991,8 @@ __device__ void amd_direct_load_global_to_lds(const T* global_base_ptr,
asm volatile("s_mov_b32 m0, %0; \n\t"
"buffer_load_dword %1, %2, 0 offen lds;\n\t" ::"s"(lds_ptr_sgpr),
"v"(global_offset_bytes),
"s"(src_resource));
"s"(src_resource)
: "memory");
#else
// LDS pointer must be attributed with the LDS address space.
__attribute__((address_space(3))) uint32_t* lds_ptr =
Expand Down
10 changes: 8 additions & 2 deletions include/ck_tile/core/arch/amd_buffer_addressing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,12 @@ struct __attribute__((packed)) buffer_resource
CK_TILE_DEVICE int32x4_t make_wave_buffer_resource(const void* ptr, uint32_t size = 0xffffffff)
{
buffer_resource res{ptr, size, CK_TILE_BUFFER_RESOURCE_3RD_DWORD};
return __builtin_bit_cast(int32x4_t, res);
int32x4_t r = __builtin_bit_cast(int32x4_t, res);
r.x = __builtin_amdgcn_readfirstlane(r.x);
r.y = __builtin_amdgcn_readfirstlane(r.y);
r.z = __builtin_amdgcn_readfirstlane(r.z);
r.w = __builtin_amdgcn_readfirstlane(r.w);
return r;
}

// TODO: glc/slc/...
Expand Down Expand Up @@ -2016,7 +2021,8 @@ CK_TILE_DEVICE void amd_direct_load_global_to_lds(const T* global_base_ptr,
asm volatile("s_mov_b32 m0, %0; \n\t"
"buffer_load_dword %1, %2, 0 offen lds;\n\t" ::"s"(lds_ptr_sgpr),
"v"(global_offset_bytes),
"s"(src_resource));
"s"(src_resource)
: "memory");
#else
// LDS pointer must be attributed with the LDS address space.
__attribute__((address_space(3))) uint32_t* lds_ptr =
Expand Down

0 comments on commit c658982

Please sign in to comment.