Skip to content
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
2f09b6d
Add third party submodiles
rochauha Apr 20, 2026
ca1dc94
Dump source code
rochauha Apr 20, 2026
96921d0
Update CMakeLists.txt for submodule directory names
rochauha Apr 21, 2026
cc44080
Use static for functions in tools and add some comments
rochauha Apr 21, 2026
2c32ef2
Add initial README
rochauha Apr 21, 2026
3d47364
Update README.md
rochauha Apr 21, 2026
b784ff8
Use C++ 20
rochauha Apr 21, 2026
605fece
use std::string::starts_with in update-note
rochauha Apr 21, 2026
6cb5b8e
More use of static on functions
rochauha Apr 21, 2026
0564a00
Use ROCM_PATH instead of /opt/rocm for preload
rochauha Apr 21, 2026
b072a34
Address comments to extract-fatbin
rochauha Apr 24, 2026
9f86d24
Address comments to extract-gpubin
rochauha Apr 24, 2026
43198b0
Use SYSTEM keyword to eliminate third party code warnings
rochauha Apr 30, 2026
1ca986c
Rename project to amd-gpu-tools in CMake
rochauha Apr 30, 2026
3512594
Remove use of VLA in update-fatbin
rochauha Apr 30, 2026
466bc55
Modernize for loop in getSection
rochauha Apr 30, 2026
c95366e
Enable more compiler warnings
rochauha Apr 30, 2026
3a428a0
Fix new warnings in update-note-phdr
rochauha Apr 30, 2026
33197e3
Fix warnings in update-notes
rochauha Apr 30, 2026
6ccb097
Fix warnings in update-fatbin
rochauha Apr 30, 2026
9e44c05
Fix warnigns in update-exec
rochauha Apr 30, 2026
f516d73
Use constexpr for alignment
rochauha Apr 30, 2026
e92b7fd
Update extract-fatbin to take output argument
rochauha May 1, 2026
7cc253f
Update extract-gpubin to take output argument
rochauha May 1, 2026
0593154
Update update-fatbin to take output argument
rochauha May 1, 2026
29771f6
Update update-note to take output argument
rochauha May 1, 2026
0f89d8e
Cleanup instr-driver
rochauha May 1, 2026
a29942b
Fix comment in update-note
rochauha May 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[submodule "third-party/msgpack-c"]
path = third-party/msgpack-c
url = https://github.com/msgpack/msgpack-c.git
branch = cpp_master
[submodule "third-party/ELFIO"]
path = third-party/ELFIO
url = https://github.com/serge1/ELFIO.git
branch = main
76 changes: 76 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Require CMake 3.20 or newer, and compile all C++ sources as ISO C++20:
# the standard is mandatory (no fallback to an older one) and compiler-specific
# language extensions are turned off.
cmake_minimum_required(VERSION 3.20)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

Comment thread
rochauha marked this conversation as resolved.
# Location of the ROCm installation, supplied by the user on the command line
# (-DROCM_PATH=...). The default is the empty string, i.e. "not provided".
set(ROCM_PATH
""
CACHE PATH "Path to ROCm install directory")

# ROCM_PATH has no usable default: abort configuration with an actionable
# message when it was not supplied. The message fragments are concatenated
# verbatim by message(), so each one carries its own line break.
if("${ROCM_PATH}" STREQUAL "")
  message(
    FATAL_ERROR
      "\n ROCM_PATH not set.\n"
      " Please provide it using: cmake -DROCM_PATH=/path/to/rocm/install \n")
endif()

# Build with the amdclang compilers found under ROCM_PATH. The compiler
# variables are assigned ahead of project(), which is the call that enables
# the CXX language for this build.
set(CMAKE_C_COMPILER "${ROCM_PATH}/bin/amdclang")
set(CMAKE_CXX_COMPILER "${ROCM_PATH}/bin/amdclang++")

project(amdgpu-tooling LANGUAGES CXX)
# ALL REGULAR TOOLS

# Every tool is a single-source executable. Tools that use a bundled
# third-party library receive its headers through a SYSTEM include directory,
# so that diagnostics raised inside third-party headers are not reported as
# warnings of this project.

add_executable(update-note update-note.cpp)
target_include_directories(
  update-note SYSTEM
  PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/third-party/msgpack-c/include")

add_executable(update-note-phdr update-note-phdr.cpp)
target_include_directories(
  update-note-phdr SYSTEM
  PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/third-party/ELFIO")

add_executable(extract-fatbin extract-fatbin.cpp)
target_include_directories(
  extract-fatbin SYSTEM
  PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/third-party/ELFIO")

# These two tools use only the C++ standard library.
add_executable(extract-gpubin extract-gpubin.cpp)

add_executable(update-fatbin update-fatbin.cpp)

add_executable(update-exec update-exec.cpp)
target_include_directories(
  update-exec SYSTEM
  PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/third-party/ELFIO")

# SPECIAL CASE FOR PRELOAD

# Paths for hipcc and the preload file
set(HIPCC "${ROCM_PATH}/bin/hipcc")
set(PRELOAD_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/preload.cpp")
set(PRELOAD_SO "${CMAKE_CURRENT_BINARY_DIR}/preload.so")

# Build preload.so with hipcc rather than the project compiler.
# The include flag is written as ONE quoted argument: in the unquoted form
# -I"<path>"/include/ the double quotes are kept as literal characters of the
# argument, and VERBATIM then hands them to hipcc as part of the directory name.
add_custom_command(
  OUTPUT "${PRELOAD_SO}"
  COMMAND "${HIPCC}" -D__HIP_PLATFORM_AMD__ -x c++ -shared -fpic
          "-I${ROCM_PATH}/include/" "${PRELOAD_SOURCE}" -o "${PRELOAD_SO}"
  DEPENDS "${PRELOAD_SOURCE}"
  COMMENT "Building ${PRELOAD_SOURCE} with ${HIPCC}"
  VERBATIM)

# Make preload.so part of the default build.
add_custom_target(PreloadFile ALL DEPENDS "${PRELOAD_SO}")

# SPECIAL CASE FOR instr-driver

# Paths for instr-driver: the script in the source tree and its copy in the
# build tree.
set(DRIVER_SCRIPT_SRC "${CMAKE_CURRENT_SOURCE_DIR}/instr-driver")
set(DRIVER_SCRIPT_DEST "${CMAKE_CURRENT_BINARY_DIR}/instr-driver")

# Command to copy instr-driver into the build directory. The copy is redone
# whenever the source script changes (DEPENDS).
add_custom_command(
OUTPUT "${DRIVER_SCRIPT_DEST}"
COMMAND ${CMAKE_COMMAND} -E copy "${DRIVER_SCRIPT_SRC}"
"${DRIVER_SCRIPT_DEST}"
DEPENDS "${DRIVER_SCRIPT_SRC}"
COMMENT "Copying ${DRIVER_SCRIPT_SRC} to build directory"
VERBATIM)

# Make the copy part of the default build.
add_custom_target(DriverScript ALL DEPENDS "${DRIVER_SCRIPT_DEST}")
26 changes: 26 additions & 0 deletions README.md
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There should be a section that lists each of the programs/libraries built, with a brief description of its purpose and how it is invoked, including its arguments.

Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# AMDGPU Tools for Dyninst

The Dyninst mutator can currently only rewrite the GPU ELF binary, but doesn't rewrite the metadata in the GPU binary.

These tools are used alongside the Dyninst mutator for all the additional tasks. This includes the following:
1. Extracting and embedding the fat binary in the host executable
2. Extracting and embedding the GPU ELF binary in the fat binary
3. Rewriting metadata in the instrumented GPU binary
4. Using a preload library to pass an additional argument for kernel launch

These tools are tested and developed on ROCm 6.0.0 and GFX908.

## Building
```
cmake /path/to/amd_gpu_tools -DROCM_PATH=/path/to/rocm/install
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Include initializing the submodules, and making the targets:

Suggested change
cmake /path/to/amd_gpu_tools -DROCM_PATH=/path/to/rocm/install
AGP_SRC=/path/to/amd_gpu_tools
AGP_BUILD=/path/to/amd_gpu_tools_builddir
ROCM_INSTALL=/path/to/rocm/install
cd $AGP_SRC
git submodule update --init --recursive
cmake -DROCM_PATH=$ROCM_INSTALL -B $AGP_BUILD -S $AGP_SRC
cd $AGP_BUILD
make

```

## Running

```
instr-driver <dyninst-mutator> <host-executable>
```

The host executable contains the host code and the fat binary which contains device code.

Ensure that the build directory for these tools is appended to `PATH`.
43 changes: 43 additions & 0 deletions extract-fatbin.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#include <cassert>
#include <fstream>
#include <iostream>

#include "elfio/elfio.hpp"

// Looks up a section of `file` by exact name.
// Returns the first section whose name equals `sectionName`, or nullptr when
// no section matches.
static ELFIO::section *getSection(const std::string &sectionName, const ELFIO::elfio &file) {
  ELFIO::section *match = nullptr;
  for (int idx = 0; match == nullptr && idx < file.sections.size(); ++idx) {
    ELFIO::section *current = file.sections[idx];
    if (current->get_name() == sectionName)
      match = current;
  }
  return match;
}

// Returns the section named ".hip_fatbin" of `file`, or nullptr when the file
// has no section with that name.
static ELFIO::section *getFatbinSection(const ELFIO::elfio &file) {
return getSection(".hip_fatbin", file);
}

// Extracts the .hip_fatbin section of the ELF file named by argv[1] and writes
// its raw bytes to "<argv[1]>.fatbin".
//
// Returns 0 on success and 1 on bad usage, when the ELF file cannot be loaded,
// when it has no .hip_fatbin section, or when the output file cannot be
// created or completely written.
int main(int argc, char **argv) {
  if (argc != 2) {
    std::cerr << "Usage: " << argv[0] << " <executable file with fatbin>" << std::endl;
    return 1;
  }

  ELFIO::elfio execFile;
  if (!execFile.load(argv[1])) {
    std::cerr << "can't find or process ELF file " << argv[1] << '\n';
    return 1;
  }

  ELFIO::section *fatbinSection = getFatbinSection(execFile);
  if (!fatbinSection) {
    std::cerr << ".hip_fatbin section not found in " << argv[1] << "\n";
    return 1;
  }

  const char *fatbinData = fatbinSection->get_data();
  const auto fatbinSize = fatbinSection->get_size();
  // A non-empty section without data cannot be copied; writing from a null
  // pointer would be undefined behavior.
  if (fatbinData == nullptr && fatbinSize != 0) {
    std::cerr << ".hip_fatbin section in " << argv[1] << " has no data\n";
    return 1;
  }

  // Write fatbin to a separate file
  const std::string fatbinPath = std::string(argv[1]) + ".fatbin";
  std::ofstream fatbinFile(fatbinPath, std::ios::out | std::ios::binary);
  if (!fatbinFile) {
    std::cerr << "can't create " << fatbinPath << '\n';
    return 1;
  }

  if (fatbinSize != 0)
    fatbinFile.write(fatbinData, static_cast<std::streamsize>(fatbinSize));

  // Flush explicitly so that a failed write is reported instead of being
  // silently dropped when the stream is destroyed.
  fatbinFile.flush();
  if (!fatbinFile) {
    std::cerr << "failed to write " << fatbinPath << '\n';
    return 1;
  }

  return 0;
}
91 changes: 91 additions & 0 deletions extract-gpubin.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <string_view>

// Prints the command-line usage of the tool, followed by the list of
// supported architecture names, to standard error.
static void showHelp(const std::string &toolName) {
  const char *const supportedArchs = "gfx900, gfx906, gfx908, gfx90a, gfx940";
  std::cerr << "Usage : " << toolName << " <arch-name> <path-to-fatbin>" << std::endl
            << "supported architectures : " << supportedArchs << std::endl;
}

// Reads one 64-bit header field from `in` into `value` (host byte order, as
// stored in the file). Returns false when the stream could not supply it.
static bool readUint64(std::istream &in, uint64_t &value) {
  in.read(reinterpret_cast<char *>(&value), sizeof(value));
  return static_cast<bool>(in);
}

// Extracts the code object for <arch-name> from the clang offload bundle at
// <path-to-fatbin> and writes it to "<path-to-fatbin>.<arch-name>".
//
// A bundle entry is selected when its id string ends with <arch-name>; when
// several entries match, the last one is used.
//
// Exits with status 1 on bad usage, when a file cannot be opened or created,
// when the bundle is malformed or truncated, or when no entry matches.
int main(int argc, char *argv[]) {
  if (argc != 3) {
    showHelp(argv[0]);
    exit(1);
  }

  std::string arch(argv[1]);
  std::string fatbinPath(argv[2]);

  std::ifstream fatbin(fatbinPath, std::ios::binary);
  if (!fatbin) {
    std::cerr << "error : can't open " << fatbinPath << std::endl;
    exit(1);
  }

  // The file size is the upper bound for every length/offset read from the
  // header, so a corrupt value cannot trigger a huge allocation or a seek
  // past the end.
  fatbin.seekg(0, std::ios::end);
  const std::streamoff endPos = fatbin.tellg();
  fatbin.seekg(0, std::ios::beg);
  if (!fatbin || endPos < 0) {
    std::cerr << "error : can't determine size of " << fatbinPath << std::endl;
    exit(1);
  }
  const uint64_t fatbinSize = static_cast<uint64_t>(endPos);

  // This is at the beginning of the clang-offload-bundle file.
  // See https://clang.llvm.org/docs/ClangOffloadBundler.html
  constexpr std::string_view magicString("__CLANG_OFFLOAD_BUNDLE__");

  // Checked at run time (not with assert) so that release builds also reject
  // files that are not offload bundles.
  char magicBuffer[magicString.size()];
  fatbin.read(magicBuffer, static_cast<std::streamsize>(magicString.size()));
  if (!fatbin || std::string_view(magicBuffer, magicString.size()) != magicString) {
    std::cerr << "error : " << fatbinPath << " is not a clang offload bundle" << std::endl;
    exit(1);
  }

  uint64_t numBundleEntries = 0;
  if (!readUint64(fatbin, numBundleEntries)) {
    std::cerr << "error : " << fatbinPath << " has a truncated bundle header" << std::endl;
    exit(1);
  }

  uint64_t elfStart = 0;
  uint64_t elfSize = 0;
  bool found = false;

  // Read metadata for each elf object in this bundle
  for (uint64_t entry = 0; entry < numBundleEntries; ++entry) {
    uint64_t bundleEntryCodeObjectOffset = 0; // offset from beginning of the fatbin
    uint64_t size = 0;
    uint64_t idLength = 0;
    if (!readUint64(fatbin, bundleEntryCodeObjectOffset) || !readUint64(fatbin, size) ||
        !readUint64(fatbin, idLength) || idLength > fatbinSize) {
      std::cerr << "error : " << fatbinPath << " has a truncated or corrupt bundle header"
                << std::endl;
      exit(1);
    }

    std::string idString(idLength, '\0');
    fatbin.read(&idString[0], static_cast<std::streamsize>(idLength));
    if (!fatbin) {
      std::cerr << "error : " << fatbinPath << " has a truncated or corrupt bundle header"
                << std::endl;
      exit(1);
    }

    // If idString ends with arch. The length check comes first: an id shorter
    // than the architecture name cannot match and must not be indexed with a
    // wrapped-around position.
    const bool endsWithArch =
        idString.size() >= arch.size() &&
        idString.compare(idString.size() - arch.size(), arch.size(), arch) == 0;
    if (endsWithArch) {
      elfStart = bundleEntryCodeObjectOffset;
      elfSize = size;
      found = true;
    }
  }

  if (!found) {
    std::cerr << fatbinPath << " doesn't contain a " << arch << " binary\n";
    exit(1);
  }

  if (elfStart > fatbinSize || elfSize > fatbinSize - elfStart) {
    std::cerr << "error : " << arch << " entry lies outside of " << fatbinPath << std::endl;
    exit(1);
  }

  std::string elfBinPath(fatbinPath + "." + arch);
  std::ofstream elfBin(elfBinPath, std::ios::binary);

  if (!elfBin) {
    std::cerr << "error : can't create " << elfBinPath << std::endl;
    exit(1);
  }

  // Copy the code object through a fixed-size buffer so that memory use does
  // not grow with the size of the embedded binary.
  fatbin.seekg(static_cast<std::streamoff>(elfStart), std::ios::beg);
  char chunk[64 * 1024];
  uint64_t remaining = elfSize;
  while (remaining > 0) {
    const uint64_t want = remaining < sizeof(chunk) ? remaining : sizeof(chunk);
    fatbin.read(chunk, static_cast<std::streamsize>(want));
    if (static_cast<uint64_t>(fatbin.gcount()) != want) {
      std::cerr << "error : can't read " << arch << " binary from " << fatbinPath << std::endl;
      exit(1);
    }
    elfBin.write(chunk, static_cast<std::streamsize>(want));
    if (!elfBin) {
      std::cerr << "error : can't write " << elfBinPath << std::endl;
      exit(1);
    }
    remaining -= want;
  }

  // Flush explicitly so that a failed final write is reported.
  elfBin.flush();
  if (!elfBin) {
    std::cerr << "error : can't write " << elfBinPath << std::endl;
    exit(1);
  }

  return 0;
}
81 changes: 81 additions & 0 deletions instr-driver
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/bin/bash

# TODO : Make this script more like a CLI tool

# Usage: instr-driver <dyninst-mutator> <host-executable>
# Every file name below is derived from the second argument, so refuse to run
# when the arguments are missing instead of operating on names built from
# empty strings.
if [ "$#" -lt 2 ]; then
    echo "Usage: $0 <dyninst-mutator> <host-executable>" >&2
    exit 1
fi

# Command that runs the Dyninst mutator on the GPU binary
MUTATOR=$1

# Host executable and the intermediate/final executables derived from it
EXEC_IN=$2
EXEC_UPDATED=$EXEC_IN.updated
EXEC_RENAMED1=$EXEC_UPDATED.renamed1
EXEC_RENAMED2=$EXEC_UPDATED.renamed2
EXEC_OUT=$EXEC_IN.out

# Fat binary extracted from the host executable, and its updated version
FATBIN=$EXEC_IN.fatbin
FATBIN_UPDATED=$FATBIN.updated

# gfx908 GPU binary extracted from the fat binary, and the files derived from it
GPUBIN=$FATBIN.gfx908
GPUBIN_INSTR=$GPUBIN-instr
GPUBIN_UPDATED=$GPUBIN_INSTR
GPUBIN_UPDATED_NOTE=$GPUBIN_INSTR.updated-note
GPUBIN_FINAL=$GPUBIN.final

# Contains names of instrumented kernels
NAMES_FILE=$GPUBIN.instrumentedKernelNames

# Note section dumped from the GPU binary, and its rewritten version
NOTE_IN=$GPUBIN.note
NOTE_OUT=$NOTE_IN.expanded
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lots of hardcoded names are being created. OK for now, but it makes it difficult for a user to know which files are going to be created and where.


# Every step consumes the files produced by the previous ones, so stop at the
# first failing command instead of continuing with missing or stale files.
set -e

# File names are quoted throughout so that paths containing spaces survive.

# 1. Extract fatbin. This will output a $FATBIN
extract-fatbin "$EXEC_IN"

# 2. Extract gfx908 bin. This will output $GPUBIN
extract-gpubin gfx908 "$FATBIN"

# 3. Run the mutator, instrument kernels (also use the information from step 3).
# This will also emit a file containing list of instrumented kernels ($NAMES_FILE)
# $MUTATOR -procedure-count $GPUBIN
# $MUTATOR is intentionally left unquoted so that a mutator command given
# together with options keeps being split into separate words.
$MUTATOR "$GPUBIN"

# 4. Update kernel descriptors for instrumented kernels
# This will produce $GPUBIN_UPDATED
# update-kd $NAMES_FILE $GPUBIN_INSTR

# 5. Modify the note metadata
#
# 5.1 Extract the note section from original binary
llvm-objcopy --dump-section=.note="$NOTE_IN" "$GPUBIN"

# 5.2 For each instrumented kernel, modify the metadata as follows:
# - Expand the kernarg buffer with 1 additional argument, which the additional memory that we will allocate via the host.
# - Increase SGPR usage to 112 (GFX908 only for now)
# This will emit $NOTE_OUT.
update-note "$NAMES_FILE" "$NOTE_IN"

# 5.3 Copy the updated binary, remove the note section
cp "$GPUBIN_INSTR" "$GPUBIN_UPDATED_NOTE"
llvm-objcopy --remove-section=.note "$GPUBIN_UPDATED_NOTE"

# 5.4 Add the expanded note section
llvm-objcopy --add-section=.note="$NOTE_OUT" "$GPUBIN_UPDATED_NOTE"

# 5.5 Update the program header for the notes section
update-note-phdr "$GPUBIN_UPDATED_NOTE" "$GPUBIN_FINAL"

# 6. Update original fatbin with instrumented gpu binary ($GPUBIN_FINAL)
# This will emit $FATBIN_UPDATED
update-fatbin gfx908 "$GPUBIN_FINAL" "$FATBIN"

# 7. Update the original executable ($EXEC_IN) by embedding $FATBIN_UPDATED
# This will emit $EXEC_UPDATED
update-exec "$EXEC_IN" "$FATBIN_UPDATED" "$EXEC_UPDATED"

# 8. Rename fatbin sections so that roc-obj* tools work with the modified executable. Those tools specifically look for the .hip_fatbin section by name.
# - Rename .hip_fatbin section to .old_fatbin
# - Rename .new_fatbin section to .hip_fatbin
# It is possible to do this within the update-exec tool, but doing it here is simpler and less error-prone
llvm-objcopy --rename-section .hip_fatbin=.old_fatbin "$EXEC_UPDATED" "$EXEC_RENAMED1"
llvm-objcopy --rename-section .new_fatbin=.hip_fatbin "$EXEC_RENAMED1" "$EXEC_RENAMED2"

cp "$EXEC_RENAMED2" "$EXEC_OUT"

chmod +x "$EXEC_OUT"
Loading