-
Notifications
You must be signed in to change notification settings - Fork 0
Add all tools as a first step #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
2f09b6d
ca1dc94
96921d0
cc44080
2c32ef2
3d47364
b784ff8
605fece
6cb5b8e
0564a00
b072a34
9f86d24
43198b0
1ca986c
3512594
466bc55
c95366e
3a428a0
33197e3
6ccb097
9e44c05
f516d73
e92b7fd
7cc253f
0593154
29771f6
0f89d8e
a29942b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| [submodule "third-party/msgpack-c"] | ||
| path = third-party/msgpack-c | ||
| url = https://github.com/msgpack/msgpack-c.git | ||
| branch = cpp_master | ||
| [submodule "third-party/ELFIO"] | ||
| path = third-party/ELFIO | ||
| url = https://github.com/serge1/ELFIO.git | ||
| branch = main |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
cmake_minimum_required(VERSION 3.20)

# Project-wide C++ defaults: strict C++20 without compiler extensions.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Root of the ROCm installation. It supplies the host compilers used for the
# tools below and must be provided by the user at configure time.
set(ROCM_PATH
    ""
    CACHE PATH "Path to ROCm install directory")

if("${ROCM_PATH}" STREQUAL "")
  message(
    FATAL_ERROR
      "\n ROCM_PATH not set.\n"
      " Please provide it using: cmake -DROCM_PATH=/path/to/rocm/install \n")
endif()

# Fail early with a readable message instead of a generic compiler-detection
# error when ROCM_PATH does not point at a usable installation.
if(NOT EXISTS "${ROCM_PATH}/bin/amdclang++")
  message(
    FATAL_ERROR
      "\n ${ROCM_PATH}/bin/amdclang++ not found.\n"
      " Check that ROCM_PATH points to a ROCm install directory.\n")
endif()

# The compilers have to be selected before project() enables the language.
set(CMAKE_C_COMPILER "${ROCM_PATH}/bin/amdclang")
set(CMAKE_CXX_COMPILER "${ROCM_PATH}/bin/amdclang++")

project(amd-gpu-tools LANGUAGES CXX)

# Warning flags are attached to the tool targets through this interface
# library, so flags supplied by the user through CMAKE_CXX_FLAGS or CXXFLAGS
# are no longer overwritten.
add_library(amd_gpu_tools_warnings INTERFACE)
target_compile_options(amd_gpu_tools_warnings INTERFACE -Wall -Wextra -pedantic)

# ALL REGULAR TOOLS
#
# Third-party headers are added as SYSTEM include directories so that the
# warning flags above are not applied to them.

add_executable(update-note update-note.cpp)
target_include_directories(
  update-note SYSTEM
  PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/third-party/msgpack-c/include")

add_executable(update-note-phdr update-note-phdr.cpp)
target_include_directories(
  update-note-phdr SYSTEM
  PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/third-party/ELFIO")

add_executable(extract-fatbin extract-fatbin.cpp)
target_include_directories(
  extract-fatbin SYSTEM PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/third-party/ELFIO")

add_executable(extract-gpubin extract-gpubin.cpp)

add_executable(update-fatbin update-fatbin.cpp)

add_executable(update-exec update-exec.cpp)
target_include_directories(
  update-exec SYSTEM PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/third-party/ELFIO")

# Apply the shared warning flags to every tool.
foreach(
  tool IN
  ITEMS update-note
        update-note-phdr
        extract-fatbin
        extract-gpubin
        update-fatbin
        update-exec)
  target_link_libraries(${tool} PRIVATE amd_gpu_tools_warnings)
endforeach()
|
|
||
# SPECIAL CASE FOR PRELOAD
#
# preload.so is built by invoking hipcc directly rather than through a regular
# CMake library target.

# Paths for hipcc and the preload file
set(HIPCC "${ROCM_PATH}/bin/hipcc")
set(PRELOAD_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/preload.cpp")
set(PRELOAD_SO "${CMAKE_CURRENT_BINARY_DIR}/preload.so")

# Actual command to build preload.so.
# The include flag is written as one quoted argument: in an unquoted argument
# such as -I"${ROCM_PATH}"/include/ the double quotes are kept literally and,
# with VERBATIM, would reach the compiler as part of the path.
add_custom_command(
  OUTPUT "${PRELOAD_SO}"
  COMMAND "${HIPCC}" -D__HIP_PLATFORM_AMD__ -x c++ -shared -fpic
          "-I${ROCM_PATH}/include" "${PRELOAD_SOURCE}" -o "${PRELOAD_SO}"
  DEPENDS "${PRELOAD_SOURCE}"
  COMMENT "Building ${PRELOAD_SOURCE} with ${HIPCC}"
  VERBATIM)

# Part of the default build: ALL makes a plain build produce preload.so.
add_custom_target(PreloadFile ALL DEPENDS "${PRELOAD_SO}")
|
|
||
# SPECIAL CASE FOR instr-driver
#
# instr-driver is a script, so nothing is compiled: the file is copied from the
# source tree into the build tree.

# Source and destination of the script
set(DRIVER_SCRIPT_SRC "${CMAKE_CURRENT_SOURCE_DIR}/instr-driver")
set(DRIVER_SCRIPT_DEST "${CMAKE_CURRENT_BINARY_DIR}/instr-driver")

# Copy rule; it re-runs whenever the script in the source tree changes.
add_custom_command(
  OUTPUT "${DRIVER_SCRIPT_DEST}"
  COMMAND
    ${CMAKE_COMMAND} -E copy
    "${DRIVER_SCRIPT_SRC}"
    "${DRIVER_SCRIPT_DEST}"
  DEPENDS "${DRIVER_SCRIPT_SRC}"
  COMMENT "Copying ${DRIVER_SCRIPT_SRC} to build directory"
  VERBATIM)

# Hook the copy into the default build.
add_custom_target(
  DriverScript ALL
  DEPENDS "${DRIVER_SCRIPT_DEST}")
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There should be a section that lists each of the programs/libraries built, with a brief description of its purpose and how it is invoked, including its arguments. |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,26 @@ | ||||||||||||||||||||
| # AMDGPU Tools for Dyninst | ||||||||||||||||||||
|
|
||||||||||||||||||||
| The Dyninst mutator can currently only rewrite the GPU ELF binary, but doesn't rewrite the metadata in the GPU binary. | ||||||||||||||||||||
|
|
||||||||||||||||||||
| These tools are used alongside the Dyninst mutator for all the additional tasks. This includes the following: | ||||||||||||||||||||
| 1. Extracting and embedding the fat binary in the host executable | ||||||||||||||||||||
| 2. Extracting and embedding the GPU ELF binary in the fat binary | ||||||||||||||||||||
| 3. Rewriting metadata in the instrumented GPU binary | ||||||||||||||||||||
| 4. Using a preload library to pass an additional argument for kernel launch | ||||||||||||||||||||
|
|
||||||||||||||||||||
| These tools are tested and developed on ROCm 6.0.0 and GFX908. | ||||||||||||||||||||
|
|
||||||||||||||||||||
| ## Building | ||||||||||||||||||||
| ``` | ||||||||||||||||||||
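| git -C /path/to/amd_gpu_tools submodule update --init --recursive | ||||||||||||||||||||
| cmake /path/to/amd_gpu_tools -DROCM_PATH=/path/to/rocm/install | ||||||||||||||||||||
| cmake --build . | ||||||||||||||||||||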
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Include initializing the submodules, and making the targets:
Suggested change
|
||||||||||||||||||||
| ``` | ||||||||||||||||||||
|
|
||||||||||||||||||||
| ## Running | ||||||||||||||||||||
|
|
||||||||||||||||||||
| ``` | ||||||||||||||||||||
| instr-driver <dyninst-mutator> <host-executable> | ||||||||||||||||||||
| ``` | ||||||||||||||||||||
|
|
||||||||||||||||||||
| The host executable contains the host code and the fat binary which contains device code. | ||||||||||||||||||||
|
|
||||||||||||||||||||
| Ensure that the build directory for these tools is appended to `PATH`, because `instr-driver` invokes the other tools by name.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| #include <cassert> | ||
| #include <fstream> | ||
| #include <iostream> | ||
|
|
||
| #include "elfio/elfio.hpp" | ||
|
|
||
| // usage: | ||
| // extract-fatbin <exec> <output-fatbin> | ||
|
|
||
| static ELFIO::section *getSection(const std::string §ionName, const ELFIO::elfio &file) { | ||
| for (const auto §ion: file.sections) { | ||
| if (section->get_name() == sectionName) { | ||
| return section.get(); | ||
| } | ||
| } | ||
| return nullptr; | ||
| } | ||
|
|
||
| static ELFIO::section *getFatbinSection(const ELFIO::elfio &file) { | ||
| return getSection(".hip_fatbin", file); | ||
| } | ||
|
|
||
| int main(int argc, char **argv) { | ||
| if (argc != 3) { | ||
| std::cerr << "Usage: " << argv[0] << " <executable file with fatbin> <output fatbin name>" << std::endl; | ||
| return 1; | ||
| } | ||
|
|
||
| std::string execFilePath(argv[1]); | ||
| std::string outputFatbinPath(argv[2]); | ||
| ELFIO::elfio execFile; | ||
| if (!execFile.load(execFilePath)) { | ||
| std::cerr << "can't find or process ELF file " << execFilePath << '\n'; | ||
| exit(1); | ||
| } | ||
|
|
||
| ELFIO::section *fatbinSection = getFatbinSection(execFile); | ||
| if (!fatbinSection) { | ||
| std::cerr << ".hip_fatbin section not found in " << execFilePath << "\n"; | ||
| exit(1); | ||
| } | ||
|
|
||
| // Write fatbin to a separate file | ||
| std::ofstream fatbinFile(outputFatbinPath, std::ios::out | std::ios::binary); | ||
|
|
||
| fatbinFile.write(fatbinSection->get_data(), fatbinSection->get_size()); | ||
|
|
||
| return 0; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
#include <cassert>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>
#include <string_view>
|
|
||
// Writes the command-line synopsis and the list of supported GPU
// architectures to standard error.
//
// toolName: program name shown in the synopsis (callers pass argv[0]).
static void showHelp(const std::string &toolName) {
  std::ostream &err = std::cerr;
  err << "Usage : " << toolName << " <arch-name> <path-to-fatbin> <path-to-output-gpubin>" << std::endl;
  err << "supported architectures : gfx900, gfx906, gfx908, gfx90a, gfx940" << std::endl;
}
|
|
||
| int main(int argc, char *argv[]) { | ||
| if (argc != 4) { | ||
| showHelp(argv[0]); | ||
| exit(1); | ||
| } | ||
|
|
||
| std::string arch(argv[1]); | ||
| std::string fatbinPath(argv[2]); | ||
| std::string gpubinPath(argv[3]); | ||
|
|
||
| std::ifstream fatbin(fatbinPath, std::ios::binary); | ||
| if (!fatbin) { | ||
| std::cerr << "error : can't open " << fatbinPath << std::endl; | ||
| exit(1); | ||
| } | ||
|
|
||
| // This is at the beginning of the clang-offload-bundle file. | ||
| // See https://clang.llvm.org/docs/ClangOffloadBundler.html | ||
| constexpr std::string_view magicString("__CLANG_OFFLOAD_BUNDLE__"); | ||
| constexpr uint32_t magicStringLength = magicString.length(); | ||
|
|
||
| char buffer[magicStringLength + 1]; | ||
| fatbin.read(buffer, magicStringLength); | ||
| buffer[magicStringLength] = 0; | ||
|
|
||
| assert(std::string(buffer) == magicString); | ||
|
|
||
| uint64_t numBundleEntries = 0; | ||
| fatbin.read(reinterpret_cast<char *>(&numBundleEntries), sizeof(numBundleEntries)); | ||
|
|
||
| uint64_t elfStart = 0; | ||
| uint64_t elfSize = 0; | ||
| bool found = false; | ||
|
|
||
| // Read metadata for each elf object in this bundle | ||
| while (numBundleEntries) { | ||
| uint64_t bundleEntryCodeObjectOffset; // offset from begining of the fatbin | ||
| fatbin.read(reinterpret_cast<char *>(&bundleEntryCodeObjectOffset), | ||
| sizeof(bundleEntryCodeObjectOffset)); | ||
|
|
||
| uint64_t size; | ||
| fatbin.read(reinterpret_cast<char *>(&size), sizeof(size)); | ||
|
|
||
| uint64_t idLength; | ||
| fatbin.read(reinterpret_cast<char *>(&idLength), sizeof(idLength)); | ||
|
|
||
| std::string idString; | ||
| idString.resize(idLength); | ||
| fatbin.read(&idString[0], idLength); | ||
|
|
||
| // If idString ends with arch | ||
| if (idString.substr(idLength - arch.length()) == arch) { | ||
| elfStart = bundleEntryCodeObjectOffset; | ||
| elfSize = size; | ||
| found = true; | ||
| } | ||
| numBundleEntries--; | ||
| } | ||
|
|
||
| if (!found) { | ||
| std::cerr << fatbinPath << " doesn't contain a " << arch << " binary\n"; | ||
| exit(1); | ||
| } | ||
|
|
||
| // std::cout << arch << ' ' << "ELF at " << elfStart << " of size " << elfSize << '\n'; | ||
|
|
||
| fatbin.seekg(elfStart, std::ios::beg); | ||
| std::string data; | ||
| data.resize(elfSize); | ||
| fatbin.read(&data[0], elfSize); | ||
|
Comment on lines
+80
to
+81
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This might be a problem is the fatbin arch is large as the entire thing is put in memory. OK for now, but it would be better to use a fixed sized buffer and a loop to read/write the data. At least add a comment to fix this in the future. |
||
|
|
||
| std::ofstream elfBin(gpubinPath, std::ios::binary); | ||
|
|
||
| if (!elfBin) { | ||
| std::cerr << "error : can't create " << gpubinPath << std::endl; | ||
| exit(1); | ||
| } | ||
|
|
||
| elfBin.write(&data[0], elfSize); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,82 @@ | ||
#!/bin/bash
#
# instr-driver: runs the whole instrumentation pipeline on a host executable
# that embeds a fat binary.
#
# Usage: instr-driver <dyninst-mutator> <host-executable>
#
# Every intermediate file is created next to the host executable, using its
# path as a prefix. The final result is <host-executable>.out.
# The helper tools (extract-fatbin, extract-gpubin, update-note,
# update-note-phdr, update-fatbin, update-exec) and llvm-objcopy are invoked by
# name, so they must be reachable through PATH.

# TODO : Make this script more like a CLI tool

# Stop at the first failing step: every step consumes the output of the
# previous one, so continuing after a failure only produces misleading errors.
set -euo pipefail

if [ "$#" -ne 2 ]; then
  echo "Usage: $0 <dyninst-mutator> <host-executable>" >&2
  exit 1
fi

MUTATOR=$1

EXEC_IN=$2
EXEC_UPDATED=$EXEC_IN.updated
EXEC_RENAMED1=$EXEC_UPDATED.renamed1
EXEC_RENAMED2=$EXEC_UPDATED.renamed2
EXEC_OUT=$EXEC_IN.out

FATBIN=$EXEC_IN.fatbin
FATBIN_UPDATED=$FATBIN.updated

GPUBIN=$FATBIN.gfx908
GPUBIN_INSTR=$GPUBIN-instr
GPUBIN_UPDATED_NOTE=$GPUBIN_INSTR.updated-note
GPUBIN_FINAL=$GPUBIN.final

# Contains names of instrumented kernels
NAMES_FILE=$GPUBIN.instrumentedKernelNames

NOTE_IN=$GPUBIN.note
NOTE_OUT=$NOTE_IN.updated

PRELOAD_NAMES=$NAMES_FILE.preload

# 1. Extract fatbin. This will output a $FATBIN
extract-fatbin "$EXEC_IN" "$FATBIN"

# 2. Extract gfx908 bin. This will output $GPUBIN
extract-gpubin gfx908 "$FATBIN" "$GPUBIN"

# 3. Run the mutator on $GPUBIN to instrument kernels.
#    This will also emit a file containing list of instrumented kernels ($NAMES_FILE).
#    The instrumented binary is expected at $GPUBIN_INSTR, which step 4.3 reads.
"$MUTATOR" "$GPUBIN"

# 4. Modify the note metadata
#
# 4.1 Extract the note section from original binary
llvm-objcopy --dump-section=.note="$NOTE_IN" "$GPUBIN"

# 4.2 For each instrumented kernel, modify the metadata as follows:
#     - Update the kernarg signature with 1 additional argument, which is the additional memory
#       that we will allocate via the host in the preload library.
#     - Increase SGPR usage to 112 (GFX908 only for now)
#     Outputs $NOTE_OUT and $PRELOAD_NAMES
update-note "$NAMES_FILE" "$NOTE_IN" "$NOTE_OUT" "$PRELOAD_NAMES"

# 4.3 Copy the updated binary, remove the note section
cp "$GPUBIN_INSTR" "$GPUBIN_UPDATED_NOTE"
llvm-objcopy --remove-section=.note "$GPUBIN_UPDATED_NOTE"

# 4.4 Add the expanded note section
llvm-objcopy --add-section=.note="$NOTE_OUT" "$GPUBIN_UPDATED_NOTE"

# 4.5 Update the program header for the notes section
update-note-phdr "$GPUBIN_UPDATED_NOTE" "$GPUBIN_FINAL"

# 5. Update original fatbin with instrumented gpu binary ($GPUBIN_FINAL)
#    This will emit $FATBIN_UPDATED
update-fatbin gfx908 "$GPUBIN_FINAL" "$FATBIN" "$FATBIN_UPDATED"

# 6. Update the original executable ($EXEC_IN) by embedding $FATBIN_UPDATED
#    This will emit $EXEC_UPDATED
update-exec "$EXEC_IN" "$FATBIN_UPDATED" "$EXEC_UPDATED"

# 7. Rename fatbin sections so that roc-obj* tools work with the modified executable.
#    Those tools specifically look for the .hip_fatbin section by name.
#    - Rename .hip_fatbin section to .old_fatbin
#    - Rename .new_fatbin section to .hip_fatbin
#    It is possible to do this within the update-exec tool, but doing it here is simpler
#    and less error-prone
llvm-objcopy --rename-section .hip_fatbin=.old_fatbin "$EXEC_UPDATED" "$EXEC_RENAMED1"
llvm-objcopy --rename-section .new_fatbin=.hip_fatbin "$EXEC_RENAMED1" "$EXEC_RENAMED2"

# Quoted so that paths containing spaces are handled like in the steps above.
cp "$EXEC_RENAMED2" "$EXEC_OUT"

chmod +x "$EXEC_OUT"
Uh oh!
There was an error while loading. Please reload this page.