diff --git a/profile/device/aie_trace/client/aie_trace_offload_client.h b/profile/device/aie_trace/client/aie_trace_offload_client.h index 1d04223f..184effc0 100644 --- a/profile/device/aie_trace/client/aie_trace_offload_client.h +++ b/profile/device/aie_trace/client/aie_trace_offload_client.h @@ -28,8 +28,8 @@ extern "C" { - #include - #include +#include +#include } namespace xdp { diff --git a/profile/device/aie_trace/ve2/aie_trace_offload_ve2.cpp b/profile/device/aie_trace/ve2/aie_trace_offload_ve2.cpp index 83a514b5..6353b07b 100644 --- a/profile/device/aie_trace/ve2/aie_trace_offload_ve2.cpp +++ b/profile/device/aie_trace/ve2/aie_trace_offload_ve2.cpp @@ -1,3 +1,4 @@ +// VE2 class /** * Copyright (C) 2019-2022 Xilinx, Inc * Copyright (C) 2022-2024 Advanced Micro Devices, Inc. - All rights reserved @@ -17,6 +18,7 @@ #define XDP_PLUGIN_SOURCE +#include #include #include "core/include/xrt.h" @@ -38,9 +40,6 @@ #include #include - - - namespace xdp { @@ -51,7 +50,12 @@ AIETraceOffload::AIETraceOffload , bool isPlio , uint64_t totalSize , uint64_t numStrm +#if defined(XDP_VE2_BUILD) && defined(XDP_VE2_ZOCL_BUILD) , XAie_DevInst* devInstance +#elif defined(XDP_VE2_BUILD) + , xrt::hw_context ctx + , std::shared_ptr md +#endif ) : deviceHandle(handle) , deviceId(id) @@ -66,8 +70,12 @@ AIETraceOffload::AIETraceOffload , offloadStatus(AIEOffloadThreadStatus::IDLE) , mEnCircularBuf(false) , mCircularBufOverwrite(false) +#if defined(XDP_VE2_BUILD) && defined(XDP_VE2_ZOCL_BUILD) , devInst(devInstance) - +#elif defined(XDP_VE2_BUILD) + , context(ctx) + , metadata(md) +#endif { bufAllocSz = deviceIntf->getAlignedTraceBufSize(totalSz, static_cast(numStream)); @@ -85,6 +93,7 @@ AIETraceOffload::~AIETraceOffload() offloadThread.join(); } +#ifdef XDP_VE2_ZOCL_BUILD bool AIETraceOffload::initReadTrace() { // Submit nop.elf to initialize AIE array before BD configuration @@ -161,8 +170,8 @@ bool AIETraceOffload::initReadTrace() // Compute BD: use metadata value if set, otherwise 
channelNumber * 4 uint16_t bdNum = (traceGMIO->bufferDescriptorId != UINT16_MAX) - ? traceGMIO->bufferDescriptorId - : channelNumber * 4; + ? traceGMIO->bufferDescriptorId + : channelNumber * 4; std::stringstream bdMsg; bdMsg << "AIE Trace: Using BD " << bdNum << " for channel " << (int)channelNumber << " on shim column " << (int)traceGMIO->shimColumn; @@ -177,7 +186,142 @@ bool AIETraceOffload::initReadTrace() bufferInitialized = true; return bufferInitialized; } +#else // XDNA flow +bool AIETraceOffload::initReadTrace() +{ + xrt_core::message::send(severity_level::info, "XRT", "Starting configuration for VE2."); + + buffers.clear(); + buffers.resize(numStream); + + xdp::aie::driver_config meta_config = metadata->getAIEConfigMetadata(); + XAie_Config cfg{ + meta_config.hw_gen, + meta_config.base_address, + meta_config.column_shift, + meta_config.row_shift, + meta_config.num_rows, + meta_config.num_columns, + meta_config.shim_row, + meta_config.mem_row_start, + meta_config.mem_num_rows, + meta_config.aie_tile_row_start, + meta_config.aie_tile_num_rows, + {0} // PartProp + }; + + auto RC = XAie_CfgInitialize(&aieDevInst, &cfg); + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::warning, "XRT", "AIE TRACE OFFLOAD: AIE Driver Initialization Failed."); + return false; + } + + tranxHandler = std::make_unique(); + if (!tranxHandler->initializeTransaction(&aieDevInst, "AieTraceOffload")) + return false; + + // gmioDMAInsts.clear(); + // gmioDMAInsts.resize(numStream); + + // for (uint64_t i = 0; i < numStream ; ++i) { + // xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + // "Allocating trace buffer of size " + std::to_string(bufAllocSz) + " for AIE Stream " + // + std::to_string(i)); + // xrt_bos.emplace_back(xrt::bo(context.get_device(), bufAllocSz, + // XRT_BO_FLAGS_HOST_ONLY, tranxHandler->getGroupID(0, context))); + + // buffers[i].bufId = xrt_bos.size(); + // if (!buffers[i].bufId) { + // bufferInitialized = false; + // return 
bufferInitialized; + // } + + // if (!xrt_bos.empty()) { + // auto bo_map = xrt_bos.back().map(); + // memset(bo_map, 0, bufAllocSz); + // } + + // VPDatabase* db = VPDatabase::Instance(); + // TraceGMIO* traceGMIO = (db->getStaticInfo()).getTraceGMIO(deviceId, i); + + // // channelNumber: (0-S2MM0,1-S2MM1,2-MM2S0,3-MM2S1) + // // Enable shim DMA channel, need to start first so the status is correct + // uint16_t channelNumber = (traceGMIO->channelNumber > 1) ? (traceGMIO->channelNumber - 2) : traceGMIO->channelNumber; + // XAie_DmaDirection dir = (traceGMIO->channelNumber > 1) ? DMA_MM2S : DMA_S2MM; + + // gmioDMAInsts[i].gmioTileLoc = XAie_TileLoc(traceGMIO->shimColumn, 0); + + // uint16_t bdNum = (traceGMIO->bufferDescriptorId != UINT16_MAX) ? traceGMIO->bufferDescriptorId : channelNumber * 4; + // std::stringstream bdMsg; + // bdMsg << "AIE Trace: Using BD " << bdNum << " for channel " << (int)channelNumber << " on shim column " << (int)traceGMIO->shimColumn; + // xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", bdMsg.str()); + + // RC = XAie_DmaDescInit(&aieDevInst, &(gmioDMAInsts[i].shimDmaInst), gmioDMAInsts[i].gmioTileLoc); + // RC = XAie_DmaChannelEnable(&aieDevInst, gmioDMAInsts[i].gmioTileLoc, channelNumber, dir); + // RC = XAie_DmaSetAxi(&(gmioDMAInsts[i].shimDmaInst), 0, traceGMIO->burstLength, 0, 0, 0); + // // cannot do XAie_MemAttach(devInst, &memInst, 0, 0, 0, prop, boExportHandle); since no boExportHandle + // RC = XAie_DmaSetAddrLen(&(gmioDMAInsts[i].shimDmaInst), xrt_bos[i].address(), static_cast(bufAllocSz)); + // RC = XAie_DmaEnableBd(&(gmioDMAInsts[i].shimDmaInst)); // TODO: NOT IN NPU3 + // RC = XAie_DmaWriteBd(&aieDevInst, &(gmioDMAInsts[i].shimDmaInst), gmioDMAInsts[i].gmioTileLoc, bdNum); // Write to shim DMA BD AxiMM registers + // RC = XAie_DmaChannelPushBdToQueue(&aieDevInst, gmioDMAInsts[i].gmioTileLoc, channelNumber, dir, bdNum); + // } + + for (uint64_t i = 0; i < numStream; ++i) { + VPDatabase* db = 
VPDatabase::Instance(); + TraceGMIO* traceGMIO = (db->getStaticInfo()).getTraceGMIO(deviceId, i); + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Allocating trace buffer of size " + std::to_string(bufAllocSz) + " for AIE Stream " + + std::to_string(i)); + xrt_bos.emplace_back(xrt::bo(context.get_device(), bufAllocSz, + XRT_BO_FLAGS_HOST_ONLY, tranxHandler->getGroupID(0, context))); + + buffers[i].bufId = xrt_bos.size(); + if (!buffers[i].bufId) { + bufferInitialized = false; + return bufferInitialized; + } + + if (!xrt_bos.empty()) { + auto bo_map = xrt_bos.back().map(); + memset(bo_map, 0, bufAllocSz); + } + + XAie_LocType loc; + XAie_DmaDesc dmaDesc; + loc = XAie_TileLoc(traceGMIO->shimColumn, 0); + + uint16_t channelNumber = (traceGMIO->channelNumber > 1) ? (traceGMIO->channelNumber - 2) : traceGMIO->channelNumber; + XAie_DmaDirection dmaDir = (traceGMIO->channelNumber > 1) ? DMA_MM2S : DMA_S2MM; + + // Compute BD: use metadata value if set, otherwise channelNumber * 4 + uint16_t bdNum = (traceGMIO->bufferDescriptorId != UINT16_MAX) ? 
traceGMIO->bufferDescriptorId : channelNumber * 4; + std::stringstream bdMsg; + bdMsg << "AIE Trace: Using BD " << bdNum << " for channel " << (int)channelNumber << " on shim column " << (int)traceGMIO->shimColumn; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", bdMsg.str()); + + RC = XAie_DmaDescInit(&aieDevInst, &dmaDesc, loc); + RC = XAie_DmaChannelEnable(&aieDevInst, loc, channelNumber, dmaDir); + RC = XAie_DmaSetAxi(&dmaDesc, 0, traceGMIO->burstLength, 0, 0, 0); + RC = XAie_DmaSetAddrLen(&dmaDesc, xrt_bos[i].address(), static_cast(bufAllocSz)); + RC = XAie_DmaEnableBd(&dmaDesc); + RC = XAie_DmaWriteBd(&aieDevInst, &dmaDesc, loc, bdNum); + RC = XAie_DmaChannelPushBdToQueue(&aieDevInst, loc, channelNumber, dmaDir, bdNum); + } + + if (!tranxHandler->submitTransaction(&aieDevInst, context)) + return false; + + xrt_core::message::send(severity_level::info, "XRT", "Successfully scheduled AIE Trace Offloading VE2."); + + bufferInitialized = true; + return bufferInitialized; +} +#endif + +// TODO: NPU3 does not have lines 199-213. why? 
+ +#ifdef XDP_VE2_ZOCL_BUILD void AIETraceOffload::endReadTrace() { // reset @@ -187,7 +331,7 @@ void AIETraceOffload::endReadTrace() if (isPLIO) { deviceIntf->resetAIETs2mm(i); -// deviceIntf->freeTraceBuf(b.bufId); + // deviceIntf->freeTraceBuf(b.bufId); } else { VPDatabase* db = VPDatabase::Instance(); TraceGMIO* traceGMIO = (db->getStaticInfo()).getTraceGMIO(deviceId, i); @@ -204,6 +348,37 @@ void AIETraceOffload::endReadTrace() } bufferInitialized = false; } +#else // XDNA +void AIETraceOffload::endReadTrace() +{ + // // reset + // for (uint64_t i = 0; i < numStream ; ++i) { + // if (!buffers[i].bufId) + // continue; + + // VPDatabase* db = VPDatabase::Instance(); + // TraceGMIO* traceGMIO = (db->getStaticInfo()).getTraceGMIO(deviceId, i); + + // // channelNumber: (0-S2MM0,1-S2MM1,2-MM2S0,3-MM2S1) + // // Enable shim DMA channel, need to start first so the status is correct + // uint16_t channelNumber = (traceGMIO->channelNumber > 1) ? (traceGMIO->channelNumber - 2) : traceGMIO->channelNumber; + // XAie_DmaDirection dir = (traceGMIO->channelNumber > 1) ? DMA_MM2S : DMA_S2MM; + + // XAie_DmaChannelDisable(&aieDevInst, gmioDMAInsts[i].gmioTileLoc, channelNumber, dir); + + // buffers[i].bufId = 0; + // } + // bufferInitialized = false; + + for (uint64_t i = 0; i < numStream ; ++i) { + if (!buffers[i].bufId) + continue; + + buffers[i].bufId = 0; + } + bufferInitialized = false; +} +#endif void AIETraceOffload::readTraceGMIO(bool final) { @@ -225,8 +400,13 @@ void AIETraceOffload::readTraceGMIO(bool final) } } +// TODO: only for zocl right now since xdna does not support plio right now, and this function is only for plio void AIETraceOffload::readTracePLIO(bool final) { + #if defined(XDP_VE2_BUILD) && ! 
defined(XDP_VE2_ZOCL_BUILD) + return; + #endif + if (mCircularBufOverwrite) return; @@ -310,6 +490,7 @@ void AIETraceOffload::readTracePLIO(bool final) } } +#ifdef XDP_VE2_ZOCL_BUILD uint64_t AIETraceOffload::syncAndLog(uint64_t index) { auto& bd = buffers[index]; @@ -349,6 +530,39 @@ uint64_t AIETraceOffload::syncAndLog(uint64_t index) traceLogger->addAIETraceData(index, hostBuf, nBytes, mEnCircularBuf); return nBytes; } +#else // XDNA +uint64_t AIETraceOffload::syncAndLog(uint64_t index) +{ + auto& bd = buffers[index]; + + if (bd.offset >= bd.usedSz) + return 0; + + // Amount of newly written trace + uint64_t nBytes = bd.usedSz - bd.offset; + + // Sync to host + xrt_bos[index].sync(XCL_BO_SYNC_BO_FROM_DEVICE, nBytes, bd.offset); + auto in_bo_map = xrt_bos[index].map(); + + if (!in_bo_map) + return 0; + in_bo_map += bd.offset; // apply the offset only after the null check + // Find amount of non-zero data within the nBytes just synced (a bufAllocSz-long scan from an offset pointer would run past the end of the BO) + if (!isPLIO) + nBytes = searchWrittenBytes((void*)in_bo_map, nBytes); + + // check for full buffer + if ((bd.offset + nBytes >= bufAllocSz) && !mEnCircularBuf) { + bd.isFull = true; + bd.offloadDone = true; + } + + // Log nBytes of trace + traceLogger->addAIETraceData(index, (void*)in_bo_map, nBytes, mEnCircularBuf); + return nBytes; +} +#endif bool AIETraceOffload::isTraceBufferFull() { @@ -360,8 +574,13 @@ bool AIETraceOffload::isTraceBufferFull() return false; } +// TODO: only for zocl right now since xdna does not support plio right now, and this function is only for plio void AIETraceOffload::checkCircularBufferSupport() { + #if defined(XDP_VE2_BUILD) && ! defined(XDP_VE2_ZOCL_BUILD) + return; + #endif + mEnCircularBuf = xrt_core::config::get_aie_trace_settings_reuse_buffer(); if (!mEnCircularBuf) return; @@ -458,9 +677,9 @@ void AIETraceOffload::offloadFinished() uint64_t AIETraceOffload::searchWrittenBytes(void* buf, uint64_t bytes) { /* - * Look For trace boundary using binary search. - * Use Dword to be safe - */ + * Look For trace boundary using binary search. 
+ * Use Dword to be safe + */ auto words = static_cast(buf); uint64_t wordcount = bytes / TRACE_PACKET_SIZE; diff --git a/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h b/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h index e6c996c3..fad23355 100644 --- a/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h +++ b/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h @@ -18,13 +18,27 @@ #ifndef XDP_PROFILE_AIE_TRACE_OFFLOAD_VE2_H_ #define XDP_PROFILE_AIE_TRACE_OFFLOAD_VE2_H_ +#include +#include +#include +#include +#include + +#include "core/include/xrt/xrt_bo.h" +#include "core/include/xrt/xrt_hw_context.h" + #include "xdp/profile/device/tracedefs.h" +#include "xdp/profile/plugin/aie_trace/aie_trace_metadata.h" -extern "C" -{ - #include "xaiengine/xaiegbl.h" - #include +#if defined(XDP_VE2_BUILD) && defined(XDP_VE2_ZOCL_BUILD) +// Edge ZOCL: xaiengine only (no aie_codegen / ve2_transaction). +extern "C" { +#include +#include } +#else +#include "xdp/profile/device/common/ve2/ve2_transaction.h" +#endif namespace xdp { @@ -71,14 +85,23 @@ enum class AIEOffloadThreadStatus { class AIETraceOffload { public: + // ZOCL edge: live devInst pointer. VE2 XDNA (client-style): hw_context + metadata. +#if defined(XDP_VE2_BUILD) && ! defined(XDP_VE2_ZOCL_BUILD) AIETraceOffload(void* handle, uint64_t id, PLDeviceIntf*, AIETraceLogger*, bool isPlio, uint64_t totalSize, uint64_t numStrm, - XAie_DevInst* devInstance - ); - + xrt::hw_context context, + std::shared_ptr metadata); +#else + AIETraceOffload(void* handle, uint64_t id, + PLDeviceIntf*, AIETraceLogger*, + bool isPlio, + uint64_t totalSize, + uint64_t numStrm, + XAie_DevInst* devInstance); +#endif virtual ~AIETraceOffload(); public: @@ -106,7 +129,16 @@ class AIETraceOffload uint64_t deviceId; PLDeviceIntf* deviceIntf; AIETraceLogger* traceLogger; +#if defined(XDP_VE2_BUILD) && ! 
defined(XDP_VE2_ZOCL_BUILD) + std::unique_ptr tranxHandler; + xrt::hw_context context; + std::shared_ptr metadata; + std::vector xrt_bos; + XAie_DevInst aieDevInst = {0}; +#else XAie_DevInst* devInst; + std::vector gmioDMAInsts; +#endif bool isPLIO; uint64_t totalSz; @@ -117,8 +149,6 @@ class AIETraceOffload //Internal use only // Set this for verbose trace offload bool m_debug = false; - std::vector gmioDMAInsts; - // Continuous Trace Offload (For PLIO) bool traceContinuous; diff --git a/profile/device/common/aie_driver_common_util.cpp b/profile/device/common/aie_driver_common_util.cpp index 03beb487..35c0b7a1 100644 --- a/profile/device/common/aie_driver_common_util.cpp +++ b/profile/device/common/aie_driver_common_util.cpp @@ -28,8 +28,13 @@ #include extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } #include "core/common/message.h" diff --git a/profile/device/common/client_transaction.cpp b/profile/device/common/client_transaction.cpp index 9e649b35..5cef9e64 100644 --- a/profile/device/common/client_transaction.cpp +++ b/profile/device/common/client_transaction.cpp @@ -24,8 +24,13 @@ #include "transactions/op_buf.hpp" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } // *************************************************************** diff --git a/profile/device/common/transactions/op_init.hpp b/profile/device/common/transactions/op_init.hpp index 69e84010..c6ca79fa 100644 --- a/profile/device/common/transactions/op_init.hpp +++ b/profile/device/common/transactions/op_init.hpp @@ -3,8 +3,13 @@ #ifndef __OPINIT_HPP__ #define __OPINIT_HPP__ +extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#else #include - +#endif +} #include "op_types.h" #include diff --git a/profile/device/common/ve2/ve2_transaction.cpp b/profile/device/common/ve2/ve2_transaction.cpp new file mode 100644 index 
00000000..b46d7f8c --- /dev/null +++ b/profile/device/common/ve2/ve2_transaction.cpp @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2026 Advanced Micro Devices, Inc. All rights reserved + +#include + +#include "ve2_transaction.h" +#include "core/common/message.h" +#include "xrt/experimental/xrt_elf.h" +#include "xrt/experimental/xrt_ext.h" +#include "xrt/experimental/xrt_module.h" +#include "xrt/xrt_hw_context.h" +#include "xrt/xrt_kernel.h" + +#include "core/common/aiebu/src/cpp/include/aiebu/aiebu_assembler.h" +#include "core/common/aiebu/src/cpp/include/aiebu/aiebu_error.h" + +#include +#include +#include +#include + +#include + +extern "C" { + #include + #include +} + +namespace xdp::aie { + using severity_level = xrt_core::message::severity_level; + + bool VE2Transaction::initializeTransaction(XAie_DevInst* aieDevInst, std::string tName) + { + setTransactionName(tName); + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Writing to New Control Code ASM file: " + getAsmFileName()); + + try { + // On VE2 Linux, the default IO backend is Linux IO which tries to open + // the AIE character device — this fails on XDNA (PCIe NPU) since the + // AIE is managed by the XDNA driver, not the Linux AIE driver. + // Explicitly switch to control-code backend before opening the ASM file. + XAie_SetIOBackend(aieDevInst, XAIE_IO_BACKEND_CONTROLCODE); + XAie_OpenControlCodeFile(aieDevInst, getAsmFileName().c_str(), 8192); + XAie_StartNewJob(aieDevInst, XAIE_START_JOB); + return true; + } + catch(const std::exception& e) { + xrt_core::message::send(xrt_core::message::severity_level::error, "XRT", + "Error in generating asm File: " + getAsmFileName() + "\n" + e.what()); + } + xrt_core::message::send(severity_level::warning, "XRT", "AIE Transaction Initialization Failed."); + return false; + } + + bool VE2Transaction::completeASM(XAie_DevInst* aieDevInst) + { + // + // 1. 
End generation of ASM file + // + try { + XAie_EndJob(aieDevInst); + XAie_EndPage(aieDevInst); + XAie_CloseControlCodeFile(aieDevInst); + } + catch(const std::exception& e) { + xrt_core::message::send(xrt_core::message::severity_level::error, "XRT", + "Error in generating ASM file: " + getAsmFileName() + "\n" + e.what()); + return false; + } + return true; + } + + bool VE2Transaction::generateELF() + { + // + // 2. Convert ASM to ELF + // + // Fill this vector with ASM content + std::vector control_code_buf; + std::vector libpaths; + libpaths.push_back("./"); + + try { + //Read ASM file + std::string asmFileName = getAsmFileName(); + if (!std::filesystem::exists(asmFileName)) + throw std::runtime_error("file:" + asmFileName + " not found\n"); + + std::ifstream inAsm(asmFileName, std::ios::in | std::ios::binary); + std::cout << "Open file " << asmFileName << std::endl; + + auto file_size = std::filesystem::file_size(asmFileName); + control_code_buf.resize(file_size); + + inAsm.read(control_code_buf.data(), file_size); + std::streamsize bytesRead = inAsm.gcount(); + if (static_cast(bytesRead) != static_cast(file_size)) { + std::cerr << "Read " << bytesRead << " bytes but expected " << file_size + << " for file " << asmFileName << '\n'; + control_code_buf.resize(static_cast(bytesRead)); // keep only read bytes + } else { + std::cout << "ASM file read (" << file_size << " bytes): " << asmFileName << '\n'; + } + + //Convert ASM to ELF data. 
+ auto as = aiebu::aiebu_assembler(aiebu::aiebu_assembler::buffer_type::asm_aie2ps, + control_code_buf, std::vector{}, libpaths); + + //Write elf data to a file + auto e = as.get_elf(); + std::cout << "Elf size:" << e.size() << std::endl; + std::ofstream outElf(getElfFileName(), std::ios_base::binary); + outElf.write(e.data(), e.size()); + } + catch(const std::exception& e) { + xrt_core::message::send(xrt_core::message::severity_level::error, "XRT", + "Error in generating Elf file: " + getElfFileName() + "\n" + e.what()); + return false; + } + return true; + } + + bool VE2Transaction::submitELF(xrt::hw_context hwContext) + { + // + // 3. Submit ELF to microcontroller + // + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Start New Control Code Elf"); + xrt::elf profileElf; + try { + profileElf = xrt::elf(getElfFileName()); + } + catch (...) { + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", + "Failed to load " + getElfFileName() + ". Cannot configure AIE to profile."); + return false; + } + + const char* mode = std::getenv("ELF_TO_SUBMIT"); + if (mode && std::strcmp(mode, "offload") == 0 && m_transactionName == "AieTraceMetrics") + return true; + if (mode && std::strcmp(mode, "metrics") == 0 && m_transactionName == "AieTraceOffload") + return true; + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Elf Object Created"); + xrt::module mod{profileElf}; + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Module Created"); + xrt::kernel kernel; + try { + kernel = xrt::ext::kernel{hwContext, mod, "XDP_KERNEL:{IPUV1CNN}"}; + } catch (...) { + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", + "XDP_KERNEL not found in HW Context. 
Unable to run " + getElfFileName()); + return false; + } + + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "XDP_KERNEL created"); + xrt::run run{kernel}; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Kernel run created"); + + run.start(); + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Run started"); + + run.wait2(); + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", "Wait done!"); + + return true; + } + + bool VE2Transaction::submitTransaction(XAie_DevInst* aieDevInst, xrt::hw_context hwContext) + { + if (!completeASM(aieDevInst)) + return false; + if (!generateELF()) + return false; + if (!submitELF(hwContext)) + return false; + return true; + } +} \ No newline at end of file diff --git a/profile/device/common/ve2/ve2_transaction.h b/profile/device/common/ve2/ve2_transaction.h new file mode 100644 index 00000000..41dc701e --- /dev/null +++ b/profile/device/common/ve2/ve2_transaction.h @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2026 Advanced Micro Devices, Inc. All rights reserved + +#ifndef VE2_TRANSACTION_DOT_H +#define VE2_TRANSACTION_DOT_H + +#include +#include +#include + +#include "xrt/xrt_hw_context.h" +#include "xrt/xrt_kernel.h" + +// XDNA-only: pulled from device offload via aie_trace_offload_ve2.h (ZOCL does not use this file). +// xaiegbl_dynlink.h must come before aie_codegen.h so XAIE_AIG_EXPORT is defined for xaie_noc.h et al. +extern "C" { +#include +#include +#include +} + +/** + * @brief VE2Transaction class for generating and submitting VE2 XDNA transactions + * + * This class is used to generate and submit VE2 transactions. It is used to generate the ASM file, the ELF file, and submit the transaction. 
+ * + */ + +namespace xdp::aie { + class VE2Transaction { + public: + VE2Transaction() = default; + bool initializeTransaction(XAie_DevInst* aieDevInst, std::string tName); + bool submitTransaction(XAie_DevInst* aieDevInst, xrt::hw_context hwContext); + bool completeASM(XAie_DevInst* aieDevInst); + bool generateELF(); + bool submitELF(xrt::hw_context hwContext); + + void setTransactionName(std::string newTransactionName) {m_transactionName = newTransactionName;} + std::string getAsmFileName() const { return m_transactionName + ".asm"; } + std::string getElfFileName() const { return m_transactionName + ".elf"; } + int getGroupID(int id, xrt::hw_context hwContext) { + xrt::kernel kernel = xrt::kernel(hwContext, "XDP_KERNEL"); + return kernel.group_id(id); + } + + private: + std::string m_transactionName; + std::vector m_columns; + std::vector m_rows; + std::vector m_offsets; + std::vector m_values; + }; + +} // namespace xdp::aie + +#endif diff --git a/profile/plugin/aie_base/aie_base_util.h b/profile/plugin/aie_base/aie_base_util.h index baa70ab2..ede8c803 100755 --- a/profile/plugin/aie_base/aie_base_util.h +++ b/profile/plugin/aie_base/aie_base_util.h @@ -11,8 +11,13 @@ #include "xdp/profile/plugin/aie_base/generations/aie_generations.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp::aie { diff --git a/profile/plugin/aie_debug/CMakeLists.txt b/profile/plugin/aie_debug/CMakeLists.txt index 67d71348..9b87a6fc 100644 --- a/profile/plugin/aie_debug/CMakeLists.txt +++ b/profile/plugin/aie_debug/CMakeLists.txt @@ -34,8 +34,8 @@ file(GLOB AIE_DRIVER_COMMON_UTIL_FILES if (XDP_VE2_BUILD_CMAKE STREQUAL "yes") add_library(xdp_aie_debug_plugin SHARED ${AIE_DEBUG_PLUGIN_FILES}) add_dependencies(xdp_aie_debug_plugin xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_debug_plugin PRIVATE xdp_core xrt_coreutil xaiengine) - target_compile_definitions(xdp_aie_debug_plugin PRIVATE FAL_LINUX="on" 
XDP_VE2_BUILD=1) + target_link_libraries(xdp_aie_debug_plugin PRIVATE xdp_core xrt_coreutil aie_codegen) + target_compile_definitions(xdp_aie_debug_plugin PRIVATE FAL_LINUX="on" XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1) target_include_directories(xdp_aie_debug_plugin PRIVATE ${CMAKE_SOURCE_DIR}/src) install (TARGETS xdp_aie_debug_plugin @@ -46,8 +46,8 @@ if (XDP_VE2_BUILD_CMAKE STREQUAL "yes") elseif (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") add_library(xdp_aie_debug_plugin SHARED ${AIE_DEBUG_PLUGIN_FILES} ${AIE_DRIVER_COMMON_UTIL_FILES}) add_dependencies(xdp_aie_debug_plugin xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_debug_plugin PRIVATE xdp_core xrt_coreutil xaiengine) - target_compile_definitions(xdp_aie_debug_plugin PRIVATE XDP_CLIENT_BUILD=1 -DXAIE_FEATURE_MSVC) + target_link_libraries(xdp_aie_debug_plugin PRIVATE xdp_core xrt_coreutil aie_codegen) + target_compile_definitions(xdp_aie_debug_plugin PRIVATE XDP_CLIENT_BUILD=1 XDP_USE_AIE_CODEGEN=1 -DXAIE_FEATURE_MSVC) target_include_directories(xdp_aie_debug_plugin PRIVATE ${AIERT_DIR}/include) set_target_properties(xdp_aie_debug_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) diff --git a/profile/plugin/aie_debug/aie_debug_metadata.h b/profile/plugin/aie_debug/aie_debug_metadata.h index 1b397227..23d83926 100644 --- a/profile/plugin/aie_debug/aie_debug_metadata.h +++ b/profile/plugin/aie_debug/aie_debug_metadata.h @@ -24,8 +24,13 @@ #include "xdp/profile/plugin/vp_base/vp_base_plugin.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp { diff --git a/profile/plugin/aie_debug/client/aie_debug.h b/profile/plugin/aie_debug/client/aie_debug.h index b27cc418..51c94eea 100755 --- a/profile/plugin/aie_debug/client/aie_debug.h +++ b/profile/plugin/aie_debug/client/aie_debug.h @@ -17,8 +17,8 @@ #include "core/include/xrt/xrt_hw_context.h" extern "C" { - #include - #include + #include + #include } namespace xdp { diff --git 
a/profile/plugin/aie_debug/ve2/aie_debug.h b/profile/plugin/aie_debug/ve2/aie_debug.h index fe7967a7..1b2a20e2 100755 --- a/profile/plugin/aie_debug/ve2/aie_debug.h +++ b/profile/plugin/aie_debug/ve2/aie_debug.h @@ -18,8 +18,13 @@ #include "xaiefal/xaiefal.hpp" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp { diff --git a/profile/plugin/aie_dtrace/util/aie_dtrace_util.h b/profile/plugin/aie_dtrace/util/aie_dtrace_util.h index 29ac73f0..9fe75466 100644 --- a/profile/plugin/aie_dtrace/util/aie_dtrace_util.h +++ b/profile/plugin/aie_dtrace/util/aie_dtrace_util.h @@ -9,7 +9,11 @@ #include extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#else #include +#endif } namespace xdp::aie::dtrace { diff --git a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.h b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.h index 2a4acbfa..bbf36f04 100644 --- a/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.h +++ b/profile/plugin/aie_dtrace/ve2/aie_dtrace_ve2.h @@ -17,8 +17,13 @@ #include "xaiefal/xaiefal.hpp" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp { diff --git a/profile/plugin/aie_halt/CMakeLists.txt b/profile/plugin/aie_halt/CMakeLists.txt index a67d422b..8bb4cf17 100644 --- a/profile/plugin/aie_halt/CMakeLists.txt +++ b/profile/plugin/aie_halt/CMakeLists.txt @@ -31,8 +31,8 @@ if (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") xrt_configure_version_file(xdp_aie_halt_plugin SHARED) add_library(xdp_aie_halt_plugin SHARED xdp_aie_halt_plugin-version.rc ${XDP_AIE_HALT_PLUGIN_FILES} ${XDP_DEVICE_COMMON_FILES}) add_dependencies(xdp_aie_halt_plugin xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_halt_plugin PRIVATE xdp_core xrt_coreutil xaiengine) - target_compile_definitions(xdp_aie_halt_plugin PRIVATE XDP_CLIENT_BUILD=1 -DXAIE_FEATURE_MSVC) + target_link_libraries(xdp_aie_halt_plugin PRIVATE xdp_core xrt_coreutil aie_codegen) + 
target_compile_definitions(xdp_aie_halt_plugin PRIVATE XDP_CLIENT_BUILD=1 XDP_USE_AIE_CODEGEN=1 -DXAIE_FEATURE_MSVC) target_include_directories(xdp_aie_halt_plugin PRIVATE ${AIERT_DIR}/include) set_target_properties(xdp_aie_halt_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) @@ -46,9 +46,9 @@ elseif (XDP_VE2_BUILD_CMAKE STREQUAL "yes") add_dependencies(xdp_aie_halt_plugin xdp_core xrt_coreutil) #target_include_directories(xdp_aie_halt_plugin PRIVATE ${AIERT_DIR}/include) - #target_link_libraries(xdp_aie_halt_plugin PRIVATE xdp_core xrt_coreutil xaiengine) + #target_link_libraries(xdp_aie_halt_plugin PRIVATE xdp_core xrt_coreutil aie_codegen) target_link_libraries(xdp_aie_halt_plugin PRIVATE xdp_core xrt_coreutil) - target_compile_definitions(xdp_aie_halt_plugin PRIVATE XDP_VE2_BUILD=1) + target_compile_definitions(xdp_aie_halt_plugin PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1) set_target_properties(xdp_aie_halt_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) diff --git a/profile/plugin/aie_halt/clientDev/aie_halt.cpp b/profile/plugin/aie_halt/clientDev/aie_halt.cpp index bced7a34..13fd926c 100644 --- a/profile/plugin/aie_halt/clientDev/aie_halt.cpp +++ b/profile/plugin/aie_halt/clientDev/aie_halt.cpp @@ -34,8 +34,8 @@ #include "core/include/xclbin.h" extern "C" { - #include - #include +#include +#include } #ifdef _WIN32 diff --git a/profile/plugin/aie_pc/CMakeLists.txt b/profile/plugin/aie_pc/CMakeLists.txt index 0ca7ffb7..2a88c97e 100644 --- a/profile/plugin/aie_pc/CMakeLists.txt +++ b/profile/plugin/aie_pc/CMakeLists.txt @@ -30,8 +30,8 @@ if (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") add_library(xdp_aie_pc_plugin SHARED xdp_aie_pc_plugin-version.rc ${XDP_AIE_PC_PLUGIN_FILES} ${XDP_DEVICE_COMMON_FILES}) add_dependencies(xdp_aie_pc_plugin xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_pc_plugin PRIVATE xdp_core xrt_coreutil xaiengine) - target_compile_definitions(xdp_aie_pc_plugin PRIVATE 
XDP_CLIENT_BUILD=1 -DXAIE_FEATURE_MSVC) + target_link_libraries(xdp_aie_pc_plugin PRIVATE xdp_core xrt_coreutil aie_codegen) + target_compile_definitions(xdp_aie_pc_plugin PRIVATE XDP_CLIENT_BUILD=1 XDP_USE_AIE_CODEGEN=1 -DXAIE_FEATURE_MSVC) target_include_directories(xdp_aie_pc_plugin PRIVATE ${AIERT_DIR}/include) set_target_properties(xdp_aie_pc_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) diff --git a/profile/plugin/aie_pc/clientDev/aie_pc.cpp b/profile/plugin/aie_pc/clientDev/aie_pc.cpp index f18389f5..9c0ef0a8 100644 --- a/profile/plugin/aie_pc/clientDev/aie_pc.cpp +++ b/profile/plugin/aie_pc/clientDev/aie_pc.cpp @@ -38,9 +38,9 @@ #include "core/include/xclbin.h" extern "C" { - #include - #include - #include + #include + #include + #include } namespace xdp { diff --git a/profile/plugin/aie_pc/clientDev/aie_pc.h b/profile/plugin/aie_pc/clientDev/aie_pc.h index 67647bb4..20810e6d 100644 --- a/profile/plugin/aie_pc/clientDev/aie_pc.h +++ b/profile/plugin/aie_pc/clientDev/aie_pc.h @@ -21,8 +21,8 @@ #include "xdp/profile/plugin/aie_pc/aie_pc_impl.h" extern "C" { - #include - #include + #include + #include } #include diff --git a/profile/plugin/aie_profile/CMakeLists.txt b/profile/plugin/aie_profile/CMakeLists.txt index 7ba9968a..3d7f78ab 100644 --- a/profile/plugin/aie_profile/CMakeLists.txt +++ b/profile/plugin/aie_profile/CMakeLists.txt @@ -43,8 +43,8 @@ if (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") add_library(xdp_aie_profile_plugin SHARED xdp_aie_profile_plugin-version.rc ${AIE_PROFILE_PLUGIN_FILES} ${AIE_PROFILE_IMPL_FILES} ${AIE_DRIVER_COMMON_UTIL_FILES} ${AIE_PROFILE_UTIL_FILES} ${AIE_JSON_PARSER_FILES}) add_dependencies(xdp_aie_profile_plugin xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_profile_plugin PRIVATE xdp_core xrt_coreutil xaiengine) - target_compile_definitions(xdp_aie_profile_plugin PRIVATE XDP_CLIENT_BUILD=1 -DXAIE_FEATURE_MSVC) + target_link_libraries(xdp_aie_profile_plugin PRIVATE xdp_core xrt_coreutil 
aie_codegen) + target_compile_definitions(xdp_aie_profile_plugin PRIVATE XDP_CLIENT_BUILD=1 XDP_USE_AIE_CODEGEN=1 -DXAIE_FEATURE_MSVC) target_include_directories(xdp_aie_profile_plugin PRIVATE ${AIERT_DIR}/include) set_target_properties(xdp_aie_profile_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) @@ -83,12 +83,19 @@ else() "${IMPL_DIR}/*.h" "${IMPL_DIR}/*.cpp" ) - add_library(xdp_aie_profile_plugin_xdna SHARED ${AIE_PROFILE_PLUGIN_FILES} ${AIE_PROFILE_IMPL_FILES} ${AIE_PROFILE_UTIL_FILES} ${AIE_PROFILE_CONFIG_FILES} ${AIE_JSON_PARSER_FILES}) + add_library(xdp_aie_profile_plugin_xdna SHARED ${AIE_PROFILE_PLUGIN_FILES} ${AIE_PROFILE_IMPL_FILES} ${AIE_PROFILE_UTIL_FILES} ${AIE_PROFILE_CONFIG_FILES} ${AIE_JSON_PARSER_FILES} "${PROFILE_DIR}/device/common/ve2/ve2_transaction.cpp") add_dependencies(xdp_aie_profile_plugin_xdna xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_profile_plugin_xdna PRIVATE xdp_core xrt_coreutil xaiengine) - target_compile_definitions(xdp_aie_profile_plugin_xdna PRIVATE XDP_VE2_BUILD=1 FAL_LINUX="on") + target_link_libraries(xdp_aie_profile_plugin_xdna PRIVATE xdp_core xrt_coreutil aie_codegen aiebu_library_objects) + # -Bsymbolic: ensures XAie_* calls within this plugin resolve to the + # statically-linked aie_codegen rather than being interposed by the + # system libxaiengine.so.3 (required by libxrt_core.so.2) + target_link_options(xdp_aie_profile_plugin_xdna PRIVATE -Wl,-Bsymbolic) + target_compile_definitions(xdp_aie_profile_plugin_xdna PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1 FAL_LINUX="on") target_include_directories(xdp_aie_profile_plugin_xdna PRIVATE ${CMAKE_SOURCE_DIR}/src + ${AIEFAL_DIR} + ${AIEBU_SOURCE_DIR}/src/cpp/include + ${AIERT_DIR}/include ${XRT_SOURCE_DIR}/runtime_src/core/common/elf ) set_target_properties(xdp_aie_profile_plugin_xdna PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) diff --git a/profile/plugin/aie_profile/aie_profile_plugin.cpp 
b/profile/plugin/aie_profile/aie_profile_plugin.cpp index 789393ed..45017c99 100644 --- a/profile/plugin/aie_profile/aie_profile_plugin.cpp +++ b/profile/plugin/aie_profile/aie_profile_plugin.cpp @@ -168,6 +168,8 @@ namespace xdp { #elif defined(XRT_X86_BUILD) implementation = std::make_unique(db, metadata, deviceID); #elif XDP_VE2_BUILD + xrt::hw_context context = xrt_core::hw_context_int::create_hw_context_from_implementation(handle); + metadata->setHwContext(context); implementation = std::make_unique(db, metadata, deviceID); #else implementation = std::make_unique(db, metadata, deviceID); @@ -244,6 +246,8 @@ auto time = std::time(nullptr); #ifdef XDP_CLIENT_BUILD implementation->poll(0); + #elif defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD) + implementation->poll(implementation->getDeviceID()); #endif implementation->endPoll(); @@ -257,6 +261,13 @@ auto time = std::time(nullptr); #ifdef XDP_CLIENT_BUILD auto& implementation = handleToAIEProfileImpl.begin()->second; implementation->poll(0); + #elif defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD) + for (auto& p : handleToAIEProfileImpl) { + if (!p.second) + continue; + p.second->endPoll(); + p.second->poll(p.second->getDeviceID()); + } #endif // Ask all threads to end for (auto& p : handleToAIEProfileImpl) { diff --git a/profile/plugin/aie_profile/client/aie_profile.h b/profile/plugin/aie_profile/client/aie_profile.h index 863f1fb6..6cc355aa 100644 --- a/profile/plugin/aie_profile/client/aie_profile.h +++ b/profile/plugin/aie_profile/client/aie_profile.h @@ -13,8 +13,8 @@ #include "xdp/profile/device/common/client_transaction.h" extern "C" { -#include -#include +#include +#include } namespace xdp { diff --git a/profile/plugin/aie_profile/util/aie_profile_config.h b/profile/plugin/aie_profile/util/aie_profile_config.h index 4a8643a4..5ba86338 100644 --- a/profile/plugin/aie_profile/util/aie_profile_config.h +++ b/profile/plugin/aie_profile/util/aie_profile_config.h @@ -11,8 +11,13 @@ #include 
"xdp/profile/database/static_info/aie_constructs.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp::aie::profile { diff --git a/profile/plugin/aie_profile/util/aie_profile_util.h b/profile/plugin/aie_profile/util/aie_profile_util.h index 3cf62984..fc56e369 100644 --- a/profile/plugin/aie_profile/util/aie_profile_util.h +++ b/profile/plugin/aie_profile/util/aie_profile_util.h @@ -11,8 +11,13 @@ #include "xdp/profile/database/static_info/aie_constructs.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp::aie::profile { diff --git a/profile/plugin/aie_profile/ve2/aie_profile.cpp b/profile/plugin/aie_profile/ve2/aie_profile.cpp index a0874065..7b27e8d8 100644 --- a/profile/plugin/aie_profile/ve2/aie_profile.cpp +++ b/profile/plugin/aie_profile/ve2/aie_profile.cpp @@ -31,6 +31,10 @@ #include "core/common/api/hw_context_int.h" #include "shim_ve2/xdna_hwctx.h" +#include "core/common/api/bo_int.h" +#include "xrt/xrt_bo.h" + +#ifdef XDP_VE2_ZOCL_BUILD namespace { static void* fetchAieDevInst(void* devHandle) { @@ -748,3 +752,509 @@ namespace xdp { } } +// END ZOCL flow + +// XDNA flow +#else +namespace xdp { + using tile_type = xdp::tile_type; + using module_type = xdp::module_type; + using severity_level = xrt_core::message::severity_level; + + AieProfile_VE2Impl::AieProfile_VE2Impl(VPDatabase* database, std::shared_ptr metadata, uint64_t deviceID) + : AieProfileImpl(database, metadata, deviceID) + { + auto hwGen = metadata->getHardwareGen(); + + coreStartEvents = aie::profile::getCoreEventSets(hwGen); + coreEndEvents = coreStartEvents; + + memoryStartEvents = aie::profile::getMemoryEventSets(hwGen); + memoryEndEvents = memoryStartEvents; + + shimStartEvents = aie::profile::getInterfaceTileEventSets(hwGen); + shimEndEvents = shimStartEvents; + shimEndEvents[METRIC_BYTE_COUNT] = {XAIE_EVENT_PORT_RUNNING_0_PL, XAIE_EVENT_PERF_CNT_0_PL}; + + 
memTileStartEvents = aie::profile::getMemoryTileEventSets(hwGen); + memTileEndEvents = memTileStartEvents; + + microcontrollerEvents = aie::profile::getMicrocontrollerEventSets(hwGen); + + tranxHandler = std::make_unique(); + + // Create debug buffer for AIE Profile results + auto context = metadata->getHwContext(); + uint32_t* output = nullptr; + std::map activeUCsegmentMap; + activeUCsegmentMap[0] = 0x20000; + try { + resultBO = xrt_core::bo_int::create_bo(context, 0x20000, xrt_core::bo_int::use_type::uc_debug); + xrt_core::bo_int::config_bo(resultBO, activeUCsegmentMap); + output = resultBO.map(); + memset(output, 0, 0x20000); + } catch (std::exception& e) { + std::stringstream msg; + msg << "Unable to create 128KB buffer for AIE Profile results. Cannot get AIE Profile info. " << e.what() << std::endl; + xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT", msg.str()); + } + } + + void AieProfile_VE2Impl::updateDevice() { + bool runtimeCounters = setMetricsSettings(deviceID, metadata->getHandle()); + if (!runtimeCounters) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE Profile Counters were not found for this design. 
Please specify " + "graph_based_[aie|aie_memory|memory_tile|interface_tile]_metrics and/or " + "tile_based_[aie|aie_memory|memory_tile|interface_tile|microcontroller]_metrics " + "under \"AIE_profile_settings\" section in your xrt.ini."); + (db->getStaticInfo()).setIsAIECounterRead(deviceID,true); + return; + } + + // Build poll ASM/ELF after metrics are configured; submit is deferred to endPoll() + generatePollElf(); + } + + // Set metrics for all specified AIE counters on this device with configs given in AIE_profile_settings + bool + AieProfile_VE2Impl::setMetricsSettings(const uint64_t deviceId, void* handle) + { + int counterId = 0; + bool runtimeCounters = false; + + xdp::aie::driver_config meta_config = metadata->getAIEConfigMetadata(); + XAie_Config cfg { + meta_config.hw_gen, + meta_config.base_address, + meta_config.column_shift, + meta_config.row_shift, + meta_config.num_rows, + meta_config.num_columns, + meta_config.shim_row, + meta_config.mem_row_start, + meta_config.mem_num_rows, + meta_config.aie_tile_row_start, + meta_config.aie_tile_num_rows, + {0} // PartProp + }; + + auto RC = XAie_CfgInitialize(&aieDevInst, &cfg); + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::warning, "XRT", "AIE Driver Initialization Failed."); + return false; + } + + const std::string tranxName = "AieProfileMetrics"; + if (!tranxHandler->initializeTransaction(&aieDevInst, tranxName)) { + xrt_core::message::send(severity_level::warning, "XRT", "Transaction Initialization Failed."); + return false; + } + + auto hwGen = metadata->getHardwareGen(); + auto configChannel0 = metadata->getConfigChannel0(); + auto configChannel1 = metadata->getConfigChannel1(); + uint8_t startColShift = metadata->getPartitionOverlayStartCols().front(); + aie::displayColShiftInfo(startColShift); + + for (int module = 0; module < metadata->getNumModules(); ++module) { + auto configMetrics = metadata->getConfigMetricsVec(module); + if (configMetrics.empty()) + continue; + + int 
numTileCounters[metadata->getNumCountersMod(module)+1] = {0}; + XAie_ModuleType mod = aie::profile::getFalModuleType(module); + + // Iterate over tiles and metrics to configure all desired counters + for (auto& tileMetric : configMetrics) { + auto& metricSet = tileMetric.second; + auto tile = tileMetric.first; + auto col = tile.col + startColShift; + auto row = tile.row; + auto subtype = tile.subtype; + auto type = aie::getModuleType(row, metadata->getAIETileRowOffset()); + if ((mod == XAIE_MEM_MOD) && (type == module_type::core)) + type = module_type::dma; + + // Catch microcontroller event sets for MDM + if (module == static_cast(module_type::uc)) { + // Configure + auto events = microcontrollerEvents[metricSet]; + aie::profile::configMDMCounters(&aieDevInst, hwGen, col, row, events); + // Record + tile_type recordTile; + recordTile.col = col; + recordTile.row = row; + microcontrollerTileEvents[recordTile] = events; + runtimeCounters = true; + continue; + } + + // Ignore invalid types and inactive modules + // NOTE: Inactive core modules are configured when utilizing + // stream switch monitor ports to profile DMA channels + if (!aie::profile::isValidType(type, mod)) + continue; + if ((type == module_type::dma) && !tile.active_memory) + continue; + if ((type == module_type::core) && !tile.active_core) { + if (metadata->getPairModuleIndex(metricSet, type) < 0) + continue; + } + + auto loc = XAie_TileLoc(col, row); + auto startEvents = (type == module_type::core) ? coreStartEvents[metricSet] + : ((type == module_type::dma) ? memoryStartEvents[metricSet] + : ((type == module_type::shim) ? shimStartEvents[metricSet] + : memTileStartEvents[metricSet])); + auto endEvents = (type == module_type::core) ? coreEndEvents[metricSet] + : ((type == module_type::dma) ? memoryEndEvents[metricSet] + : ((type == module_type::shim) ? 
shimEndEvents[metricSet] + : memTileEndEvents[metricSet])); + std::vector resetEvents = {}; + + int numCounters = 0; + auto numFreeCtr = static_cast(startEvents.size()); + + int numFreeCtrSS = numFreeCtr; + if (aie::profile::profileAPIMetricSet(metricSet)) { + if (numFreeCtr < 2) { + continue; + } + // We need to monitor single stream switch monitor port + // numFreeCtrSS = 1 ; + } + + // Specify Sel0/Sel1 for memory tile events 21-44 + auto iter0 = configChannel0.find(tile); + auto iter1 = configChannel1.find(tile); + uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; + uint8_t channel1 = (iter1 == configChannel1.end()) ? 1 : iter1->second; + std::vector channels = {channel0, channel1}; // TODO: do we also add channel 2 & 3 here? + + // Modify events as needed + aie::profile::modifyEvents(type, subtype, channel0, startEvents, metadata->getHardwareGen()); + endEvents = startEvents; + + // TBD : Placeholder to configure AIE core with required profile counters. + configEventSelections(loc, type, metricSet, channels); + // TBD : Placeholder to configure shim tile with required profile counters. 
+ + // TODO: support for VE2 XDNA for profile API metric sets + { + // // Identify the profiling API metric sets and configure graph events + // if (metadata->getUseGraphIterator() && !graphItrBroadcastConfigDone) { + // XAie_Events bcEvent = XAIE_EVENT_NONE_CORE; + // bool status = aie::profile::configGraphIteratorAndBroadcast(aieDevInst, aieDevice, + // metadata, xaieModule, loc, mod, type, metricSet, bcEvent, bcResourcesBytesTx); + // if (status) { + // graphIteratorBrodcastChannelEvent = bcEvent; + // graphItrBroadcastConfigDone = true; + // } + // } + + // if (aie::profile::profileAPIMetricSet(metricSet)) { + // // Re-use the existing port running event for both the counters + // startEvents[startEvents.size()-1] = startEvents[0]; + + // // Use start events as End events for profile counters if threshold is not provided + // endEvents[endEvents.size()-1] = endEvents[0]; + + // // Use the set values broadcast events for the reset of counter + // resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_NONE_CORE}; + // if (type == module_type::shim) { + // if (metadata->getUseGraphIterator()) + // resetEvents = {graphIteratorBrodcastChannelEvent, graphIteratorBrodcastChannelEvent}; + // else + // resetEvents = {XAIE_EVENT_NONE_CORE, XAIE_EVENT_USER_EVENT_1_PL}; + // } + // } + } + + // Request and configure all available counters for this tile + for (int i=0; i < numFreeCtr; ++i) { + auto startEvent = startEvents.at(i); + auto endEvent = endEvents.at(i); + auto resetEvent = XAIE_EVENT_NONE_CORE; + auto portnum = xdp::aie::getPortNumberFromEvent(startEvent); + // For metric sets with multiple stream-switch ports, use modulo for channel mapping + uint8_t channelNum = portnum % 2; + uint8_t channel = (channelNum == 0) ? 
channel0 : channel1; + + // Configure group event before reserving and starting counter + aie::profile::configGroupEvents(&aieDevInst, loc, mod, type, metricSet, startEvent, channel); + + // TODO: support for VE2 XDNA for profile API metric sets + // // Configure the profile counters for profile APIs metric sets. + // std::shared_ptr perfCounter = nullptr; + if (aie::profile::profileAPIMetricSet(metricSet)) { + // resetEvent = resetEvents.at(i); + // threshold = metadata->getUserSpecifiedThreshold(tileMetric.first, tileMetric.second); + // threshold = aie::profile::convertToBeats(tileMetric.second, threshold, metadata->getHardwareGen()); + + // if (i==0 && threshold>0) + // endEvent = XAIE_EVENT_PERF_CNT_1_PL; + + // if (i==1 && threshold == 0) + // continue; + + // XAie_Events retCounterEvent = XAIE_EVENT_NONE_CORE; + // perfCounter = aie::profile::configProfileAPICounters(aieDevInst, aieDevice, metadata, xaieModule, + // mod, type, metricSet, startEvent, endEvent, resetEvent, i, perfCounters.size(), + // threshold, retCounterEvent, tile, bcResourcesLatency, adfAPIResourceInfoMap, adfAPIBroadcastEventsMap); + // if (!perfCounter) + // continue; + // perfCounters.push_back(perfCounter); + } else { + // No resource manager, so manually manage the counters + RC = XAie_PerfCounterReset(&aieDevInst, loc, mod, i); + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::error, "XRT", "AIE Performance Counter Reset Failed."); + break; + } + RC = XAie_PerfCounterControlSet(&aieDevInst, loc, mod, i, startEvent, endEvent); + if (RC != XAIE_OK) { + xrt_core::message::send(severity_level::error, "XRT", "AIE Performance Counter Set Failed."); + break; + } + } + + if (aie::isStreamSwitchPortEvent(startEvent)) + configStreamSwitchPorts(tileMetric.first, loc, type, metricSet, channel, startEvent); + + // Generate user_event_1 for byte count metric set after configuration + if ((metricSet == METRIC_BYTE_COUNT) && (i == 1) && !graphItrBroadcastConfigDone) { + XAie_LocType 
tileloc = XAie_TileLoc(tile.col, tile.row); + //Note: For BYTE_COUNT metric, user_event_1 is used twice as eventA & eventB to + // to transition the FSM from Idle->State0->State1. + // eventC = Port Running and eventD = stop event (counter event). + XAie_EventGenerate(&aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); + XAie_EventGenerate(&aieDevInst, tileloc, mod, XAIE_EVENT_USER_EVENT_1_PL); + } + + // Convert enums to physical event IDs for reporting purposes + uint16_t tmpStart; + uint16_t tmpEnd; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, startEvent, &tmpStart); + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, endEvent, &tmpEnd); + uint16_t phyStartEvent = tmpStart + aie::profile::getCounterBase(type); + uint16_t phyEndEvent = tmpEnd + aie::profile::getCounterBase(type); + auto payload = channel0; + + // Store counter info in database + std::string counterName = "AIE Counter " + std::to_string(counterId); + (db->getStaticInfo()).addAIECounter(deviceId, counterId, col, row, i, + phyStartEvent, phyEndEvent, resetEvent, payload, metadata->getClockFreqMhz(), + metadata->getModuleName(module), counterName, (tile.stream_ids.empty() ? 
0 : tile.stream_ids[0])); + + auto tileOffset = XAie_GetTileAddr(&aieDevInst, row, col); + std::vector Regs = regValues.at(type); + op_profile_data.emplace_back((u32)(Regs[i] + tileOffset)); + + std::vector values; + values.insert(values.end(), {col, row, phyStartEvent, phyEndEvent, resetEvent, 0, 0, payload}); + outputValues.push_back(values); + + counterId++; + numCounters++; + } // numFreeCtr + + std::stringstream msg; + msg << "Reserved " << numCounters << " counters for profiling AIE tile (" << +col + << "," << +row << ") using metric set " << metricSet << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + numTileCounters[numCounters]++; + } // configMetrics + + // Report counters reserved per tile + { + std::stringstream msg; + msg << "AIE profile counters reserved in " << metadata->getModuleName(module) << " - "; + for (int n=0; n <= metadata->getNumCountersMod(module); ++n) { + if (numTileCounters[n] == 0) + continue; + msg << n << ": " << numTileCounters[n] << " tiles, "; + (db->getStaticInfo()).addAIECounterResources(deviceId, n, numTileCounters[n], module); + } + xrt_core::message::send(severity_level::info, "XRT", msg.str().substr(0, msg.str().size()-2)); + } + + runtimeCounters = true; + } // modules + + // Submit transaction if we were able to configure + if (runtimeCounters) { + auto hwCtx = metadata->getHwContext(); + tranxHandler->submitTransaction(&aieDevInst, hwCtx); + xrt_core::message::send(severity_level::info, "XRT", "Successfully scheduled AIE Profiling."); + } + + return runtimeCounters; + } + + /**************************************************************************** + * Configure selection index to monitor channel numbers + * NOTE: In NPU3, this is required in memory and interface tiles + ***************************************************************************/ + void + AieProfile_VE2Impl::configEventSelections(const XAie_LocType loc, const module_type type, + const std::string metricSet, std::vector& 
channels) + { + if ((type != module_type::mem_tile) && (type != module_type::shim)) + return; + + XAie_DmaDirection dmaDir = aie::isInputSet(type, metricSet) ? DMA_S2MM : DMA_MM2S; + uint8_t numChannels = NUM_CHANNEL_SELECTS; + + if (aie::isDebugVerbosity()) { + std::string tileType = (type == module_type::shim) ? "interface" : "memory"; + std::string dmaType = (dmaDir == DMA_S2MM) ? "S2MM" : "MM2S"; + std::stringstream channelsStr; + std::copy(channels.begin(), channels.end(), std::ostream_iterator(channelsStr, ", ")); + + std::string msg = "Configuring event selections for " + tileType + " tile DMA " + + dmaType + " channels " + channelsStr.str(); + xrt_core::message::send(severity_level::debug, "XRT", msg); + } + + for (uint8_t c = 0; c < numChannels; ++c) + XAie_EventSelectDmaChannel(&aieDevInst, loc, c, dmaDir, channels.at(c)); + } + + /**************************************************************************** + * Configure stream switch ports for monitoring purposes + * NOTE: Used to monitor streams: trace, interfaces, and memory tiles + ***************************************************************************/ + void + AieProfile_VE2Impl::configStreamSwitchPorts(const tile_type& tile, const XAie_LocType& loc, + const module_type& type, const std::string& metricSet, + const uint8_t channel, const XAie_Events startEvent) + { + // Hardcoded + uint8_t rscId = 0; + uint8_t portnum = aie::getPortNumberFromEvent(startEvent); + // AIE Tiles (e.g., trace streams) + if (type == module_type::core) { + auto slaveOrMaster = (metricSet.find("mm2s") != std::string::npos) ? + XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + XAie_EventSelectStrmPort(&aieDevInst, loc, rscId, slaveOrMaster, DMA, channel); + std::stringstream msg; + msg << "Configured core tile " << (aie::isInputSet(type,metricSet) ? 
"S2MM" : "MM2S") + << " stream switch ports for metricset " << metricSet << " and channel " << (int)channel << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + return; + } + + // Interface tiles (e.g., PLIO, GMIO) + if (type == module_type::shim) { + // NOTE: skip configuration of extra ports for tile if stream_ids are not available. + if (portnum >= tile.stream_ids.size()) + return; + // Grab slave/master and stream ID + // NOTE: stored in getTilesForProfiling() above + auto slaveOrMaster = (tile.is_master_vec.at(portnum) == 0) ? XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + uint8_t streamPortId = static_cast(tile.stream_ids.at(portnum)); + + // auto streamPortId = tile.stream_id; + // Define stream switch port to monitor interface + XAie_EventSelectStrmPort(&aieDevInst, loc, rscId, slaveOrMaster, SOUTH, streamPortId); + std::stringstream msg; + msg << "Configured shim tile " << (aie::isInputSet(type,metricSet) ? "S2MM" : "MM2S") << " stream switch ports for metricset " << metricSet << " and stream port id " << (int)streamPortId << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + return; + } + + if (type == module_type::mem_tile) { + auto slaveOrMaster = (metricSet.find("mm2s") != std::string::npos) ? + XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + XAie_EventSelectStrmPort(&aieDevInst, loc, rscId, slaveOrMaster, DMA, channel); + std::stringstream msg; + msg << "Configured mem tile " << (aie::isInputSet(type,metricSet) ? 
"S2MM" : "MM2S") << " stream switch ports for metricset " << metricSet << " and channel " << (int)channel << "."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + } + } + + void AieProfile_VE2Impl::generatePollElf() + { + auto context = metadata->getHwContext(); + + std::string tranxName = "AieProfilePoll"; + if (!tranxHandler->initializeTransaction(&aieDevInst, tranxName)) { + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Unable to initialize transaction for AIE profile polling."); + return; + } + for (u32 i = 0; i < op_profile_data.size(); i++) { + XAie_SaveRegister(&aieDevInst, op_profile_data[i], i); + } + if (!tranxHandler->completeASM(&aieDevInst)) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE Profile: Failed to finalize poll ASM."); + return; + } + if (!tranxHandler->generateELF()) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE Profile: Failed to generate poll ELF."); + return; + } + finishedPoll = false; + xrt_core::message::send(severity_level::debug, "XRT", + "AIE Profile: Poll ASM/ELF ready (submit deferred to teardown)."); + } + + + void AieProfile_VE2Impl::poll(const uint64_t id) + { + // Wait until xclbin has been loaded and device has been updated in database + if (!(db->getStaticInfo().isDeviceReady(id))) + return; + + if (finishedPoll) + return; + + if (db->infoAvailable(xdp::info::ml_timeline)) { + db->broadcast(VPDatabase::MessageType::READ_RECORD_TIMESTAMPS, nullptr); + xrt_core::message::send(severity_level::debug, "XRT", "Done reading recorded timestamps."); + } + + auto context = metadata->getHwContext(); + if (!tranxHandler->submitELF(context)) + return; + + resultBO.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + uint32_t* output = resultBO.map(); + + // Get timestamp in milliseconds + double timestamp = xrt_core::time_ns() / 1.0e6; + + //**************************TODO: Remove this after testing *************************** + for (u32 i = 0; i < 
op_profile_data.size() + 12 * 3; i++) { + std::stringstream msg; + msg << "Counter address/values: " << output[2 * i] << " - " << output[2 * i + 1]; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", msg.str()); + } + + // Process counter values and add to database + for (u32 i = 0; i < op_profile_data.size(); i++) { + // Update counter value in outputValues and add to database + std::vector values = outputValues[i]; + values[5] = static_cast(output[2 * i + 1]); // Write counter value + db->getDynamicInfo().addAIESample(id, timestamp, values); + } + + finishedPoll = true; + } + + bool AieProfile_VE2Impl::checkAieDevice(const uint64_t deviceId, void* handle) {} + uint64_t AieProfile_VE2Impl::getCounterPayload(XAie_DevInst* aieDevInst, const tile_type& tile, const module_type type, uint8_t column, uint8_t row, uint16_t startEvent, const std::string metricSet, const uint8_t channel, uint8_t logicalPortIndex) {} + uint64_t AieProfile_VE2Impl::getAdfProfileAPIPayload(const tile_type& tile, const std::string metricSet) {} + void AieProfile_VE2Impl::printTileModStats(xaiefal::XAieDev* aieDevice, const tile_type& tile, XAie_ModuleType mod) {} + void AieProfile_VE2Impl::startPoll(const uint64_t id) {} + void AieProfile_VE2Impl::continuePoll(const uint64_t id) {} + void AieProfile_VE2Impl::endPoll() {} + void AieProfile_VE2Impl::freeResources() {} + void AieProfile_VE2Impl::displayAdfAPIResults() {} +} +// END XDNA flow + +#endif diff --git a/profile/plugin/aie_profile/ve2/aie_profile.h b/profile/plugin/aie_profile/ve2/aie_profile.h index e5671b08..f2ab51ef 100644 --- a/profile/plugin/aie_profile/ve2/aie_profile.h +++ b/profile/plugin/aie_profile/ve2/aie_profile.h @@ -12,11 +12,19 @@ #include "core/edge/common/aie_parser.h" #include "xdp/profile/plugin/aie_profile/aie_profile_impl.h" #include "xdp/profile/plugin/aie_profile/util/aie_profile_util.h" +#include "xdp/profile/device/common/ve2/ve2_transaction.h" +#include 
"xdp/profile/plugin/aie_base/generations/aie2ps_registers.h" #include "xaiefal/xaiefal.hpp" +#include "xrt/xrt_bo.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp { @@ -85,13 +93,37 @@ namespace xdp { std::pair getShimBroadcastChannel(const tile_type& srcTile); - void - displayAdfAPIResults(); + void displayAdfAPIResults(); private: + #ifdef XDP_VE2_ZOCL_BUILD XAie_DevInst* aieDevInst = nullptr; xaiefal::XAieDev* aieDevice = nullptr; - + #else + void configEventSelections(const XAie_LocType loc, const module_type type, const std::string metricSet, std::vector& channels); + void configStreamSwitchPorts(const tile_type& tile, const XAie_LocType& loc, const module_type& type, const std::string& metricSet, const uint8_t channel, const XAie_Events startEvent); + void generatePollElf(); + + std::unique_ptr tranxHandler; + xrt::bo resultBO; + XAie_DevInst aieDevInst = {0}; + bool finishedPoll = false; + std::vector op_profile_data; + std::vector> outputValues; + + // Register offsets per tile type for VE2 (AIE2PS) — used to build the poll ELF. 
+ const std::map> regValues { + {module_type::core, {aie2ps::cm_performance_counter0, aie2ps::cm_performance_counter1, + aie2ps::cm_performance_counter2, aie2ps::cm_performance_counter3}}, + {module_type::dma, {aie2ps::mm_performance_counter0, aie2ps::mm_performance_counter1, + aie2ps::mm_performance_counter2, aie2ps::mm_performance_counter3}}, + {module_type::shim, {aie2ps::shim_performance_counter0, aie2ps::shim_performance_counter1, + aie2ps::shim_performance_counter2, aie2ps::shim_performance_counter3}}, + {module_type::mem_tile, {aie2ps::mem_performance_counter0, aie2ps::mem_performance_counter1, + aie2ps::mem_performance_counter2, aie2ps::mem_performance_counter3}} + }; + #endif + std::map> coreStartEvents; std::map> coreEndEvents; std::map> memoryStartEvents; diff --git a/profile/plugin/aie_status/CMakeLists.txt b/profile/plugin/aie_status/CMakeLists.txt index b116ef29..601e2ae7 100644 --- a/profile/plugin/aie_status/CMakeLists.txt +++ b/profile/plugin/aie_status/CMakeLists.txt @@ -17,6 +17,14 @@ file(GLOB AIE_STATUS_PLUGIN_FILES if (XDP_VE2_BUILD_CMAKE STREQUAL "yes") add_library(xdp_aie_status_plugin SHARED ${AIE_STATUS_PLUGIN_FILES}) add_dependencies(xdp_aie_status_plugin xdp_core) + # TODO: add aie_codegen to aie_status once XAie_Read32 support is there + #target_link_libraries(xdp_aie_status_plugin PRIVATE xdp_core aie_codegen) + #target_link_options(xdp_aie_status_plugin PRIVATE -Wl,-Bsymbolic) + #target_compile_definitions(xdp_aie_status_plugin PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1 FAL_LINUX="on") + #target_include_directories(xdp_aie_status_plugin PRIVATE + # ${CMAKE_SOURCE_DIR}/src + # ${AIEFAL_DIR} + #) target_link_libraries(xdp_aie_status_plugin PRIVATE xdp_core xaiengine) target_compile_definitions(xdp_aie_status_plugin PRIVATE XDP_VE2_BUILD=1 FAL_LINUX="on") target_include_directories(xdp_aie_status_plugin PRIVATE ${CMAKE_SOURCE_DIR}/src) diff --git a/profile/plugin/aie_status/aie_status_plugin.h 
b/profile/plugin/aie_status/aie_status_plugin.h index e502d6a2..8cc03466 100644 --- a/profile/plugin/aie_status/aie_status_plugin.h +++ b/profile/plugin/aie_status/aie_status_plugin.h @@ -33,8 +33,13 @@ #include "xdp/profile/plugin/vp_base/vp_base_plugin.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include -#include "xaiengine/xaie_helper.h" +#include +#endif } namespace xdp { diff --git a/profile/plugin/aie_trace/CMakeLists.txt b/profile/plugin/aie_trace/CMakeLists.txt index 249a344d..70e7148a 100644 --- a/profile/plugin/aie_trace/CMakeLists.txt +++ b/profile/plugin/aie_trace/CMakeLists.txt @@ -52,8 +52,8 @@ if (XDP_CLIENT_BUILD_CMAKE STREQUAL "yes") add_library(xdp_aie_trace_plugin SHARED xdp_aie_trace_plugin-version.rc ${AIE_TRACE_PLUGIN_FILES} ${AIE_TRACE_COMPONENT_FILES} ${AIE_TRACE_UTIL_FILES} ${AIE_DRIVER_COMMON_UTIL_FILES} ${AIE_JSON_PARSER_FILES}) add_dependencies(xdp_aie_trace_plugin xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_trace_plugin PRIVATE xdp_core xrt_coreutil xaiengine) - target_compile_definitions(xdp_aie_trace_plugin PRIVATE XDP_CLIENT_BUILD=1 -DXAIE_FEATURE_MSVC) + target_link_libraries(xdp_aie_trace_plugin PRIVATE xdp_core xrt_coreutil aie_codegen) + target_compile_definitions(xdp_aie_trace_plugin PRIVATE XDP_CLIENT_BUILD=1 XDP_USE_AIE_CODEGEN=1 -DXAIE_FEATURE_MSVC) target_include_directories(xdp_aie_trace_plugin PRIVATE ${AIERT_DIR}/include) set_target_properties(xdp_aie_trace_plugin PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) @@ -104,11 +104,21 @@ else() "${IMPL_DIR}/*.cpp" ) - add_library(xdp_aie_trace_plugin_xdna SHARED ${AIE_TRACE_PLUGIN_FILES} ${AIE_TRACE_COMPONENT_FILES} ${AIE_TRACE_UTIL_FILES} ${AIE_TRACE_CONFIG_FILES} ${AIE_JSON_PARSER_FILES}) + add_library(xdp_aie_trace_plugin_xdna SHARED ${AIE_TRACE_PLUGIN_FILES} ${AIE_TRACE_COMPONENT_FILES} ${AIE_TRACE_UTIL_FILES} ${AIE_TRACE_CONFIG_FILES} ${AIE_JSON_PARSER_FILES} 
"${PROFILE_DIR}/device/common/ve2/ve2_transaction.cpp") add_dependencies(xdp_aie_trace_plugin_xdna xdp_core xrt_coreutil) - target_link_libraries(xdp_aie_trace_plugin_xdna PRIVATE xdp_core xrt_coreutil xaiengine) - target_compile_definitions(xdp_aie_trace_plugin_xdna PRIVATE XDP_VE2_BUILD=1 FAL_LINUX="on") - target_include_directories(xdp_aie_trace_plugin_xdna PRIVATE ${CMAKE_SOURCE_DIR}/src) + target_link_libraries(xdp_aie_trace_plugin_xdna PRIVATE xdp_core xrt_coreutil aie_codegen aiebu_library_objects) + target_link_options(xdp_aie_trace_plugin_xdna PRIVATE -Wl,-Bsymbolic) + target_compile_definitions(xdp_aie_trace_plugin_xdna PRIVATE XDP_VE2_BUILD=1 XDP_USE_AIE_CODEGEN=1 XAIE_FEATURE_MSVC FAL_LINUX="on") + target_include_directories(xdp_aie_trace_plugin_xdna PRIVATE + ${CMAKE_SOURCE_DIR}/src + ${AIEFAL_DIR} + ${AIEBU_SOURCE_DIR}/src/cpp/include + ${AIERT_DIR}/include + ${XRT_SOURCE_DIR}/runtime_src/core/common/elf + ) + #target_link_libraries(xdp_aie_trace_plugin_xdna PRIVATE xdp_core xrt_coreutil xaiengine) + #target_compile_definitions(xdp_aie_trace_plugin_xdna PRIVATE XDP_VE2_BUILD=1 FAL_LINUX="on") + #target_include_directories(xdp_aie_trace_plugin_xdna PRIVATE ${CMAKE_SOURCE_DIR}/src) set_target_properties(xdp_aie_trace_plugin_xdna PROPERTIES VERSION ${XRT_VERSION_STRING} SOVERSION ${XRT_SOVERSION}) install (TARGETS xdp_aie_trace_plugin_xdna @@ -135,7 +145,8 @@ else() add_dependencies(xdp_aie_trace_plugin xdp_core xrt_coreutil) target_link_libraries(xdp_aie_trace_plugin PRIVATE xdp_core xrt_coreutil xaiengine) if (XDP_VE2_BUILD_CMAKE STREQUAL "yes") - target_compile_definitions(xdp_aie_trace_plugin PRIVATE XDP_VE2_ZOCL_BUILD=1 FAL_LINUX="on") + # VE2 ZOCL (edge): xaiengine / FAL only — no aie_codegen (see aie_trace_offload_ve2.h). 
+ target_compile_definitions(xdp_aie_trace_plugin PRIVATE XDP_VE2_BUILD=1 XDP_VE2_ZOCL_BUILD=1 FAL_LINUX="on") else() target_compile_definitions(xdp_aie_trace_plugin PRIVATE FAL_LINUX="on") endif() diff --git a/profile/plugin/aie_trace/aie_trace_offload_manager.cpp b/profile/plugin/aie_trace/aie_trace_offload_manager.cpp index e31139c7..ddf8d3e2 100644 --- a/profile/plugin/aie_trace/aie_trace_offload_manager.cpp +++ b/profile/plugin/aie_trace/aie_trace_offload_manager.cpp @@ -49,6 +49,11 @@ uint64_t AIETraceOffloadManager::checkAndCapToBankSize(uint8_t memIndex, uint64_ {} void AIETraceOffloadManager::initPLIO(void* handle, PLDeviceIntf* deviceIntf, uint64_t bufSize, uint64_t numStreams, XAie_DevInst* devInst) { + // VE2 XDNA: PLIO unsupported and AIETraceOffload has no devInst ctor — omit the body below so it is not instantiated. +#if defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD) + xrt_core::message::send(severity_level::debug, "XRT", "No support for VE2 XDNA PLIO right now"); + return; +#else if (!offloadEnabledPLIO) return; @@ -70,13 +75,14 @@ uint64_t AIETraceOffloadManager::checkAndCapToBankSize(uint8_t memIndex, uint64_ << " MB is used for AIE trace buffer for " << numStreams << " PLIO streams."; xrt_core::message::send(severity_level::debug, "XRT", msg.str()); - +#endif } - #ifdef XDP_CLIENT_BUILD +#if defined(XDP_CLIENT_BUILD) || (defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD)) void AIETraceOffloadManager::initGMIO(void* handle, PLDeviceIntf* deviceIntf, - uint64_t bufSize, uint64_t numStreams, xrt::hw_context context, - std::shared_ptr metadata) { + uint64_t bufSize, uint64_t numStreams, xrt::hw_context context, + std::shared_ptr metadata) + { if (!offloadEnabledGMIO) return; @@ -206,10 +212,14 @@ uint64_t AIETraceOffloadManager::checkAndCapToBankSize(uint8_t memIndex, uint64_ } } - bool AIETraceOffloadManager::configureAndInitPLIO(void* handle, PLDeviceIntf* deviceIntf, uint64_t desiredBufSize, uint64_t numStreamsPLIO, XAie_DevInst* 
devInst) { +#if (defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD)) + xrt_core::message::send(severity_level::debug, "XRT", "No support for VE2 XDNA PLIO right now"); + return true; +#endif + uint8_t memIndex = 0; if (deviceIntf) memIndex = deviceIntf->getAIETs2mmMemIndex(0); @@ -227,23 +237,23 @@ bool AIETraceOffloadManager::configureAndInitPLIO(void* handle, PLDeviceIntf* de return true; } -bool AIETraceOffloadManager::configureAndInitGMIO( - void* handle, PLDeviceIntf* deviceIntf, - uint64_t desiredBufSize, uint64_t numStreamsGMIO -#ifdef XDP_CLIENT_BUILD - , const xrt::hw_context& hwctx, const std::shared_ptr& md -#else - , XAie_DevInst* devInst -#endif - ) +#if defined(XDP_CLIENT_BUILD) || (defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD)) +bool AIETraceOffloadManager::configureAndInitGMIO(void* handle, PLDeviceIntf* deviceIntf, + uint64_t desiredBufSize, uint64_t numStreamsGMIO, + const xrt::hw_context& hwctx, const std::shared_ptr& md) { desiredBufSize = checkAndCapToBankSize(/*bank 0*/ 0, desiredBufSize); desiredBufSize = aieTraceImpl->checkTraceBufSize(desiredBufSize); - -#ifdef XDP_CLIENT_BUILD initGMIO(handle, deviceIntf, desiredBufSize, numStreamsGMIO, hwctx, md); return true; +} #else +bool AIETraceOffloadManager::configureAndInitGMIO(void* handle, PLDeviceIntf* deviceIntf, + uint64_t desiredBufSize, uint64_t numStreamsGMIO, + XAie_DevInst* devInst) +{ + desiredBufSize = checkAndCapToBankSize(/*bank 0*/ 0, desiredBufSize); + desiredBufSize = aieTraceImpl->checkTraceBufSize(desiredBufSize); if (!devInst) { xrt_core::message::send(severity_level::warning, "XRT", "Unable to get AIE device instance. 
AIE event trace will not be available."); @@ -251,7 +261,7 @@ bool AIETraceOffloadManager::configureAndInitGMIO( } initGMIO(handle, deviceIntf, desiredBufSize, numStreamsGMIO, devInst); return true; -#endif } +#endif } // namespace xdp diff --git a/profile/plugin/aie_trace/aie_trace_offload_manager.h b/profile/plugin/aie_trace/aie_trace_offload_manager.h index 638e74c0..da2ffb63 100644 --- a/profile/plugin/aie_trace/aie_trace_offload_manager.h +++ b/profile/plugin/aie_trace/aie_trace_offload_manager.h @@ -25,8 +25,13 @@ #endif extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } @@ -57,7 +62,7 @@ class AIETraceOffloadManager { void initPLIO(void* handle, PLDeviceIntf* deviceIntf, uint64_t bufSize, uint64_t numStreams, XAie_DevInst* devInst); // TODO: Use const references for parameters where applicable - #ifdef XDP_CLIENT_BUILD + #if defined(XDP_CLIENT_BUILD) || (defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD)) void initGMIO(void* handle, PLDeviceIntf* deviceIntf, uint64_t bufSize, uint64_t numStreams, xrt::hw_context context, std::shared_ptr metadata); @@ -73,14 +78,16 @@ class AIETraceOffloadManager { std::vector& writers); bool configureAndInitPLIO(void* handle, PLDeviceIntf* deviceIntf, uint64_t desiredBufSize, uint64_t numStreamsPLIO, XAie_DevInst* devInst); + +#if defined(XDP_CLIENT_BUILD) || (defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD)) bool configureAndInitGMIO(void* handle, PLDeviceIntf* deviceIntf, - uint64_t desiredBufSize, uint64_t numStreamsGMIO - #ifdef XDP_CLIENT_BUILD - , const xrt::hw_context& hwctx, const std::shared_ptr& md - #else - , XAie_DevInst* devInst - #endif - ); + uint64_t desiredBufSize, uint64_t numStreamsGMIO, + const xrt::hw_context& hwctx, const std::shared_ptr& md); +#else + bool configureAndInitGMIO(void* handle, PLDeviceIntf* deviceIntf, + uint64_t desiredBufSize, uint64_t numStreamsGMIO, + XAie_DevInst* devInst); +#endif }; // class AIETraceOffloadManager diff --git 
a/profile/plugin/aie_trace/aie_trace_plugin.cpp b/profile/plugin/aie_trace/aie_trace_plugin.cpp index c2bda88f..441bd500 100644 --- a/profile/plugin/aie_trace/aie_trace_plugin.cpp +++ b/profile/plugin/aie_trace/aie_trace_plugin.cpp @@ -174,7 +174,11 @@ void AieTracePluginUnified::updateAIEDevice(void *handle, bool hw_context_flow) AIEData.implementation = std::make_unique(db, AIEData.metadata); #elif defined(XRT_X86_BUILD) AIEData.implementation = std::make_unique(db, AIEData.metadata); -#elif XDP_VE2_BUILD +#elif defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD) + xrt::hw_context context = xrt_core::hw_context_int::create_hw_context_from_implementation(handle); + AIEData.metadata->setHwContext(context); + AIEData.implementation = std::make_unique(db, AIEData.metadata); +#elif defined(XDP_VE2_BUILD) AIEData.implementation = std::make_unique(db, AIEData.metadata); #else AIEData.implementation = std::make_unique(db, AIEData.metadata); @@ -260,7 +264,9 @@ void AieTracePluginUnified::updateAIEDevice(void *handle, bool hw_context_flow) // uint64_t aieTraceBufSizePLIO = aieTraceBufSize; // uint64_t aieTraceBufSizeGMIO = aieTraceBufSize; if (isPLIO && !configuredOnePlioPartition) { - +#if defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD) + // TODO: if VE2 XDNA flow then we do not have devInst so we do something else +#else XAie_DevInst* devInst = static_cast(AIEData.implementation->setAieDeviceInst(handle, deviceID)); if(!devInst) { xrt_core::message::send(severity_level::warning, "XRT", @@ -269,11 +275,12 @@ void AieTracePluginUnified::updateAIEDevice(void *handle, bool hw_context_flow) } AIEData.offloadManager->configureAndInitPLIO(handle, deviceIntf, aieTraceBufSize, AIEData.metadata->getNumStreamsPLIO(), devInst); - // Mark that we've successfully configured the first PLIO partition +#endif configuredOnePlioPartition = true; } + if (isGMIO) { -#ifdef XDP_CLIENT_BUILD +#if defined(XDP_CLIENT_BUILD) || (defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD)) 
if (!AIEData.offloadManager->configureAndInitGMIO( handle, deviceIntf, aieTraceBufSize, AIEData.metadata->getNumStreamsGMIO(), @@ -402,7 +409,7 @@ void AieTracePluginUnified::flushAIEDevice(void *handle) { void AieTracePluginUnified::finishFlushAIEDevice(void *handle) { xrt_core::message::send(severity_level::info, "XRT", "Beginning AIE Trace finishFlushAIEDevice."); - #ifdef XDP_CLIENT_BUILD + #if defined(XDP_CLIENT_BUILD) || (defined(XDP_VE2_BUILD) && !defined(XDP_VE2_ZOCL_BUILD)) // For now, just return please return; #endif diff --git a/profile/plugin/aie_trace/client/aie_trace.h b/profile/plugin/aie_trace/client/aie_trace.h index dde6da85..9de51e17 100644 --- a/profile/plugin/aie_trace/client/aie_trace.h +++ b/profile/plugin/aie_trace/client/aie_trace.h @@ -12,8 +12,8 @@ #include "xdp/profile/device/common/client_transaction.h" extern "C" { - #include - #include +#include +#include } namespace xdp { diff --git a/profile/plugin/aie_trace/util/aie_trace_util.h b/profile/plugin/aie_trace/util/aie_trace_util.h index 036e2ee7..adf86e93 100755 --- a/profile/plugin/aie_trace/util/aie_trace_util.h +++ b/profile/plugin/aie_trace/util/aie_trace_util.h @@ -9,8 +9,13 @@ #include "xdp/profile/plugin/aie_trace/aie_trace_metadata.h" extern "C" { +#ifdef XDP_USE_AIE_CODEGEN +#include +#include +#else #include #include +#endif } namespace xdp::aie::trace { diff --git a/profile/plugin/aie_trace/ve2/aie_trace.cpp b/profile/plugin/aie_trace/ve2/aie_trace.cpp index af5a47f1..71436336 100644 --- a/profile/plugin/aie_trace/ve2/aie_trace.cpp +++ b/profile/plugin/aie_trace/ve2/aie_trace.cpp @@ -18,6 +18,7 @@ #include "xdp/profile/plugin/aie_base/aie_base_util.h" #include "xdp/profile/plugin/vp_base/utility.h" +#include #include #include #include @@ -32,6 +33,11 @@ #include "core/common/api/hw_context_int.h" #include "shim_ve2/xdna_hwctx.h" +#ifndef XDP_VE2_ZOCL_BUILD +#include "xdp/profile/device/aie_trace/ve2/aie_trace_offload_ve2.h" +#endif + +#ifdef XDP_VE2_ZOCL_BUILD namespace { 
static void* fetchAieDevInst(void* devHandle) { @@ -93,6 +99,9 @@ namespace xdp { memoryTileTraceEndEvent = XAIE_EVENT_USER_EVENT_1_MEM_TILE; interfaceTileTraceStartEvent = XAIE_EVENT_TRUE_PL; interfaceTileTraceEndEvent = XAIE_EVENT_USER_EVENT_1_PL; + + // TODO: tranxHandler to record ASM transaction + // TODO: XAie_cfg to create local aieDevInst } /**************************************************************************** @@ -1215,5 +1224,1344 @@ namespace xdp { aieDevice = static_cast(db->getStaticInfo().getAieDevice(allocateAieDevice, deallocateAieDevice, handle, deviceID)); return aieDevInst; } - } // namespace xdp + +#else // XDNA flow + +namespace xdp { + using severity_level = xrt_core::message::severity_level; + + /**************************************************************************** + * Constructor: AIE trace implementation for edge devices + ***************************************************************************/ + AieTrace_VE2Impl::AieTrace_VE2Impl(VPDatabase* database, std::shared_ptr metadata) + : AieTraceImpl(database, metadata) + { + auto hwGen = metadata->getHardwareGen(); + + // Pre-defined metric sets + coreEventSets = aie::trace::getCoreEventSets(hwGen); + memoryEventSets = aie::trace::getMemoryEventSets(hwGen); + memoryTileEventSets = aie::trace::getMemoryTileEventSets(hwGen); + interfaceTileEventSets = aie::trace::getInterfaceTileEventSets(hwGen); + + // Core trace start/end: these are also broadcast to memory module + coreTraceStartEvent = XAIE_EVENT_ACTIVE_CORE; + coreTraceEndEvent = XAIE_EVENT_USER_EVENT_3_CORE; + + // Memory/interface tile trace is flushed at end of run + memoryTileTraceStartEvent = XAIE_EVENT_TRUE_MEM_TILE; + memoryTileTraceEndEvent = XAIE_EVENT_USER_EVENT_1_MEM_TILE; + interfaceTileTraceStartEvent = XAIE_EVENT_TRUE_PL; + interfaceTileTraceEndEvent = XAIE_EVENT_USER_EVENT_1_PL; + + tranxHandler = std::make_unique(); + + xdp::aie::driver_config meta_config = metadata->getAIEConfigMetadata(); + XAie_Config cfg { 
+ meta_config.hw_gen, + meta_config.base_address, + meta_config.column_shift, + meta_config.row_shift, + meta_config.num_rows, + meta_config.num_columns, + meta_config.shim_row, + meta_config.mem_row_start, + meta_config.mem_num_rows, + meta_config.aie_tile_row_start, + meta_config.aie_tile_num_rows, + {0} // PartProp + }; + + auto RC = XAie_CfgInitialize(&aieDevInst, &cfg); + if (RC != XAIE_OK) + xrt_core::message::send(severity_level::warning, "XRT", "AIE Driver Initialization Failed."); + } + + /**************************************************************************** + * Verify correctness of trace buffer size + ***************************************************************************/ + uint64_t AieTrace_VE2Impl::checkTraceBufSize(uint64_t aieTraceBufSize) + { + uint64_t deviceMemorySize = getPSMemorySize(); + if (deviceMemorySize == 0) + return aieTraceBufSize; + + double percentSize = (100.0 * aieTraceBufSize) / deviceMemorySize; + + std::stringstream percentSizeStr; + percentSizeStr << std::fixed << std::setprecision(3) << percentSize; + + // Limit size of trace buffer if requested amount is too high + if (percentSize >= 80.0) { + aieTraceBufSize = static_cast(std::ceil(0.8 * deviceMemorySize)); + + std::stringstream newBufSizeStr; + newBufSizeStr << std::fixed << std::setprecision(3) << (aieTraceBufSize / (1024.0 * 1024.0)); // In MB + + std::string msg = "Requested AIE trace buffer is " + percentSizeStr.str() + "% of device memory." + + " You may run into errors depending upon memory usage" + " of your application." 
+ + " Limiting to " + newBufSizeStr.str() + " MB."; + xrt_core::message::send(severity_level::warning, "XRT", msg); + } else { + std::string msg = "Requested AIE trace buffer is " + percentSizeStr.str() + "% of device memory."; + xrt_core::message::send(severity_level::info, "XRT", msg); + } + + return aieTraceBufSize; + } + + /**************************************************************************** + * Update device (e.g., after loading xclbin) + ***************************************************************************/ + void AieTrace_VE2Impl::updateDevice() + { + xrt_core::message::send(severity_level::info, "XRT", "Calling AIE Trace VE2 XDNA updateDevice."); + + // If runtime metrics are not enabled, do not configure trace + if(!metadata->getRuntimeMetrics()) + return; + + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(metadata->getHandle()); + if (aiePartitionPt.empty()) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE trace: no partition info for trace-start broadcast; skipping broadcast network."); + return; + } + + // Set metrics for counters and trace events + if (!setMetricsSettings(metadata->getDeviceID(), metadata->getHandle())) { + std::string msg("Unable to configure AIE trace control and events. 
No trace will be generated."); + xrt_core::message::send(severity_level::warning, "XRT", msg); + return; + } + + // Configure windowed event trace if layer-based start is enabled + if (xrt_core::config::get_aie_trace_settings_start_type() == "layer") { + if (!configureWindowedEventTrace(metadata->getHandle())) { + std::string msg("Unable to configure AIE windowed event trace"); + xrt_core::message::send(severity_level::warning, "XRT", msg); + return; + } + } + } + + /**************************************************************************** + * Configure windowed event trace for layer-based triggering + ***************************************************************************/ + bool AieTrace_VE2Impl::configureWindowedEventTrace(void* hwCtxImpl) + { + // Start recording the windowed event trace transaction + if (!tranxHandler->initializeTransaction(&aieDevInst, "AieTraceWindow")) { + xrt_core::message::send(severity_level::warning, "XRT", "AIE TRACE: Failed to initialize transaction for Windowed Event Trace."); + return false; + } + + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(hwCtxImpl); + // Currently, assuming only one Hw Context is alive at a time + //uint8_t startCol = static_cast(aiePartitionPt.back().second.get("start_col")); + uint8_t startCol = 0; + uint8_t numCols = static_cast(aiePartitionPt.back().second.get("num_cols")); + + xrt_core::message::send(severity_level::debug, "XRT", "AIE trace window startCol: " + std::to_string(startCol)); // std::to_string widens the uint8_t; streaming it directly emits a raw character + xrt_core::message::send(severity_level::debug, "XRT", "AIE trace window numCols: " + std::to_string(numCols)); + + auto metadataReader = (VPDatabase::Instance()->getStaticInfo()).getAIEmetadataReader(metadata->getDeviceID()); + if (!metadataReader) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE metadata reader not available for windowed trace configuration"); + return false; + } + + // XDNA path has no xaiefal broadcast reservation; use fixed channels (see + // client/resources_def.h traceStartBroadcastChId1 / traceStartBroadcastChId2). 
+ const uint8_t traceStartBroadcastChId1 = 6; + const uint8_t traceStartBroadcastChId2 = 7; + + // Define trace start events for different module types + XAie_Events shimTraceStartEvent = (XAie_Events) (XAIE_EVENT_BROADCAST_A_0_PL + traceStartBroadcastChId2); + XAie_Events memTileTraceStartEvent = (XAie_Events)(XAIE_EVENT_BROADCAST_0_MEM_TILE + traceStartBroadcastChId1); + XAie_Events coreModTraceStartEvent = (XAie_Events)(XAIE_EVENT_BROADCAST_0_CORE + traceStartBroadcastChId1); + XAie_Events memTraceStartEvent = (XAie_Events)(XAIE_EVENT_BROADCAST_0_MEM + traceStartBroadcastChId1); + + unsigned int startLayer = xrt_core::config::get_aie_trace_settings_start_layer(); + + // Configure trace start events for tiles + // NOTE: rows are stored as absolute as required by resource manager + for (auto& tileMetric : metadata->getConfigMetrics()) { + auto tile = tileMetric.first; + auto col = tile.col; + auto row = tile.row; + auto type = aie::getModuleType(row, metadata->getRowOffset()); + auto loc = XAie_TileLoc(col, row); + + if (startLayer != UINT_MAX) { + if (type == module_type::shim) { + // Configure shim/interface tile trace start + if (col == startCol) + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_PERF_CNT_0_PL); + else + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_PL_MOD, shimTraceStartEvent); + } + else if (type == module_type::mem_tile) { + // Configure memory tile trace start + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_MEM_MOD, memTileTraceStartEvent); + } + else if (type == module_type::core) { + // Configure core module trace start + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_CORE_MOD, coreModTraceStartEvent); + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_MEM_MOD, memTraceStartEvent); + } + } + } + + if (startLayer != UINT_MAX) { + XAie_PerfCounterControlSet(&aieDevInst, XAie_TileLoc(startCol, 0), XAIE_PL_MOD, 0, XAIE_EVENT_USER_EVENT_0_PL, XAIE_EVENT_USER_EVENT_0_PL); + XAie_PerfCounterEventValueSet(&aieDevInst, XAie_TileLoc(startCol, 0), 
XAIE_PL_MOD, 0, startLayer); + } + + // Build 2-channel broadcast network for trace start synchronization + build2ChannelBroadcastNetwork(hwCtxImpl, traceStartBroadcastChId1, traceStartBroadcastChId2, XAIE_EVENT_PERF_CNT_0_PL); + + xrt_core::message::send(severity_level::info, "XRT", "Finished AIE Windowed Trace Settings."); + auto hwContext = metadata->getHwContext(); + + // Submit the windowed event trace transaction + if (!tranxHandler->submitTransaction(&aieDevInst, hwContext)) { + xrt_core::message::send(severity_level::warning, "XRT", "AIE TRACE: Failed to submit windowed event trace transaction."); + return false; + } + + return true; + } + + /**************************************************************************** + * Configure requested tiles with trace metrics and settings + ***************************************************************************/ + bool AieTrace_VE2Impl::setMetricsSettings(uint64_t deviceId, void* handle) + { + if (!metadata->getIsValidMetrics()) { + std::string msg("AIE trace metrics were not specified in xrt.ini. 
AIE event trace will not be available."); + xrt_core::message::send(severity_level::warning, "XRT", msg); + return false; + } + + // Get partition columns + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(handle); + // Currently, assuming only one Hw Context is alive at a time + //uint8_t startCol = static_cast(aiePartitionPt.front().second.get("start_col")); + uint8_t startCol = 0; + uint8_t numCols = static_cast(aiePartitionPt.back().second.get("num_cols")); + + std::string startType = xrt_core::config::get_aie_trace_settings_start_type(); + unsigned int startLayer = xrt_core::config::get_aie_trace_settings_start_layer(); + + std::string tranxName = "AieTraceMetrics"; + xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", + "Starting transaction " + tranxName); + if (!tranxHandler->initializeTransaction(&aieDevInst, tranxName)) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE TRACE: Failed to initialize transaction for AIE TRACE Metrics."); + return false; + } + + // XDNA: fixed broadcast IDs for trace-start network (matches client/resources_def.h). + const uint8_t traceStartBroadcastChId1 = 6; + const uint8_t traceStartBroadcastChId2 = 7; + + // Get channel configurations (memory and interface tiles) + auto configChannel0 = metadata->getConfigChannel0(); + auto configChannel1 = metadata->getConfigChannel1(); + + // Get the column shift for partition + // NOTE: If partition is not used, this value is zero. 
+ uint8_t startColShift = metadata->getPartitionOverlayStartCols().front(); + aie::displayColShiftInfo(startColShift); + + // Zero trace event tile counts + for (int m = 0; m < static_cast(module_type::num_types); ++m) { + for (int n = 0; n <= NUM_TRACE_EVENTS; ++n) + mNumTileTraceEvents[m][n] = 0; + } + + auto metadataReader = (VPDatabase::Instance()->getStaticInfo()).getAIEmetadataReader(deviceId); + if (!metadataReader) { + if (aie::isDebugVerbosity()) { + std::stringstream msg; + msg << "AIE metadata reader is null"; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + } + } + + // Using user event for trace end to enable flushing + // NOTE: Flush trace module always at the end because for some applications + // core might be running infinitely. + if (metadata->getUseUserControl()) + coreTraceStartEvent = XAIE_EVENT_INSTR_EVENT_0_CORE; + coreTraceEndEvent = XAIE_EVENT_USER_EVENT_3_CORE; + + // Iterate over all used/specified tiles + // NOTE: rows are stored as absolute as required by resource manager + for (auto& tileMetric : metadata->getConfigMetrics()) { + auto& metricSet = tileMetric.second; + auto tile = tileMetric.first; + auto col = tile.col + startColShift; + auto row = tile.row; + auto subtype = tile.subtype; + auto type = aie::getModuleType(row, metadata->getRowOffset()); + auto typeInt = static_cast(type); + auto loc = XAie_TileLoc(col, row); + + if ((type == module_type::core) && !aie::isDmaSet(metricSet)) { + // If we're not looking at DMA events, then don't display the DMA + // If core is not active (i.e., DMA-only tile), then ignore this tile + if (tile.active_core) + tile.active_memory = false; + else + continue; + } + + std::string tileName = (type == module_type::mem_tile) ? "memory" + : ((type == module_type::shim) ? 
"interface" : "AIE"); + tileName.append(" tile (" + std::to_string(col) + "," + std::to_string(row) + ")"); + + if (aie::isInfoVerbosity()) { + std::stringstream infoMsg; + infoMsg << "Configuring " << tileName << " for trace using metric set " << metricSet; + xrt_core::message::send(severity_level::info, "XRT", infoMsg.str()); + } + + // Store location to flush at end of run + if (type == module_type::core || (type == module_type::mem_tile) + || (type == module_type::shim)) { + if (type == module_type::core) + traceFlushLocs.push_back(loc); + else if (type == module_type::mem_tile) + memoryTileTraceFlushLocs.push_back(loc); + else if (type == module_type::shim) + interfaceTileTraceFlushLocs.push_back(loc); + } + + // AIE config object for this tile + auto cfgTile = std::make_unique(col, row, type); + cfgTile->type = type; + cfgTile->trace_metric_set = metricSet; + cfgTile->active_core = tile.active_core; + cfgTile->active_memory = tile.active_memory; + + // Catch core execution trace + if ((type == module_type::core) && (metricSet == "execution")) { + // Set start/end events, use execution packets, and start trace module + XAie_TraceStopEvent(&aieDevInst, loc, XAIE_CORE_MOD, coreTraceEndEvent); + + // Driver requires at least one, non-zero trace event + XAie_TraceEvent(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_TRUE_CORE, 0); + + XAie_Packet pkt = {0, 0}; + XAie_TraceModeConfig(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_TRACE_INST_EXEC); + XAie_TracePktConfig(&aieDevInst, loc, XAIE_CORE_MOD, pkt); + + if(startType != "layer" || startLayer == UINT_MAX) + XAie_TraceStartEvent(&aieDevInst, loc, XAIE_CORE_MOD, coreTraceStartEvent); + (db->getStaticInfo()).addAIECfgTile(deviceId, cfgTile); + continue; + } + + // Get vector of pre-defined metrics for this set + // NOTE: these are local copies as we are adding tile/counter-specific events + EventVector coreEvents; + EventVector memoryEvents; + EventVector interfaceEvents; + if (type == module_type::core) { + coreEvents = 
coreEventSets[metricSet]; + memoryEvents = memoryEventSets[metricSet]; + } + else if (type == module_type::mem_tile) { + memoryEvents = memoryTileEventSets[metricSet]; + } + else if (type == module_type::shim) { + interfaceEvents = interfaceTileEventSets[metricSet]; + } + + if (coreEvents.empty() && memoryEvents.empty() && interfaceEvents.empty()) { + std::stringstream msg; + msg << "Event trace is not available for " << tileName << " using metric set " + << metricSet << " on hardware generation " << metadata->getHardwareGen() << "."; + xrt_core::message::send(severity_level::warning, "XRT", msg.str()); + continue; + } + + if (xrt_core::config::get_verbosity() >= static_cast(severity_level::info)) { + std::stringstream infoMsg; + auto tileName = (type == module_type::mem_tile) ? "memory" + : ((type == module_type::shim) ? "interface" : "AIE"); + infoMsg << "Configuring " << tileName << " tile (" << +col << "," + << +row << ") for trace using metric set " << metricSet; + xrt_core::message::send(severity_level::info, "XRT", infoMsg.str()); + } + + int numCoreTraceEvents = 0; + int numMemoryTraceEvents = 0; + int numInterfaceTraceEvents = 0; + + // + // 1. 
Configure Core Trace Events + // + if (type == module_type::core) { + xrt_core::message::send(severity_level::info, "XRT", "Configuring Core Trace Events"); + + XAie_ModuleType mod = XAIE_CORE_MOD; + uint16_t phyEvent = 0; + + // Configure combo & group events (e.g., DMA monitoring/group masks) + auto comboEvents = configComboEvents(loc, mod, type, metricSet, cfgTile->core_trace_config); + (void)comboEvents; + configGroupEvents(loc, mod, type, metricSet); + + // Set end event for trace capture + // NOTE: This needs to be done first + if (XAie_TraceStopEvent(&aieDevInst, loc, mod, coreTraceEndEvent) != XAIE_OK) + break; + + // Program core trace event slots + for (uint8_t i = 0; i < coreEvents.size(); ++i) { + if (XAie_TraceEvent(&aieDevInst, loc, mod, coreEvents[i], i) != XAIE_OK) + break; + + ++numCoreTraceEvents; + + // Update config file + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, coreEvents[i], &phyEvent); + cfgTile->core_trace_config.traced_events[i] = phyEvent; + } + + // Update config file + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, coreTraceStartEvent, &phyEvent); + cfgTile->core_trace_config.start_event = phyEvent; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, coreTraceEndEvent, &phyEvent); + cfgTile->core_trace_config.stop_event = phyEvent; + + coreEvents.clear(); + mNumTileTraceEvents[typeInt][numCoreTraceEvents]++; + + XAie_Packet pkt = {0, 0}; // core trace uses PC packets + if (XAie_TraceModeConfig(&aieDevInst, loc, mod, XAIE_TRACE_EVENT_PC) != XAIE_OK) + break; + if (XAie_TracePktConfig(&aieDevInst, loc, mod, pkt) != XAIE_OK) + break; + if (startType != "layer" || startLayer == UINT_MAX) + XAie_TraceStartEvent(&aieDevInst, loc, mod, coreTraceStartEvent); + } // Core modules + + // + // 2. 
Configure Memory Trace Events + // + // Applicable to memory module in AIE tiles and memory tiles + if ((type == module_type::core) || (type == module_type::mem_tile)) { + xrt_core::message::send(severity_level::info, "XRT", "Configuring Memory Trace Events"); + + XAie_ModuleType mod = XAIE_MEM_MOD; + uint8_t firstBroadcastId = 8; + + XAie_Events traceStartEvent = + (type == module_type::core) ? coreTraceStartEvent : memoryTileTraceStartEvent; + XAie_Events traceEndEvent = + (type == module_type::core) ? coreTraceEndEvent : memoryTileTraceEndEvent; + + aie_cfg_base& aieConfig = cfgTile->core_trace_config; + if (type == module_type::mem_tile) + aieConfig = cfgTile->memory_tile_trace_config; + + // Combo event override (for DMA metric sets) + auto comboEvents = configComboEvents(loc, mod, type, metricSet, aieConfig); + if (comboEvents.size() == 2) { + traceStartEvent = comboEvents.at(0); + traceEndEvent = comboEvents.at(1); + } + else if (type == module_type::core) { + // Route core start/stop into memory-module trace via broadcast (same as client/NPU3). 
+ if (!m_trace_start_broadcast) { + if (XAie_EventBroadcast(&aieDevInst, loc, XAIE_CORE_MOD, 8, traceStartEvent) != XAIE_OK) + break; + } + if (XAie_EventBroadcast(&aieDevInst, loc, XAIE_CORE_MOD, 9, traceEndEvent) != XAIE_OK) + break; + + uint16_t phyBroadcast = 0; + if (!m_trace_start_broadcast) { + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, XAIE_CORE_MOD, traceStartEvent, &phyBroadcast); + cfgTile->core_trace_config.internal_events_broadcast[8] = phyBroadcast; + } + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, XAIE_CORE_MOD, traceEndEvent, &phyBroadcast); + cfgTile->core_trace_config.internal_events_broadcast[9] = phyBroadcast; + + if (m_trace_start_broadcast) + traceStartEvent = + static_cast(XAIE_EVENT_BROADCAST_0_MEM + traceStartBroadcastChId1); + else + traceStartEvent = XAIE_EVENT_BROADCAST_8_MEM; + traceEndEvent = XAIE_EVENT_BROADCAST_9_MEM; + firstBroadcastId = 10; + } + + if (type == module_type::core) { + if (XAie_EventBroadcastBlockMapDir(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_SWITCH_A, 0xFF00, + XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_NORTH | + XAIE_EVENT_BROADCAST_SOUTH) != XAIE_OK) + break; + if (XAie_EventBroadcastBlockMapDir(&aieDevInst, loc, XAIE_MEM_MOD, XAIE_EVENT_SWITCH_A, 0xFF00, + XAIE_EVENT_BROADCAST_EAST | XAIE_EVENT_BROADCAST_NORTH | + XAIE_EVENT_BROADCAST_SOUTH) != XAIE_OK) + break; + + for (uint8_t bi = 8; bi < 16; bi++) { + if (XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_SWITCH_A, bi, + XAIE_EVENT_BROADCAST_EAST) != XAIE_OK) + break; + } + } + + // Configure stream switch ports (core SS DMA monitors feeding MEM-side trace) + configStreamSwitchPorts(tile, loc, type, metricSet, 0, 0, memoryEvents, aieConfig); + + memoryModTraceStartEvent = traceStartEvent; + if (XAie_TraceStopEvent(&aieDevInst, loc, mod, traceEndEvent) != XAIE_OK) + break; + + { + uint16_t phyEvent1 = 0; + uint16_t phyEvent2 = 0; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, traceStartEvent, &phyEvent1); 
+ XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, traceEndEvent, &phyEvent2); + if (type == module_type::core) { + cfgTile->memory_trace_config.start_event = phyEvent1; + cfgTile->memory_trace_config.stop_event = phyEvent2; + } else { + cfgTile->memory_tile_trace_config.start_event = phyEvent1; + cfgTile->memory_tile_trace_config.stop_event = phyEvent2; + } + } + + auto iter0 = configChannel0.find(tile); + auto iter1 = configChannel1.find(tile); + uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; + uint8_t channel1 = (iter1 == configChannel1.end()) ? 1 : iter1->second; + + if (type == module_type::mem_tile) { + configEventSelections(tile, loc, type, metricSet, channel0, channel1, cfgTile->memory_tile_trace_config); + } else { + if (!memoryEvents.empty()) { + auto channelNum = aie::getChannelNumberFromEvent(memoryEvents.at(0)); + if (channelNum >= 0) { + if (aie::isInputSet(type, metricSet)) { + cfgTile->core_trace_config.mm2s_channels[0] = channelNum; + if (static_cast(channelNum) < tile.mm2s_names.size()) + cfgTile->core_trace_config.mm2s_names[0] = tile.mm2s_names.at(channelNum); + } else { + cfgTile->core_trace_config.s2mm_channels[0] = channelNum; + if (static_cast(channelNum) < tile.s2mm_names.size()) + cfgTile->core_trace_config.s2mm_names[0] = tile.s2mm_names.at(channelNum); + } + } + } + } + + uint8_t bcId = firstBroadcastId; + int bcIndex = (firstBroadcastId == 10) ? 
2 : 0; + static const XAie_Events kMemBcEv[] = { + XAIE_EVENT_BROADCAST_8_MEM, XAIE_EVENT_BROADCAST_9_MEM, XAIE_EVENT_BROADCAST_10_MEM, + XAIE_EVENT_BROADCAST_11_MEM, XAIE_EVENT_BROADCAST_12_MEM, XAIE_EVENT_BROADCAST_13_MEM, + XAIE_EVENT_BROADCAST_14_MEM, XAIE_EVENT_BROADCAST_15_MEM}; + + for (uint8_t i = 0; i < memoryEvents.size(); i++) { + const bool isCoreEvent = xdp::aie::isCoreModuleEvent(memoryEvents[i]); + + if (isCoreEvent) { + if (XAie_EventBroadcast(&aieDevInst, loc, XAIE_CORE_MOD, bcId, memoryEvents[i]) != XAIE_OK) + break; + if (bcIndex >= static_cast(sizeof(kMemBcEv) / sizeof(kMemBcEv[0]))) + break; + if (XAie_TraceEvent(&aieDevInst, loc, XAIE_MEM_MOD, kMemBcEv[bcIndex++], i) != XAIE_OK) + break; + } else { + if (XAie_TraceEvent(&aieDevInst, loc, XAIE_MEM_MOD, memoryEvents[i], i) != XAIE_OK) + break; + } + + ++numMemoryTraceEvents; + + configEdgeEvents(tile, type, metricSet, memoryEvents[i], channel0); + + uint16_t phyEvent = 0; + const XAie_ModuleType phyModConv = isCoreEvent ? XAIE_CORE_MOD : XAIE_MEM_MOD; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, phyModConv, memoryEvents[i], &phyEvent); + + if (isCoreEvent) { + cfgTile->core_trace_config.internal_events_broadcast[bcId] = phyEvent; + cfgTile->memory_trace_config.traced_events[i] = bcIdToEvent(bcId); + ++bcId; + } else if (type == module_type::mem_tile) { + cfgTile->memory_tile_trace_config.traced_events[i] = phyEvent; + } else { + cfgTile->memory_trace_config.traced_events[i] = phyEvent; + } + } + + memoryEvents.clear(); + mNumTileTraceEvents[typeInt][numMemoryTraceEvents]++; + + uint8_t packetType = (type == module_type::mem_tile) ? 
3 : 1; + XAie_Packet pkt = {0, packetType}; + + if (XAie_TracePktConfig(&aieDevInst, loc, mod, pkt) != XAIE_OK) + break; + if ((startType != "layer") || (startLayer == UINT_MAX)) { + if (XAie_TraceStartEvent(&aieDevInst, loc, mod, traceStartEvent) != XAIE_OK) + break; + } + + if (type == module_type::mem_tile) + cfgTile->memory_tile_trace_config.packet_type = packetType; + else + cfgTile->memory_trace_config.packet_type = packetType; + } // Memory modules/tiles + + // + // 3. Configure Interface Tile Trace Events + // + if (type == module_type::shim) { + xrt_core::message::send(severity_level::info, "XRT", "Configuring Interface Tile Trace Events"); + XAie_ModuleType mod = XAIE_PL_MOD; + + auto iter0 = configChannel0.find(tile); + auto iter1 = configChannel1.find(tile); + uint8_t channel0 = (iter0 == configChannel0.end()) ? 0 : iter0->second; + uint8_t channel1 = (iter1 == configChannel1.end()) ? 1 : iter1->second; + std::vector channels = {channel0, channel1, 2, 3}; + + modifyEvents(type, subtype, metricSet, channel0, interfaceEvents); + configEventSelections(tile, loc, type, metricSet, channel0, channel1, cfgTile->interface_tile_trace_config); + configStreamSwitchPorts(tileMetric.first, loc, type, metricSet, channel0, channel1, + interfaceEvents, cfgTile->interface_tile_trace_config); + + // Configure interface tile trace events + for (size_t i = 0; i < interfaceEvents.size(); ++i) { + auto event = interfaceEvents.at(i); + if (XAie_TraceEvent(&aieDevInst, loc, mod, event, static_cast(i)) != XAIE_OK) + break; + + ++numInterfaceTraceEvents; + + uint16_t phyEvent = 0; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, XAIE_PL_MOD, event, &phyEvent); + cfgTile->interface_tile_trace_config.traced_events[i] = phyEvent; + } + + // Update config file + { + // Add interface trace control events + // Start + uint16_t phyEvent = 0; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, XAIE_PL_MOD, interfaceTileTraceStartEvent, &phyEvent); + 
cfgTile->interface_tile_trace_config.start_event = phyEvent; + // Stop + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, XAIE_PL_MOD, interfaceTileTraceEndEvent, &phyEvent); + cfgTile->interface_tile_trace_config.stop_event = phyEvent; + } + + mNumTileTraceEvents[typeInt][numInterfaceTraceEvents]++; + + uint8_t packetType = 4; + XAie_Packet pkt = {0, packetType}; + if (XAie_TracePktConfig(&aieDevInst, loc, mod, pkt) != XAIE_OK) + break; + if (startType != "layer" || startLayer == UINT_MAX) { + if (XAie_TraceStartEvent(&aieDevInst, loc, mod, interfaceTileTraceStartEvent) != XAIE_OK) + break; + } + if (XAie_TraceStopEvent(&aieDevInst, loc, mod, interfaceTileTraceEndEvent) != XAIE_OK) + break; + + cfgTile->interface_tile_trace_config.packet_type = packetType; + + if (!interfaceEvents.empty()) { + auto channelNum = aie::getChannelNumberFromEvent(interfaceEvents.at(0)); + if (channelNum >= 0) { + if (aie::isInputSet(type, metricSet)) + cfgTile->interface_tile_trace_config.mm2s_channels[channelNum] = channelNum; + else + cfgTile->interface_tile_trace_config.s2mm_channels[channelNum] = channelNum; + } + } + } // Interface tiles + + if (xrt_core::config::get_verbosity() >= static_cast(severity_level::debug)) { + std::stringstream msg; + msg << "Reserved "; + if (type == module_type::core) + msg << numCoreTraceEvents << " core and " << numMemoryTraceEvents << " memory"; + else if (type == module_type::mem_tile) + msg << numMemoryTraceEvents << " memory tile"; + else if (type == module_type::shim) + msg << numInterfaceTraceEvents << " interface tile"; + msg << " trace events for tile (" << +col << "," << +row + << "). 
Adding tile to static database."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + } + + // Add config info to static database + // NOTE: Do not access cfgTile after this + (db->getStaticInfo()).addAIECfgTile(deviceId, cfgTile); + xrt_core::message::send(severity_level::info, "XRT", "Debugging XDP: after (db->getStaticInfo()).addAIECfgTile"); + } // For tiles + + // Report and store trace events per tile + for (int m = 0; m < static_cast(module_type::num_types); ++m) { + aie::trace::printTraceEventStats(m, mNumTileTraceEvents[m]); + for (int n = 0; n <= NUM_TRACE_EVENTS; ++n) + (db->getStaticInfo()).addAIECoreEventResources(deviceId, n, mNumTileTraceEvents[m][n]); + } + + if (m_trace_start_broadcast) { + xrt_core::message::send(severity_level::info, "XRT", "before build2ChannelBroadcastNetwork"); + build2ChannelBroadcastNetwork(handle, traceStartBroadcastChId1, traceStartBroadcastChId2, interfaceTileTraceStartEvent); + xrt_core::message::send(severity_level::info, "XRT", "before XAie_EventGenerate"); + XAie_EventGenerate(&aieDevInst, XAie_TileLoc(startCol, 0), XAIE_PL_MOD, interfaceTileTraceStartEvent); + reset2ChannelBroadcastNetwork(handle, traceStartBroadcastChId1, traceStartBroadcastChId2); + } + + auto hwContextSubmit = metadata->getHwContext(); + if (!tranxHandler->submitTransaction(&aieDevInst, hwContextSubmit)) { + xrt_core::message::send(severity_level::error, "XRT", + "Aie trace control-code transaction submission failed."); + return false; + } + xrt_core::message::send(severity_level::info, "XRT", "Successfully scheduled AIE Trace."); + + if (!tranxHandler->initializeTransaction(&aieDevInst, "AieTraceFlush")) { + xrt_core::message::send(severity_level::error, "XRT", + "AIE trace flush transaction initialization failed."); + return false; + } + + // Flush trace by forcing end event + // NOTE: this informs tiles to output remaining packets (even if partial) + for (const auto& loc : traceFlushLocs) + XAie_EventGenerate(&aieDevInst, loc, 
XAIE_CORE_MOD, coreTraceEndEvent); + for (const auto& loc : memoryTileTraceFlushLocs) + XAie_EventGenerate(&aieDevInst, loc, XAIE_MEM_MOD, memoryTileTraceEndEvent); + for (const auto& loc : interfaceTileTraceFlushLocs) + XAie_EventGenerate(&aieDevInst, loc, XAIE_PL_MOD, interfaceTileTraceEndEvent); + + tranxHandler->completeASM(&aieDevInst); + if (!tranxHandler->generateELF()) { + xrt_core::message::send(severity_level::error, "XRT", + "AIE trace flush ELF generation failed."); + return false; + } + + xrt_core::message::send(severity_level::info, "XRT", "Successfully generated ELF for AIE Trace Flush."); + + return true; + } // end setMetricsSettings + + /**************************************************************************** + * Flush trace modules by forcing end events + * + * Trace modules buffer partial packets. At end of run, this needs to be + * flushed using a custom end event. This applies to trace windowing and + * passive tiles like memory and interface. + * + ***************************************************************************/ + void AieTrace_VE2Impl::flushTraceModules() + { + //if (db->infoAvailable(xdp::info::ml_timeline)) { + // db->broadcast(VPDatabase::MessageType::READ_RECORD_TIMESTAMPS, nullptr); + // xrt_core::message::send(severity_level::debug, "XRT", "Done reading recorded timestamps."); + //} + + if (traceFlushLocs.empty() && memoryTileTraceFlushLocs.empty() + && interfaceTileTraceFlushLocs.empty()) + return; + + if (aie::isDebugVerbosity()) { + std::stringstream msg; + msg << "Flushing AIE trace by forcing end event for " << traceFlushLocs.size() + << " AIE tiles, " << memoryTileTraceFlushLocs.size() << " memory tiles, and " + << interfaceTileTraceFlushLocs.size() << " interface tiles."; + xrt_core::message::send(severity_level::debug, "XRT", msg.str()); + } + + traceFlushLocs.clear(); + memoryTileTraceFlushLocs.clear(); + interfaceTileTraceFlushLocs.clear(); + + xrt_core::message::send(severity_level::info, "XRT", "Before AIE 
trace flush."); + auto hwContext = metadata->getHwContext(); + if (!tranxHandler->submitELF(hwContext)) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE trace flush control-code submission failed."); + return; + } + xrt_core::message::send(severity_level::info, "XRT", "Successfully scheduled AIE trace flush."); + } + + /*************************************************************************** + * Build broadcast network using specified channels + * + * Injects `event` at the shim tile of the start column on broadcastId2 and + * programs per-tile direction blocks so that broadcastId1 only travels north + * (up to the highest configured row of each column) and broadcastId2 only + * travels east along the shim row. + * + * @param hwCtxImpl hardware context handle used to look up partition info + * @param broadcastId1 broadcast channel used inside each column + * @param broadcastId2 broadcast channel used along the shim row + * @param event event broadcast from the start-column shim tile + ***************************************************************************/ + void AieTrace_VE2Impl::build2ChannelBroadcastNetwork(void *hwCtxImpl, uint8_t broadcastId1, + uint8_t broadcastId2, XAie_Events event) + { + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(hwCtxImpl); + if (aiePartitionPt.empty()) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE trace: no partition info for trace-start broadcast; skipping broadcast network."); + return; + } + // Currently, assuming only one Hw Context is alive at a time + // uint8_t startCol = static_cast(aiePartitionPt.front().second.get("start_col")); + uint8_t startCol = 0; + // uint8_t numCols = static_cast(aiePartitionPt.front().second.get("num_cols")); + uint8_t numCols = 36; + // NOTE(review): the partition overlay start column used to be read here but was never used; + // confirm that tile columns from the metadata need no additional shift + + // Highest configured row per column. The same index is used for the read and the write, + // and tiles outside the assumed column range are skipped instead of written out of bounds. + std::vector<uint8_t> maxRowAtCol(startCol + numCols, 0); + for (const auto& tileMetric : metadata->getConfigMetrics()) { + const auto& tile = tileMetric.first; + const size_t colIdx = static_cast<size_t>(startCol) + tile.col; + if (colIdx >= maxRowAtCol.size()) + continue; + maxRowAtCol[colIdx] = std::max(maxRowAtCol[colIdx], static_cast<uint8_t>(tile.row)); + } + + XAie_Events bcastEvent2_PL = static_cast<XAie_Events>(XAIE_EVENT_BROADCAST_A_0_PL + broadcastId2); + XAie_EventBroadcast(&aieDevInst, XAie_TileLoc(startCol, 0), XAIE_PL_MOD, broadcastId2, event); + + for (uint8_t col = startCol; col < (startCol + numCols); col++) { + for (uint8_t row = 0; row <= maxRowAtCol[col]; row++) { + module_type tileType = aie::getModuleType(row, metadata->getRowOffset()); + auto
loc = XAie_TileLoc(col, row); + + // shim tile + if (tileType == module_type::shim) { + // first channel is only used to send north + if (col == startCol) { + XAie_EventBroadcast(&aieDevInst, loc, XAIE_PL_MOD, broadcastId1, event); + } else { + XAie_EventBroadcast(&aieDevInst, loc, XAIE_PL_MOD, broadcastId1, bcastEvent2_PL); + } + if (maxRowAtCol[col] != row) { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST); + } else { + XAie_EventBroadcastBlockDir( + &aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST | XAIE_EVENT_BROADCAST_NORTH); + } + + // second channel is only used to send east + // NOTE(review): the former last-column check issued this identical call in both branches, + // so EAST is never blocked on the last column; confirm whether it should be + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId2, + XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_NORTH); + } + + // mem tile + else if (tileType == module_type::mem_tile) { + if (maxRowAtCol[col] != row) { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_MEM_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST); + } else { + XAie_EventBroadcastBlockDir( + &aieDevInst, loc, XAIE_MEM_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST | XAIE_EVENT_BROADCAST_NORTH); + } + } + + // core tile + else { + if (maxRowAtCol[col] != row) { + XAie_EventBroadcastBlockDir(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST |
XAIE_EVENT_BROADCAST_EAST); + } else { + XAie_EventBroadcastBlockDir( + &aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_SOUTH | XAIE_EVENT_BROADCAST_WEST | XAIE_EVENT_BROADCAST_EAST | XAIE_EVENT_BROADCAST_NORTH); + } + } + } + } + } + + /*************************************************************************** + * Reset using broadcast network on specified channels + * + * Clears the broadcast events and unblocks every direction on the tiles + * visited by build2ChannelBroadcastNetwork. + * + * @param hwCtxImpl hardware context handle used to look up partition info + * @param broadcastId1 broadcast channel used inside each column + * @param broadcastId2 broadcast channel used along the shim row + ***************************************************************************/ + void AieTrace_VE2Impl::reset2ChannelBroadcastNetwork(void *hwCtxImpl, uint8_t broadcastId1, + uint8_t broadcastId2) + { + boost::property_tree::ptree aiePartitionPt = xdp::aie::getAIEPartitionInfo(hwCtxImpl); + if (aiePartitionPt.empty()) { + xrt_core::message::send(severity_level::warning, "XRT", + "AIE trace: no partition info for trace-start broadcast reset; skipping."); + return; + } + // Currently, assuming only one Hw Context is alive at a time + //uint8_t startCol = static_cast(aiePartitionPt.back().second.get("start_col")); + uint8_t startCol = 0; + //uint8_t numCols = static_cast(aiePartitionPt.back().second.get("num_cols")); + uint8_t numCols = 36; + // NOTE(review): the partition overlay start column used to be read here but was never used; + // confirm that tile columns from the metadata need no additional shift + + // Highest configured row per column. The same index is used for the read and the write, + // and tiles outside the assumed column range are skipped instead of written out of bounds. + std::vector<uint8_t> maxRowAtCol(startCol + numCols, 0); + for (const auto& tileMetric : metadata->getConfigMetrics()) { + const auto& tile = tileMetric.first; + const size_t colIdx = static_cast<size_t>(startCol) + tile.col; + if (colIdx >= maxRowAtCol.size()) + continue; + maxRowAtCol[colIdx] = std::max(maxRowAtCol[colIdx], static_cast<uint8_t>(tile.row)); + } + + XAie_EventBroadcastReset(&aieDevInst, XAie_TileLoc(startCol, 0), XAIE_PL_MOD, broadcastId2); + + for (uint8_t col = startCol; col < (startCol + numCols); col++) { + for (uint8_t row = 0; row <= maxRowAtCol[col]; row++) { + module_type tileType = aie::getModuleType(row, metadata->getRowOffset()); + auto loc = XAie_TileLoc(col, row); + + // shim tile + if (tileType == module_type::shim) { + XAie_EventBroadcastReset(&aieDevInst, loc, XAIE_PL_MOD, broadcastId1); +
XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_ALL); + XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_A, broadcastId2, + XAIE_EVENT_BROADCAST_ALL); + XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_PL_MOD, XAIE_EVENT_SWITCH_B, broadcastId2, + XAIE_EVENT_BROADCAST_ALL); + } + + // mem tile + else if (tileType == module_type::mem_tile) { + XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_MEM_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_ALL); + } + + // core tile + else { + XAie_EventBroadcastUnblockDir(&aieDevInst, loc, XAIE_CORE_MOD, XAIE_EVENT_SWITCH_A, broadcastId1, + XAIE_EVENT_BROADCAST_ALL); + } + } + } + } + + /**************************************************************************** + * Modify events in metric set based on type and channel + ***************************************************************************/ + void AieTrace_VE2Impl::modifyEvents(module_type type, io_type subtype, + const std::string metricSet, uint8_t channel, + std::vector& events) + { + // Only needed for GMIO DMA channel 1 + if ((type != module_type::shim) || (subtype == io_type::PLIO) || (channel == 0)) + return; + + // Check type to minimize replacements + if (aie::isInputSet(type, metricSet)) { + // Input or MM2S + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_MM2S_0_START_TASK_PL, XAIE_EVENT_NOC0_DMA_MM2S_1_START_TASK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_MM2S_0_FINISHED_BD_PL, XAIE_EVENT_NOC0_DMA_MM2S_1_FINISHED_BD_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_MM2S_0_FINISHED_TASK_PL, XAIE_EVENT_NOC0_DMA_MM2S_1_FINISHED_TASK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_MM2S_0_STALLED_LOCK_PL, XAIE_EVENT_NOC0_DMA_MM2S_1_STALLED_LOCK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_MM2S_0_STREAM_BACKPRESSURE_PL, 
XAIE_EVENT_NOC0_DMA_MM2S_1_STREAM_BACKPRESSURE_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_MM2S_0_MEMORY_STARVATION_PL, XAIE_EVENT_NOC0_DMA_MM2S_1_MEMORY_STARVATION_PL); + } else { + // Output or S2MM + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_S2MM_0_START_TASK_PL, XAIE_EVENT_NOC0_DMA_S2MM_1_START_TASK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_S2MM_0_FINISHED_BD_PL, XAIE_EVENT_NOC0_DMA_S2MM_1_FINISHED_BD_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_S2MM_0_FINISHED_TASK_PL, XAIE_EVENT_NOC0_DMA_S2MM_1_FINISHED_TASK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_S2MM_0_STALLED_LOCK_PL, XAIE_EVENT_NOC0_DMA_S2MM_1_STALLED_LOCK_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_S2MM_0_STREAM_STARVATION_PL, XAIE_EVENT_NOC0_DMA_S2MM_1_STREAM_STARVATION_PL); + std::replace(events.begin(), events.end(), + XAIE_EVENT_NOC0_DMA_S2MM_0_MEMORY_BACKPRESSURE_PL, XAIE_EVENT_NOC0_DMA_S2MM_1_MEMORY_BACKPRESSURE_PL); + } + } + + uint32_t AieTrace_VE2Impl::bcIdToEvent(int bcId) + { + return bcId + CORE_BROADCAST_EVENT_BASE; + } + + /**************************************************************************** + * Configure stream switch event ports for monitoring purposes + ***************************************************************************/ + void + AieTrace_VE2Impl::configStreamSwitchPorts(const tile_type& tile, const XAie_LocType loc, + const module_type type, const std::string metricSet, + const uint8_t channel0, const uint8_t channel1, + std::vector& events, aie_cfg_base& config) + { + std::set portSet; + + // Traverse all counters and request monitor ports as needed + for (int i=0; i < events.size(); ++i) { + // Ensure applicable event + auto event = events.at(i); + if (!xdp::aie::isStreamSwitchPortEvent(event)) + continue; + + auto portnum = xdp::aie::getPortNumberFromEvent(event); + uint8_t channelNum = portnum % 2; + 
uint8_t channel = (channelNum == 0) ? channel0 : channel1; + + // New port needed: reserve, configure, and store + if (portSet.find(portnum) == portSet.end()) { + portSet.insert(portnum); + + if (type == module_type::core) { + // AIE Tiles - Monitor DMA channels + // Ports 2 and above, or any metric set whose name contains "s2mm", are monitored as master (S2MM) + bool isMaster = ((portnum >= 2) || (metricSet.find("s2mm") != std::string::npos)); + auto slaveOrMaster = isMaster ? XAIE_STRMSW_MASTER : XAIE_STRMSW_SLAVE; + std::string typeName = isMaster ? "S2MM" : "MM2S"; + std::string msg = "Configuring core module stream switch to monitor DMA " + + typeName + " channel " + std::to_string(channelNum); + xrt_core::message::send(severity_level::debug, "XRT", msg); + XAie_EventSelectStrmPort(&aieDevInst, loc, portnum, slaveOrMaster, DMA, channelNum); + + // Record for runtime config file + // NOTE: channel info informs back-end there will be events on that channel + config.port_trace_ids[portnum] = channelNum; + config.port_trace_is_master[portnum] = isMaster; + // Only record a port name when the tile provides one; an unchecked at() would throw + if (portnum < tile.port_names.size()) + config.port_trace_names[portnum] = tile.port_names.at(portnum); + + if (isMaster) { + config.s2mm_channels[channelNum] = channelNum; + if (channelNum < tile.s2mm_names.size()) + config.s2mm_names[channelNum] = tile.s2mm_names.at(channelNum); + } + else { + config.mm2s_channels[channelNum] = channelNum; + if (channelNum < tile.mm2s_names.size()) + config.mm2s_names[channelNum] = tile.mm2s_names.at(channelNum); + } + } + // Interface tiles (e.g., PLIO, GMIO) + else if (type == module_type::shim) { + // NOTE: skip configuration of extra ports for tile if stream_ids are not available. + if (portnum >= tile.stream_ids.size()) + continue; + + auto slaveOrMaster = (tile.is_master_vec.at(portnum) == 0) ? XAIE_STRMSW_SLAVE : XAIE_STRMSW_MASTER; + std::string typeName = (tile.is_master_vec.at(portnum) == 0) ?
"slave" : "master"; + uint8_t streamPortId = static_cast(tile.stream_ids.at(portnum)); + + std::string msg = "Configuring interface tile stream switch to monitor " + + typeName + " stream port " + std::to_string(streamPortId); + xrt_core::message::send(severity_level::debug, "XRT", msg); + XAie_EventSelectStrmPort(&aieDevInst, loc, portnum, slaveOrMaster, SOUTH, streamPortId); + + // Record for runtime config file + config.port_trace_ids[portnum] = (tile.subtype == io_type::PLIO) ? portnum : channel; + config.port_trace_is_master[portnum] = (tile.is_master_vec.at(portnum) != 0); + if (tile.subtype == io_type::PLIO) { + if (streamPortId < tile.port_names.size()) + config.port_trace_names[portnum] = tile.port_names.at(streamPortId); + } else { + if (channel < tile.port_names.size()) + config.port_trace_names[portnum] = tile.port_names.at(channel); + } + + if (tile.is_master_vec.at(portnum) == 0) { + config.mm2s_channels[channelNum] = channel; + if (channelNum < tile.mm2s_names.size()) + config.mm2s_names[channelNum] = tile.mm2s_names.at(channelNum); + } + else { + config.s2mm_channels[channelNum] = channel; + if (channelNum < tile.s2mm_names.size()) + config.s2mm_names[channelNum] = tile.s2mm_names.at(channelNum); + } + } + else { + // Memory tiles + auto slaveOrMaster = aie::isInputSet(type, metricSet) ? XAIE_STRMSW_MASTER : XAIE_STRMSW_SLAVE; + std::string typeName = (slaveOrMaster == XAIE_STRMSW_MASTER) ? 
"master" : "slave"; + std::string msg = "Configuring memory tile stream switch to monitor " + + typeName + " stream port " + std::to_string(channel); + xrt_core::message::send(severity_level::debug, "XRT", msg); + XAie_EventSelectStrmPort(&aieDevInst, loc, portnum, slaveOrMaster, DMA, channel); + + // Record for runtime config file + config.port_trace_ids[portnum] = channel; + config.port_trace_is_master[portnum] = (slaveOrMaster == XAIE_STRMSW_MASTER); + if (portnum < tile.port_names.size()) + config.port_trace_names[portnum] = tile.port_names.at(portnum); + } + } + } + + if ((type == module_type::shim) && (tile.subtype == io_type::PLIO) && + (portSet.size() < tile.stream_ids.size())) { + std::string msg = "Interface tile " + std::to_string(tile.col) + " has more " + + "PLIO than can be monitored by metric set " + metricSet + ". Please " + + "run again with different trace settings or choose a different set."; + xrt_core::message::send(severity_level::warning, "XRT", msg); + } + + portSet.clear(); + } + + /**************************************************************************** + * Configure combo events (AIE tiles only) + ***************************************************************************/ + std::vector + AieTrace_VE2Impl::configComboEvents(const XAie_LocType loc, const XAie_ModuleType mod, + const module_type type, const std::string metricSet, + aie_cfg_base& config) + { + // Only needed for core/memory modules and metric sets that include DMA events + if (!xdp::aie::isDmaSet(metricSet) || ((type != module_type::core) && (type != module_type::dma))) + return {}; + + std::vector comboEvents; + + if (mod == XAIE_CORE_MOD) { + comboEvents.push_back(XAIE_EVENT_COMBO_EVENT_2_CORE); + + // Combo2 = Port_Idle_0 OR Port_Idle_1 OR Port_Idle_2 OR Port_Idle_3 + std::vector events = {XAIE_EVENT_PORT_IDLE_0_CORE, + XAIE_EVENT_PORT_IDLE_1_CORE, XAIE_EVENT_PORT_IDLE_2_CORE, + XAIE_EVENT_PORT_IDLE_3_CORE}; + std::vector opts = {XAIE_EVENT_COMBO_E1_OR_E2, + 
XAIE_EVENT_COMBO_E1_OR_E2, XAIE_EVENT_COMBO_E1_OR_E2}; + + // Capture in config class to report later + for (int i=0; i < NUM_COMBO_EVENT_CONTROL; ++i) + config.combo_event_control[i] = 2; + for (int i=0; i < events.size(); ++i) { + uint16_t phyEvent = 0; + XAie_EventLogicalToPhysicalConv(&aieDevInst, loc, mod, events.at(i), &phyEvent); + config.combo_event_input[i] = phyEvent; + } + + // Set events and trigger on OR of events + XAie_EventComboConfig(&aieDevInst, loc, mod, XAIE_EVENT_COMBO0, opts[0], events[0], events[1]); + XAie_EventComboConfig(&aieDevInst, loc, mod, XAIE_EVENT_COMBO1, opts[1], events[2], events[3]); + XAie_EventComboConfig(&aieDevInst, loc, mod, XAIE_EVENT_COMBO2, opts[2], XAIE_EVENT_COMBO_EVENT_0_PL, XAIE_EVENT_COMBO_EVENT_1_PL); + return comboEvents; + } + + // Combo events do not auto-broadcast from core to memory module, + // so let's avoid the complexity and find a different method. + #if 0 + // Below is for memory modules + + // Memory_Combo0 = (Active OR Group_Stream_Switch) + auto comboEvent0 = xaieTile.mem().comboEvent(); + comboEvents.push_back(XAIE_EVENT_COMBO_EVENT_0_MEM); + + std::vector events0; + events0.push_back(XAIE_EVENT_ACTIVE_CORE); + events0.push_back(XAIE_EVENT_GROUP_STREAM_SWITCH_CORE); + std::vector opts0; + opts0.push_back(XAIE_EVENT_COMBO_E1_OR_E2); + + comboEvent0->setEvents(events0, opts0); + + // Memory_Combo1 = (Group_Core_Program_Flow AND Core_Combo2) + auto comboEvent1 = xaieTile.mem().comboEvent(); + comboEvents.push_back(XAIE_EVENT_COMBO_EVENT_1_MEM); + + std::vector events1; + events1.push_back(XAIE_EVENT_GROUP_CORE_PROGRAM_FLOW_CORE); + events1.push_back(XAIE_EVENT_COMBO_EVENT_2_CORE); + std::vector opts1; + opts1.push_back(XAIE_EVENT_COMBO_E1_AND_E2); + + comboEvent1->setEvents(events1, opts1); + #else + // Since we're tracing DMA events, start trace right away. + // Specify user event 0 as trace end so we can flush after run. 
+ comboEvents.push_back(XAIE_EVENT_TRUE_MEM); + comboEvents.push_back(XAIE_EVENT_USER_EVENT_0_MEM); + #endif + return comboEvents; + } + + /**************************************************************************** + * Configure group events (core modules only) + ***************************************************************************/ + void AieTrace_VE2Impl::configGroupEvents(const XAie_LocType loc, const XAie_ModuleType mod, + const module_type type, const std::string metricSet) + { + // Only needed for core module and metric sets that include DMA events + if (!aie::isDmaSet(metricSet) || (type != module_type::core)) + return; + + // Set masks for group events + XAie_EventGroupControl(&aieDevInst, loc, mod, XAIE_EVENT_GROUP_CORE_PROGRAM_FLOW_CORE, + GROUP_CORE_FUNCTIONS_MASK); + XAie_EventGroupControl(&aieDevInst, loc, mod, XAIE_EVENT_GROUP_CORE_STALL_CORE, + GROUP_CORE_STALL_MASK); + XAie_EventGroupControl(&aieDevInst, loc, mod, XAIE_EVENT_GROUP_STREAM_SWITCH_CORE, + GROUP_STREAM_SWITCH_RUNNING_MASK); + } + + /**************************************************************************** + * Configure event selection (memory tiles only) + ***************************************************************************/ + void AieTrace_VE2Impl::configEventSelections(const tile_type& tile, const XAie_LocType loc, const module_type type, + const std::string metricSet, const uint8_t channel0, + const uint8_t channel1, aie_cfg_base& config) + { + if (type != module_type::mem_tile) + return; + + XAie_DmaDirection dmaDir = aie::isInputSet(type, metricSet) ? DMA_S2MM : DMA_MM2S; + + if (aie::isDebugVerbosity()) { + std::string typeName = (dmaDir == DMA_S2MM) ? 
"S2MM" : "MM2S"; + std::string msg = "Configuring event selections for DMA " + typeName + " channels " + + std::to_string(channel0) + " and " + std::to_string(channel1); + xrt_core::message::send(severity_level::debug, "XRT", msg); + } + + XAie_EventSelectDmaChannel(&aieDevInst, loc, 0, dmaDir, channel0); + XAie_EventSelectDmaChannel(&aieDevInst, loc, 1, dmaDir, channel1); + + // Record for runtime config file + config.port_trace_ids[0] = channel0; + config.port_trace_ids[1] = channel1; + if (aie::isInputSet(type, metricSet)) { + config.port_trace_is_master[0] = true; + config.port_trace_is_master[1] = true; + config.s2mm_channels[0] = channel0; + if (channel0 < tile.s2mm_names.size()) + config.s2mm_names[0] = tile.s2mm_names[channel0]; + if (channel0 != channel1) { + config.s2mm_channels[1] = channel1; + if (channel1 < tile.s2mm_names.size()) + config.s2mm_names[1] = tile.s2mm_names[channel1]; + } + } + else { + config.port_trace_is_master[0] = false; + config.port_trace_is_master[1] = false; + config.mm2s_channels[0] = channel0; + if (channel0 < tile.mm2s_names.size()) + config.mm2s_names[0] = tile.mm2s_names[channel0]; + if (channel0 != channel1) { + config.mm2s_channels[1] = channel1; + if (channel1 < tile.mm2s_names.size()) + config.mm2s_names[1] = tile.mm2s_names[channel1]; + } + } + } + + /**************************************************************************** + * Configure edge detection events + ***************************************************************************/ + void AieTrace_VE2Impl::configEdgeEvents(const tile_type& tile, + const module_type type, const std::string metricSet, + const XAie_Events event, const uint8_t channel) + { + if ((event != XAIE_EVENT_EDGE_DETECTION_EVENT_0_MEM_TILE) + && (event != XAIE_EVENT_EDGE_DETECTION_EVENT_1_MEM_TILE) + && (event != XAIE_EVENT_EDGE_DETECTION_EVENT_0_MEM) + && (event != XAIE_EVENT_EDGE_DETECTION_EVENT_1_MEM)) + return; + + // Catch memory tiles + if (type == module_type::mem_tile) { + // Event 
is DMA_S2MM_Sel0_stream_starvation or DMA_MM2S_Sel0_stalled_lock + uint16_t eventNum = aie::isInputSet(type, metricSet) + ? EVENT_MEM_TILE_DMA_S2MM_SEL0_STREAM_STARVATION + : EVENT_MEM_TILE_DMA_MM2S_SEL0_STALLED_LOCK; + + // Register Edge_Detection_event_control + // 26 Event 1 triggered on falling edge + // 25 Event 1 triggered on rising edge + // 23:16 Input event for edge event 1 + // 10 Event 0 triggered on falling edge + // 9 Event 0 triggered on rising edge + // 7:0 Input event for edge event 0 + uint32_t edgeEventsValue = (1 << 26) + (eventNum << 16) + (1 << 9) + eventNum; + + xrt_core::message::send(severity_level::debug, "XRT", + "Configuring memory tile edge events to detect rise and fall of event " + + std::to_string(eventNum)); + + auto tileOffset = _XAie_GetTileAddr(&aieDevInst, tile.row, tile.col); + XAie_Write32(&aieDevInst, tileOffset + AIE_OFFSET_EDGE_CONTROL_MEM_TILE, + edgeEventsValue); + return; + } + + // Below is AIE tile support + + // Event is DMA_MM2S_stalled_lock or DMA_S2MM_stream_starvation + uint16_t eventNum = aie::isInputSet(type, metricSet) + ? ((channel == 0) ? EVENT_MEM_DMA_MM2S_0_STALLED_LOCK + : EVENT_MEM_DMA_MM2S_1_STALLED_LOCK) + : ((channel == 0) ? 
EVENT_MEM_DMA_S2MM_0_STREAM_STARVATION + : EVENT_MEM_DMA_S2MM_1_STREAM_STARVATION); + + // Register Edge_Detection_event_control + // 26 Event 1 triggered on falling edge + // 25 Event 1 triggered on rising edge + // 23:16 Input event for edge event 1 + // 10 Event 0 triggered on falling edge + // 9 Event 0 triggered on rising edge + // 7:0 Input event for edge event 0 + uint32_t edgeEventsValue = (1 << 26) + (eventNum << 16) + (1 << 9) + eventNum; + + xrt_core::message::send(severity_level::debug, "XRT", + "Configuring AIE tile edge events to detect rise and fall of event " + + std::to_string(eventNum)); + + auto tileOffset = _XAie_GetTileAddr(&aieDevInst, tile.row, tile.col); + XAie_Write32(&aieDevInst, tileOffset + AIE_OFFSET_EDGE_CONTROL_MEM, + edgeEventsValue); + } + + /**************************************************************************** + * Poll AIE timers (for system timeline only) + ***************************************************************************/ + void AieTrace_VE2Impl::pollTimers(uint64_t index, void* handle) + { + // TODO: Poll timers (needed for system timeline only) + (void)index; + (void)handle; + } + + /**************************************************************************** + * Set AIE device instance + ***************************************************************************/ + void* AieTrace_VE2Impl::setAieDeviceInst(void* handle, uint64_t deviceID) + { + (void)handle; + (void)deviceID; + // XDNA has no shim-provided XAie_DevInst for offload; trace uses hw_context + // in AIETraceOffloadManager / AIETraceOffload (see aie_trace_offload_ve2). 
+ return nullptr; + } + + /**************************************************************************** + * Report whether a tile has free resources (stub) + * Nothing is queried here: every argument is discarded and the tile is + * always reported as usable. + * NOTE(review): confirm callers expect "true" when no resource bookkeeping + * is available; "false" would presumably make them skip the tile. + ***************************************************************************/ + bool AieTrace_VE2Impl::tileHasFreeRsc(xaiefal::XAieDev* aieDevice, XAie_LocType& loc, const module_type type, const std::string& metricSet) + { + (void)aieDevice; + (void)loc; + (void)type; + (void)metricSet; + /* A bool function must return a value: the former empty body flowed off the end, which is undefined behavior. */ + return true; + } + void AieTrace_VE2Impl::freeResources() {} + + +} // namespace xdp + +#endif \ No newline at end of file diff --git a/profile/plugin/aie_trace/ve2/aie_trace.h b/profile/plugin/aie_trace/ve2/aie_trace.h index 2ea803a6..bea1c49f 100755 --- a/profile/plugin/aie_trace/ve2/aie_trace.h +++ b/profile/plugin/aie_trace/ve2/aie_trace.h @@ -10,6 +10,10 @@ #include "xdp/profile/plugin/aie_trace/aie_trace_impl.h" #include "xdp/profile/plugin/aie_trace/util/aie_trace_config.h" +#ifndef XDP_VE2_ZOCL_BUILD +#include "xdp/profile/device/common/ve2/ve2_transaction.h" +#endif + namespace xdp { class AieTrace_VE2Impl : public AieTraceImpl { @@ -24,23 +28,17 @@ namespace xdp { void* setAieDeviceInst(void* handle, uint64_t deviceID) override; private: + // Common helpers used by both VE2 flows. uint64_t checkTraceBufSize(uint64_t size) override; bool tileHasFreeRsc(xaiefal::XAieDev* aieDevice, XAie_LocType& loc, const module_type type, const std::string& metricSet); bool setMetricsSettings(uint64_t deviceId, void* handle); - bool configureWindowedEventTrace(xaiefal::XAieDev* aieDevice); - private: typedef XAie_Events EventType; typedef std::vector EventVector; typedef std::vector ValueVector; - XAie_DevInst* aieDevInst = nullptr; - xaiefal::XAieDev* aieDevice = nullptr; // AIE resources - std::vector> perfCounters; - std::vector> streamPorts; - std::map coreEventSets; std::map memoryEventSets; std::map memoryTileEventSets; @@ -73,7 +71,52 @@ namespace xdp { // Keep track of number of events reserved per module and/or tile int mNumTileTraceEvents[static_cast(module_type::num_types)][NUM_TRACE_EVENTS + 1]; - }; + +#ifdef XDP_VE2_ZOCL_BUILD + // VE2 ZOCL flow (FAL-backed). 
+ XAie_DevInst* aieDevInst = nullptr; + xaiefal::XAieDev* aieDevice = nullptr; + std::vector> perfCounters; + std::vector> streamPorts; + + bool configureWindowedEventTrace(xaiefal::XAieDev* aieDevice); + +#else + // VE2 XDNA flow (no FAL resource ownership path). + // Control-code order: AieTraceOffload (initReadTrace), AieTraceMetrics (updateDevice), + // AieTraceFlush (end of setMetricsSettings). + XAie_DevInst aieDevInst = {0}; + std::unique_ptr tranxHandler; + bool m_trace_start_broadcast = false; + EventType memoryModTraceStartEvent; + + bool configureWindowedEventTrace(void* handle); + void build2ChannelBroadcastNetwork(void* handle, uint8_t broadcastId1, + uint8_t broadcastId2, XAie_Events event); + void reset2ChannelBroadcastNetwork(void* handle, uint8_t broadcastId1, + uint8_t broadcastId2); + uint32_t bcIdToEvent(int bcId); + + void configStreamSwitchPorts(const tile_type& tile, const XAie_LocType loc, + const module_type type, const std::string metricSet, + const uint8_t channel0, const uint8_t channel1, + std::vector& events, aie_cfg_base& config); + std::vector configComboEvents(const XAie_LocType loc, const XAie_ModuleType mod, + const module_type type, const std::string metricSet, + aie_cfg_base& config); + void configGroupEvents(const XAie_LocType loc, const XAie_ModuleType mod, + const module_type type, const std::string metricSet); + void configEventSelections(const tile_type& tile, const XAie_LocType loc, + const module_type type, const std::string metricSet, + const uint8_t channel0, const uint8_t channel1, + aie_cfg_base& config); + void configEdgeEvents(const tile_type& tile, const module_type type, + const std::string metricSet, const XAie_Events event, + const uint8_t channel = 0); + void modifyEvents(module_type type, io_type subtype, const std::string metricSet, + uint8_t channel, std::vector& events); +#endif +}; } // namespace xdp