Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions profile/device/aie_trace/client/aie_trace_offload_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@


extern "C" {
#include <xaiengine.h>
#include <xaiengine/xaiegbl_params.h>
#include <aie_codegen.h>
#include <aie_codegen_inc/xaiegbl_params.h>
}

namespace xdp {
Expand Down
205 changes: 195 additions & 10 deletions profile/device/aie_trace/ve2/aie_trace_offload_ve2.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// VE2 class
/**
* Copyright (C) 2019-2022 Xilinx, Inc
* Copyright (C) 2022-2024 Advanced Micro Devices, Inc. - All rights reserved
Expand All @@ -17,6 +18,7 @@

#define XDP_PLUGIN_SOURCE

#include <chrono>
#include <iostream>

#include "core/include/xrt.h"
Expand All @@ -38,9 +40,6 @@
#include <unistd.h>
#include <sys/mman.h>




namespace xdp {


Expand All @@ -51,7 +50,12 @@ AIETraceOffload::AIETraceOffload
, bool isPlio
, uint64_t totalSize
, uint64_t numStrm
#if defined(XDP_VE2_BUILD) && defined(XDP_VE2_ZOCL_BUILD)
, XAie_DevInst* devInstance
#elif defined(XDP_VE2_BUILD)
, xrt::hw_context ctx
, std::shared_ptr<AieTraceMetadata> md
#endif
)
: deviceHandle(handle)
, deviceId(id)
Expand All @@ -66,8 +70,12 @@ AIETraceOffload::AIETraceOffload
, offloadStatus(AIEOffloadThreadStatus::IDLE)
, mEnCircularBuf(false)
, mCircularBufOverwrite(false)
#if defined(XDP_VE2_BUILD) && defined(XDP_VE2_ZOCL_BUILD)
, devInst(devInstance)

#elif defined(XDP_VE2_BUILD)
, context(ctx)
, metadata(md)
#endif
{
bufAllocSz = deviceIntf->getAlignedTraceBufSize(totalSz, static_cast<unsigned int>(numStream));

Expand All @@ -85,6 +93,7 @@ AIETraceOffload::~AIETraceOffload()
offloadThread.join();
}

#ifdef XDP_VE2_ZOCL_BUILD
bool AIETraceOffload::initReadTrace()
{
// Submit nop.elf to initialize AIE array before BD configuration
Expand Down Expand Up @@ -161,8 +170,8 @@ bool AIETraceOffload::initReadTrace()

// Compute BD: use metadata value if set, otherwise channelNumber * 4
uint16_t bdNum = (traceGMIO->bufferDescriptorId != UINT16_MAX)
? traceGMIO->bufferDescriptorId
: channelNumber * 4;
? traceGMIO->bufferDescriptorId
: channelNumber * 4;
std::stringstream bdMsg;
bdMsg << "AIE Trace: Using BD " << bdNum << " for channel " << (int)channelNumber
<< " on shim column " << (int)traceGMIO->shimColumn;
Expand All @@ -177,7 +186,116 @@ bool AIETraceOffload::initReadTrace()
bufferInitialized = true;
return bufferInitialized;
}
#else // XDNA flow
bool AIETraceOffload::initReadTrace()
{
// TODO: is this only zocl specific or do we need it for XDNA as well?
// Submit nop.elf to initialize AIE array before BD configuration
if (!aie::submitNopElf(deviceHandle)) {
xrt_core::message::send(xrt_core::message::severity_level::warning, "XRT",
"Failed to submit nop.elf. AIE trace configuration will not proceed.");
return false;
}

xrt_core::message::send(severity_level::info, "XRT", "Starting configuration for VE2.");

buffers.clear();
buffers.resize(numStream);

xdp::aie::driver_config meta_config = metadata->getAIEConfigMetadata();
XAie_Config cfg{
meta_config.hw_gen,
meta_config.base_address,
meta_config.column_shift,
meta_config.row_shift,
meta_config.num_rows,
meta_config.num_columns,
meta_config.shim_row,
meta_config.mem_row_start,
meta_config.mem_num_rows,
meta_config.aie_tile_row_start,
meta_config.aie_tile_num_rows,
{0} // PartProp
};

auto RC = XAie_CfgInitialize(&aieDevInst, &cfg);
if (RC != XAIE_OK) {
xrt_core::message::send(severity_level::warning, "XRT", "AIE TRACE OFFLOAD: AIE Driver Initialization Failed.");
return false;
}

tranxHandler = std::make_unique<aie::VE2Transaction>();
if (!tranxHandler->initializeTransaction(&aieDevInst, "AieTraceOffload"))
return false;

gmioDMAInsts.clear();
gmioDMAInsts.resize(numStream);

for (uint64_t i = 0; i < numStream ; ++i) {
xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT",
"Allocating trace buffer of size " + std::to_string(bufAllocSz) + " for AIE Stream "
+ std::to_string(i));
xrt_bos.emplace_back(xrt::bo(context.get_device(), bufAllocSz,
XRT_BO_FLAGS_HOST_ONLY, tranxHandler->getGroupID(0, context)));

buffers[i].bufId = xrt_bos.size();
if (!buffers[i].bufId) {
bufferInitialized = false;
return bufferInitialized;
}

if (!xrt_bos.empty()) {
auto bo_map = xrt_bos.back().map<uint8_t*>();
memset(bo_map, 0, bufAllocSz);
}

VPDatabase* db = VPDatabase::Instance();
TraceGMIO* traceGMIO = (db->getStaticInfo()).getTraceGMIO(deviceId, i);

// channelNumber: (0-S2MM0,1-S2MM1,2-MM2S0,3-MM2S1)
// Enable shim DMA channel, need to start first so the status is correct
uint16_t channelNumber = (traceGMIO->channelNumber > 1) ? (traceGMIO->channelNumber - 2) : traceGMIO->channelNumber;
XAie_DmaDirection dir = (traceGMIO->channelNumber > 1) ? DMA_MM2S : DMA_S2MM;

gmioDMAInsts[i].gmioTileLoc = XAie_TileLoc(traceGMIO->shimColumn, 0);

RC = XAie_DmaDescInit(&aieDevInst, &(gmioDMAInsts[i].shimDmaInst), gmioDMAInsts[i].gmioTileLoc);
RC = XAie_DmaChannelEnable(&aieDevInst, gmioDMAInsts[i].gmioTileLoc, channelNumber, dir);
RC = XAie_DmaSetAxi(&(gmioDMAInsts[i].shimDmaInst), 0, traceGMIO->burstLength, 0, 0, 0);

// TODO: get board-specific values
// // TODO: XAie_DmaSetAddrLen like this?
RC = XAie_DmaSetAddrLen(&(gmioDMAInsts[i].shimDmaInst), xrt_bos[i].address(), static_cast<uint32_t>(bufAllocSz));
// // TODO: or this?
// char* vaddr = reinterpret_cast<char *>(mmap(NULL, bufAllocSz, PROT_READ | PROT_WRITE, MAP_SHARED, boExportHandle, 0));
// RC = XAie_DmaSetAddrLen(&(gmioDMAInsts[i].shimDmaInst), (uint64_t)vaddr, bufAllocSz);

XAie_DmaEnableBd(&(gmioDMAInsts[i].shimDmaInst));

uint16_t bdNum = (traceGMIO->bufferDescriptorId != UINT16_MAX) ? traceGMIO->bufferDescriptorId : channelNumber * 4;
std::stringstream bdMsg;
bdMsg << "AIE Trace: Using BD " << bdNum << " for channel " << (int)channelNumber << " on shim column " << (int)traceGMIO->shimColumn;
xrt_core::message::send(xrt_core::message::severity_level::debug, "XRT", bdMsg.str());

// Write to shim DMA BD AxiMM registers
XAie_DmaWriteBd(&aieDevInst, &(gmioDMAInsts[i].shimDmaInst), gmioDMAInsts[i].gmioTileLoc, bdNum);

XAie_DmaChannelPushBdToQueue(&aieDevInst, gmioDMAInsts[i].gmioTileLoc, channelNumber, dir, bdNum);

if (!tranxHandler->submitTransaction(&aieDevInst, context))
return false;

xrt_core::message::send(severity_level::info, "XRT", "Successfully scheduled AIE Trace Offloading VE2.");
}

bufferInitialized = true;
return bufferInitialized;
}
#endif

// TODO: NPU3 does not have lines 199-213. why?

#ifdef XDP_VE2_ZOCL_BUILD
void AIETraceOffload::endReadTrace()
{
// reset
Expand All @@ -187,7 +305,7 @@ void AIETraceOffload::endReadTrace()

if (isPLIO) {
deviceIntf->resetAIETs2mm(i);
// deviceIntf->freeTraceBuf(b.bufId);
// deviceIntf->freeTraceBuf(b.bufId);
} else {
VPDatabase* db = VPDatabase::Instance();
TraceGMIO* traceGMIO = (db->getStaticInfo()).getTraceGMIO(deviceId, i);
Expand All @@ -204,6 +322,29 @@ void AIETraceOffload::endReadTrace()
}
bufferInitialized = false;
}
#else // XDNA
void AIETraceOffload::endReadTrace()
{
// reset
for (uint64_t i = 0; i < numStream ; ++i) {
if (!buffers[i].bufId)
continue;

VPDatabase* db = VPDatabase::Instance();
TraceGMIO* traceGMIO = (db->getStaticInfo()).getTraceGMIO(deviceId, i);

// channelNumber: (0-S2MM0,1-S2MM1,2-MM2S0,3-MM2S1)
// Enable shim DMA channel, need to start first so the status is correct
uint16_t channelNumber = (traceGMIO->channelNumber > 1) ? (traceGMIO->channelNumber - 2) : traceGMIO->channelNumber;
XAie_DmaDirection dir = (traceGMIO->channelNumber > 1) ? DMA_MM2S : DMA_S2MM;

XAie_DmaChannelDisable(&aieDevInst, gmioDMAInsts[i].gmioTileLoc, channelNumber, dir);

buffers[i].bufId = 0;
}
bufferInitialized = false;
}
#endif

void AIETraceOffload::readTraceGMIO(bool final)
{
Expand All @@ -225,8 +366,13 @@ void AIETraceOffload::readTraceGMIO(bool final)
}
}

// TODO: only for zocl right now since xdna does not support plio right now, and this function is only for plio
void AIETraceOffload::readTracePLIO(bool final)
{
#if defined(XDP_VE2_BUILD) && ! defined(XDP_VE2_ZOCL_BUILD)
return;
#endif

if (mCircularBufOverwrite)
return;

Expand Down Expand Up @@ -310,6 +456,7 @@ void AIETraceOffload::readTracePLIO(bool final)
}
}

#ifdef XDP_VE2_ZOCL_BUILD
uint64_t AIETraceOffload::syncAndLog(uint64_t index)
{
auto& bd = buffers[index];
Expand Down Expand Up @@ -349,6 +496,39 @@ uint64_t AIETraceOffload::syncAndLog(uint64_t index)
traceLogger->addAIETraceData(index, hostBuf, nBytes, mEnCircularBuf);
return nBytes;
}
#else // XDNA
uint64_t AIETraceOffload::syncAndLog(uint64_t index)
{
auto& bd = buffers[index];

if (bd.offset >= bd.usedSz)
return 0;

// Amount of newly written trace
uint64_t nBytes = bd.usedSz - bd.offset;

// Sync to host
xrt_bos[index].sync(XCL_BO_SYNC_BO_FROM_DEVICE, nBytes, bd.offset);
auto in_bo_map = xrt_bos[index].map<uint32_t*>() + bd.offset;

if (!in_bo_map)
return 0;

// Find amount of non-zero data in buffer
if (!isPLIO)
nBytes = searchWrittenBytes((void*)in_bo_map, bufAllocSz);

// check for full buffer
if ((bd.offset + nBytes >= bufAllocSz) && !mEnCircularBuf) {
bd.isFull = true;
bd.offloadDone = true;
}

// Log nBytes of trace
traceLogger->addAIETraceData(index, (void*)in_bo_map, nBytes, mEnCircularBuf);
return nBytes;
}
#endif

bool AIETraceOffload::isTraceBufferFull()
{
Expand All @@ -360,8 +540,13 @@ bool AIETraceOffload::isTraceBufferFull()
return false;
}

// TODO: only for zocl right now since xdna does not support plio right now, and this function is only for plio
void AIETraceOffload::checkCircularBufferSupport()
{
#if defined(XDP_VE2_BUILD) && ! defined(XDP_VE2_ZOCL_BUILD)
return;
#endif

mEnCircularBuf = xrt_core::config::get_aie_trace_settings_reuse_buffer();
if (!mEnCircularBuf)
return;
Expand Down Expand Up @@ -458,9 +643,9 @@ void AIETraceOffload::offloadFinished()
uint64_t AIETraceOffload::searchWrittenBytes(void* buf, uint64_t bytes)
{
/*
* Look For trace boundary using binary search.
* Use Dword to be safe
*/
* Look For trace boundary using binary search.
* Use Dword to be safe
*/
auto words = static_cast<uint64_t *>(buf);
uint64_t wordcount = bytes / TRACE_PACKET_SIZE;

Expand Down
Loading
Loading