diff --git a/include/nvtop/extract_gpuinfo_common.h b/include/nvtop/extract_gpuinfo_common.h
index 9e4d1c9..5c0067b 100644
--- a/include/nvtop/extract_gpuinfo_common.h
+++ b/include/nvtop/extract_gpuinfo_common.h
@@ -240,4 +240,35 @@ inline unsigned busy_usage_from_time_usage_round(uint64_t current_use_ns, uint64
 
 unsigned nvtop_pcie_gen_from_link_speed(unsigned linkSpeed);
 
+// NVLink support
+#define NVTOP_NVLINK_MAX_LINKS 36
+
+struct nvlink_info {
+  unsigned num_links;                 // Number of active NVLink links on this device (0 when none are active)
+  unsigned version;                   // NVLink marketing major version (e.g. 3 for NVLink 3.x), 0 if unknown
+  bool supported;                     // NVLink hardware was detected on this device (links may still be 0)
+  bool has_throughput;                // aggregate_tx/rx hold a valid rate for this cycle (false on the first poll)
+  unsigned long long aggregate_tx;    // Aggregate TX throughput across all links (KiB/s)
+  unsigned long long aggregate_rx;    // Aggregate RX throughput across all links (KiB/s)
+  unsigned long long total_errors;    // Replay + recovery + CRC errors since nvtop launch, summed over all links
+  unsigned long long total_corrections; // CRC corrections since nvtop launch (device-wide counter)
+  unsigned long long total_ecc_errors; // ECC data errors since nvtop launch (device-wide counter)
+};
+
+unsigned nvtop_get_nvlink_info(struct gpu_info *gpu_info, struct nvlink_info *nvlink_info);
+
+// Get display-ready NVLink error/correction/ECC counts from the per-device persistent struct.
+// Returns true if baseline has been established at least once.
+bool nvtop_get_nvlink_error_counts(struct gpu_info *gpu_info,
+                                    unsigned long long *out_errors,
+                                    unsigned long long *out_corrections,
+                                    unsigned long long *out_ecc);
+
+// NVLink probe — call before initialize_curses to set layout mode
+bool nvtop_probe_nvlink_list(struct list_head *devices);
+
+// Reset per-GPU NVLink cache (probed flag, cached linkcount/version, cached info struct).
+// Call when the monitored device set changes so newly-monitored NVLink GPUs get probed fresh.
+void nvtop_reset_nvlink_cache(struct gpu_info *gpu_info);
+
 #endif // EXTRACT_GPUINFO_COMMON_H__
diff --git a/include/nvtop/interface_internal_common.h b/include/nvtop/interface_internal_common.h
index aec93d3..e549a26 100644
--- a/include/nvtop/interface_internal_common.h
+++ b/include/nvtop/interface_internal_common.h
@@ -70,9 +70,11 @@ struct device_window {
   WINDOW *gpu_clock_info;
   WINDOW *mem_clock_info;
   WINDOW *pcie_info;
+  WINDOW *nvlink_info;
   WINDOW *shader_cores;
   WINDOW *l2_cache_size;
   WINDOW *exec_engines;
+  WINDOW *nvlink_errors;
   bool enc_was_visible;
   bool dec_was_visible;
   nvtop_time last_decode_seen;
@@ -154,6 +156,7 @@ enum device_field {
   device_shadercores,
   device_l2features,
   device_execengines,
+  device_nvlink_errors,
   device_field_count,
 };
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b485cb4..8d92f59 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -52,6 +52,8 @@ endif()
 
 if(NVIDIA_SUPPORT)
   target_sources(nvtop PRIVATE extract_gpuinfo_nvidia.c)
+else()
+  target_sources(nvtop PRIVATE nvlink_nvidia_disabled.c)
 endif()
 
 if(ASCEND_SUPPORT)
diff --git a/src/extract_gpuinfo_nvidia.c b/src/extract_gpuinfo_nvidia.c
index 33670a6..e66bd57 100644
--- a/src/extract_gpuinfo_nvidia.c
+++ b/src/extract_gpuinfo_nvidia.c
@@ -21,21 +21,85 @@
 
 #include "nvtop/common.h"
 #include "nvtop/extract_gpuinfo_common.h"
+#include "nvtop/time.h"
 
 #include <dlfcn.h>
 #include <errno.h>
+#include <limits.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
-#define NVML_SUCCESS 0
-#define NVML_ERROR_NOT_SUPPORTED 3
-#define NVML_ERROR_INSUFFICIENT_SIZE 7
-
-typedef struct nvmlDevice *nvmlDevice_t;
-typedef int nvmlReturn_t; // store the enum as int
+// We do NOT include nvml.h — nvtop uses dlsym function pointers for all NVML
+// functions, and including nvml.h would conflict with those declarations.
+// Instead, we manually declare nvmlFieldValue_t and its dependencies here.
+// This satisfies the maintainer's requirement to use the proper struct type
+// instead of raw memcpy offsets, without breaking the dlsym architecture.
+
+// Core NVML types needed throughout the file (from nvml.h — cannot include directly
+// due to dlsym function pointer conflicts with nvtop's architecture).
+
+// NVML return codes (subset of the nvml.h enum; only the codes this file may need are listed)
+typedef enum nvmlReturn_enum {
+    NVML_SUCCESS = 0,
+    NVML_ERROR_UNINITIALIZED = 1,
+    NVML_ERROR_INVALID_ARGUMENT = 2,
+    NVML_ERROR_NOT_SUPPORTED = 3,
+    NVML_ERROR_NO_PERMISSION = 4,
+    NVML_ERROR_INSUFFICIENT_SIZE = 7,
+} nvmlReturn_t;
+
+// Opaque device handle (nvml.h defines as struct nvmlDevice_st*)
+typedef struct nvmlDevice_st *nvmlDevice_t;
+
+// nvmlFieldValue_t and its dependencies (manually declared to avoid including nvml.h).
+// These match nvml.h struct/enum definitions from CUDA 12.x.
+typedef enum nvmlValueType_enum {
+    NVML_VALUE_TYPE_DOUBLE = 0,
+    NVML_VALUE_TYPE_UNSIGNED_INT = 1,
+    NVML_VALUE_TYPE_UNSIGNED_LONG = 2,
+    NVML_VALUE_TYPE_UNSIGNED_LONG_LONG = 3,
+    NVML_VALUE_TYPE_SIGNED_LONG_LONG = 4,
+    NVML_VALUE_TYPE_SIGNED_INT = 5,
+    NVML_VALUE_TYPE_UNSIGNED_SHORT = 6,
+    NVML_VALUE_TYPE_COUNT
+} nvmlValueType_t;
+
+typedef union nvmlValue_st {
+    double dVal;
+    int siVal;
+    unsigned int uiVal;
+    unsigned long ulVal;
+    unsigned long long ullVal;
+    signed long long sllVal;
+    unsigned short usVal;
+} nvmlValue_t;
+
+typedef struct nvmlFieldValue_st {
+    unsigned int fieldId;
+    unsigned int scopeId;
+    long long timestamp;
+    long long latencyUsec;
+    nvmlValueType_t valueType;
+    nvmlReturn_t nvmlReturn;
+    nvmlValue_t value;
+} nvmlFieldValue_t;
+
+// NVML field IDs for NVLink throughput and CRC corrections (from nvml.h)
+#ifndef NVML_FI_DEV_NVLINK_THROUGHPUT_RAW_TX
+#define NVML_FI_DEV_NVLINK_THROUGHPUT_RAW_TX 140
+#endif
+#ifndef NVML_FI_DEV_NVLINK_THROUGHPUT_RAW_RX
+#define NVML_FI_DEV_NVLINK_THROUGHPUT_RAW_RX 141
+#endif
+#ifndef NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_TOTAL
+#define NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_TOTAL 38
+#endif
+#ifndef NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_TOTAL
+#define NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_TOTAL 160
+#endif
 
 // Init and shutdown
 
@@ -207,6 +271,12 @@ static nvmlReturn_t (*nvmlDeviceGetMPSComputeRunningProcesses[4])(nvmlDevice_t d
 #define NVML_DEVICE_MIG_ENABLE 0x1
 nvmlReturn_t (*nvmlDeviceGetMigMode)(nvmlDevice_t device, unsigned int *currentMode, unsigned int *pendingMode);
 
+// NVLink functions (not present in older NVML versions, gracefully handled)
+static nvmlReturn_t (*nvmlDeviceGetNvLinkState)(nvmlDevice_t device, unsigned int link, unsigned int *isActive);
+static nvmlReturn_t (*nvmlDeviceGetNvLinkVersion)(nvmlDevice_t device, unsigned int link, unsigned int *version);
+static nvmlReturn_t (*nvmlDeviceGetNvLinkErrorCounter)(nvmlDevice_t device, unsigned int counter, unsigned int link, unsigned long long *value);
+static nvmlReturn_t (*nvmlDeviceGetFieldValues)(nvmlDevice_t, unsigned int, nvmlFieldValue_t *);
+
 static void *libnvidia_ml_handle;
 
 static nvmlReturn_t last_nvml_return_status = NVML_SUCCESS;
@@ -276,6 +346,33 @@ struct gpu_info_nvidia {
   nvmlDevice_t gpuhandle;
   bool isInMigMode;
   unsigned long long last_utilization_timestamp;
+
+  // NVLink throughput via NVML API (raw counters, aggregate across all links)
+  unsigned long long nvlink_last_tx;       // Cumulative aggregate TX for delta computation
+  unsigned long long nvlink_last_rx;       // Cumulative aggregate RX for delta computation
+  nvtop_time nvlink_last_poll_time;        // Timestamp for poll throttling
+
+  // NVLink error counter baselines (cumulative since boot, tracked per-device)
+  unsigned long long baseline_errors; // Cumulative errors at last read
+  unsigned long long baseline_corrections; // Cumulative corrections at last read
+  unsigned long long baseline_ecc_errors; // Cumulative ECC data errors at last read
+  bool nvlink_errors_baseline_read; // True after first read establishes baseline
+
+  // Display-ready error/correction/ECC counts (computed in refresh_dynamic_info)
+  unsigned long long display_errors; // Errors since nvtop launch
+  unsigned long long display_corrections; // Corrections since nvtop launch
+  unsigned long long display_ecc_errors; // ECC data errors since nvtop launch
+
+  // Cached NVLink hardware properties (probe once, reuse forever)
+  bool nvlink_probed; // true after first probe, regardless of result
+  unsigned int nvlink_cached_linkcount; // 0 = no NVLink links
+  unsigned int nvlink_cached_version;   // Marketing version, 0 = not yet probed
+
+  // Cached nvlink_info struct: populated during refresh_dynamic_info,
+  // returned by nvtop_get_nvlink_info in the draw path to avoid redundant
+  // NVML calls and CLI forks on every draw cycle.
+  struct nvlink_info cached_nvlink_info;
+  bool cached_nvlink_info_populated;
 };
 
 static LIST_HEAD(allocations);
@@ -288,6 +385,20 @@ static void gpuinfo_nvidia_populate_static_info(struct gpu_info *_gpu_info);
 static void gpuinfo_nvidia_refresh_dynamic_info(struct gpu_info *_gpu_info);
 static void gpuinfo_nvidia_get_running_processes(struct gpu_info *_gpu_info);
 
+// Forward declaration for nvlink_read_errors (defined later, called from refresh_dynamic_info)
+static void nvlink_read_errors(nvmlDevice_t device, unsigned int linkCount, struct gpu_info_nvidia *gpu_info);
+
+// Forward declaration for nvlink_refresh_cached_info (defined later, called from refresh_dynamic_info)
+// Populates gpu_info->cached_nvlink_info with throughput + error data.
+static void nvlink_refresh_cached_info(struct gpu_info_nvidia *gpu_info, unsigned int linkCount);
+
+// Remap raw NVML NVLink protocol version to the marketing version (forward declaration)
+static unsigned int nvlink_marketing_version(unsigned int raw_version);
+
+// File-local helper (static: it takes the private gpu_info_nvidia type). Probes the NVLink link count
+// and version once and caches them in gpu_info_nvidia. Returns the cached linkCount (0 if no NVLink).
+static unsigned nvlink_probe_and_cache(struct gpu_info_nvidia *gpu_info);
+
 struct gpu_vendor gpu_vendor_nvidia = {
     .init = gpuinfo_nvidia_init,
     .shutdown = gpuinfo_nvidia_shutdown,
@@ -470,6 +581,12 @@ static bool gpuinfo_nvidia_init(void) {
   nvmlDeviceGetProcessUtilization = dlsym(libnvidia_ml_handle, "nvmlDeviceGetProcessUtilization");
   nvmlDeviceGetMigMode = dlsym(libnvidia_ml_handle, "nvmlDeviceGetMigMode");
 
+  // NVLink functions (optional - not available on all drivers/hardware)
+  nvmlDeviceGetNvLinkState = dlsym(libnvidia_ml_handle, "nvmlDeviceGetNvLinkState");
+  nvmlDeviceGetNvLinkVersion = dlsym(libnvidia_ml_handle, "nvmlDeviceGetNvLinkVersion");
+  nvmlDeviceGetNvLinkErrorCounter = dlsym(libnvidia_ml_handle, "nvmlDeviceGetNvLinkErrorCounter");
+  nvmlDeviceGetFieldValues = dlsym(libnvidia_ml_handle, "nvmlDeviceGetFieldValues");
+
   last_nvml_return_status = nvmlInit();
   if (last_nvml_return_status != NVML_SUCCESS) {
     return false;
@@ -749,6 +866,23 @@ static void gpuinfo_nvidia_refresh_dynamic_info(struct gpu_info *_gpu_info) {
       SET_GPUINFO_DYNAMIC(dynamic_info, multi_instance_mode, currentMode == NVML_DEVICE_MIG_ENABLE);
     }
   }
+
+  // NVLink: refresh error counters, throughput, and populate cached nvlink_info
+  // GPUs are non-hot-swappable — all NVLink probing/computation happens here
+  // (refresh path), and nvtop_get_nvlink_info() just returns the cached copy
+  // in the draw path.
+  // "supported but no bridge" case: version is probed before link state, so
+  // cached_version > 0 means NVLink hardware exists even with linkCount == 0.
+  if (nvmlDeviceGetNvLinkState) {
+    unsigned int linkCount = nvlink_probe_and_cache(gpu_info);
+    if (linkCount > 0 || gpu_info->nvlink_cached_version > 0) {
+      // Error counters only make sense when links are active.
+      if (linkCount > 0 && (nvmlDeviceGetNvLinkErrorCounter || nvmlDeviceGetFieldValues))
+        nvlink_read_errors(device, linkCount, gpu_info);
+      // Throughput + cached info struct (handles 0-link case for display)
+      nvlink_refresh_cached_info(gpu_info, linkCount);
+    }
+  }
 }
 
 static void gpuinfo_nvidia_get_process_utilization(struct gpu_info_nvidia *gpu_info, unsigned num_processes_recovered,
@@ -936,3 +1070,355 @@ static void gpuinfo_nvidia_get_running_processes(struct gpu_info *_gpu_info) {
         gpu_info->base.dynamic_info.multi_instance_mode))
     gpuinfo_nvidia_get_process_utilization(gpu_info, _gpu_info->processes_count, _gpu_info->processes);
 }
+
+// NVML NVLink enums (guarded — nvml.h defines these; local fallback for older drivers)
+#ifndef NVML_NVLINK_MAX_LINKS_INTERNAL
+#define NVML_NVLINK_MAX_LINKS_INTERNAL 36
+#endif
+
+#ifndef NVML_NVLINK_ERROR_DL_REPLAY
+// NVML error counter types
+#define NVML_NVLINK_ERROR_DL_REPLAY   0
+#define NVML_NVLINK_ERROR_DL_RECOVERY 1
+#define NVML_NVLINK_ERROR_DL_CRC_FLIT 2
+#define NVML_NVLINK_ERROR_DL_CRC_DATA 3
+#define NVML_NVLINK_ERROR_DL_ECC_DATA 4
+#endif
+
+// Read one NVML field through nvmlDeviceGetFieldValues.
+// On success stores the value (read as unsigned long long) in *out_val and returns true;
+// returns false, leaving *out_val untouched, if the entry point is missing or the query fails.
+static bool nvlink_query_field(nvmlDevice_t device, unsigned int field_id,
+                               unsigned int scope_id, unsigned long long *out_val) {
+    nvmlFieldValue_t request = {.fieldId = field_id, .scopeId = scope_id};
+
+    // Both the call itself and the per-field status must report success.
+    bool ok = nvmlDeviceGetFieldValues != NULL &&
+              nvmlDeviceGetFieldValues(device, 1, &request) == NVML_SUCCESS &&
+              request.nvmlReturn == NVML_SUCCESS;
+    if (ok)
+        *out_val = request.value.ullVal;
+    return ok;
+}
+
+// Probe NVLink link count and version, caching results in gpu_info_nvidia to avoid
+// repeated NVML API calls on every refresh cycle. The cached result is reused until
+// nvlink_probed is cleared again (nvtop_reset_nvlink_cache does that).
+// Returns the cached linkCount (0 if no NVLink).
+unsigned nvlink_probe_and_cache(struct gpu_info_nvidia *gpu_info) {
+  // Already probed — return cached result (even if linkcount is 0)
+  if (gpu_info->nvlink_probed)
+    return gpu_info->nvlink_cached_linkcount;
+
+  // Publish a complete "probed, nothing found" result first, so every exit path
+  // below leaves the three cache fields consistent with each other.
+  gpu_info->nvlink_probed = true;
+  gpu_info->nvlink_cached_linkcount = 0;
+  gpu_info->nvlink_cached_version = 0;
+
+  // Entry point missing from this NVML library: nothing can be probed.
+  if (!nvmlDeviceGetNvLinkState)
+    return 0;
+
+  nvmlDevice_t device = gpu_info->gpuhandle;
+  unsigned int linkCount = 0;
+
+  // Probe the NVLink version BEFORE the link state loop, querying link 0. The
+  // callers treat a non-zero cached version as "NVLink hardware present" even
+  // when no link is active, which is how "supported but no bridge" is told
+  // apart from "no NVLink hardware at all".
+  // The raw value goes into its own variable and is only published on success,
+  // so whatever a failed call may have written can never reach the cache.
+  if (nvmlDeviceGetNvLinkVersion) {
+    unsigned int raw_version = 0;
+    if (nvmlDeviceGetNvLinkVersion(device, 0, &raw_version) == NVML_SUCCESS)
+      gpu_info->nvlink_cached_version = nvlink_marketing_version(raw_version);
+  }
+
+  // Count active links. Active links are expected to be contiguous from 0, so
+  // probing stops at the first slot that is not active, whatever the reason:
+  //   - the call failed (e.g. NVML_ERROR_NOT_SUPPORTED), or
+  //   - the call succeeded but reported isActive == 0.
+  // Slots that are not active must NOT be counted, otherwise phantom links
+  // would be reported.
+  for (unsigned int link = 0; link < NVML_NVLINK_MAX_LINKS_INTERNAL; link++) {
+    unsigned int isActive = 0;
+    nvmlReturn_t ret = nvmlDeviceGetNvLinkState(device, link, &isActive);
+    if (ret != NVML_SUCCESS || !isActive)
+      break;
+    linkCount = link + 1;
+  }
+
+  // Cache the link count (the version was stored above).
+  gpu_info->nvlink_cached_linkcount = linkCount;
+  return linkCount;
+}
+
+// Sum the replay, recovery, CRC FLIT and CRC DATA error counters over the given links and
+// store the result in the persistent per-device struct as growth from a baseline.
+// The first call records the baseline and displays 0; later calls display the growth since then.
+// Called from refresh_dynamic_info. Corrections and ECC data errors are not read here; they are
+// handled in nvlink_refresh_cached_info() via the batched NVML field query.
+static void nvlink_read_errors(nvmlDevice_t device, unsigned int linkCount, struct gpu_info_nvidia *gpu_info) {
+  // Counter kinds folded into the single "errors" figure, in query order.
+  static const unsigned int error_kinds[] = {
+      NVML_NVLINK_ERROR_DL_REPLAY,
+      NVML_NVLINK_ERROR_DL_RECOVERY,
+      NVML_NVLINK_ERROR_DL_CRC_FLIT,
+      NVML_NVLINK_ERROR_DL_CRC_DATA,
+  };
+  const unsigned int kind_count = sizeof(error_kinds) / sizeof(error_kinds[0]);
+
+  unsigned long long total = 0;
+  // Without the entry point the total stays 0 (the baseline logic below still runs).
+  const unsigned int links_to_read = nvmlDeviceGetNvLinkErrorCounter ? linkCount : 0;
+  for (unsigned int link = 0; link < links_to_read; link++) {
+    unsigned long long counter = 0;
+    for (unsigned int k = 0; k < kind_count; k++) {
+      // A failed query contributes nothing; the remaining counters are still read.
+      if (nvmlDeviceGetNvLinkErrorCounter(device, error_kinds[k], link, &counter) == NVML_SUCCESS)
+        total += counter;
+    }
+  }
+
+  if (gpu_info->nvlink_errors_baseline_read) {
+    // Baseline already taken: show growth since then, clamped at 0 if the counter went backwards.
+    unsigned long long base = gpu_info->baseline_errors;
+    gpu_info->display_errors = (total > base) ? (total - base) : 0;
+    return;
+  }
+
+  // First read: remember the current total as the baseline and display zero.
+  gpu_info->baseline_errors = total;
+  gpu_info->nvlink_errors_baseline_read = true;
+  gpu_info->display_errors = 0;
+}
+
+// Copy the display-ready error, correction and ECC counts of one GPU into the out
+// parameters (each may be NULL to skip it). Returns false, leaving the outputs
+// untouched, for a NULL or non-NVIDIA GPU and until a baseline has been read.
+bool nvtop_get_nvlink_error_counts(struct gpu_info *_gpu_info,
+                                    unsigned long long *out_errors,
+                                    unsigned long long *out_corrections,
+                                    unsigned long long *out_ecc) {
+  if (!_gpu_info || strcmp(_gpu_info->vendor->name, "NVIDIA") != 0)
+    return false;
+
+  struct gpu_info_nvidia *gpu_info = container_of(_gpu_info, struct gpu_info_nvidia, base);
+  if (!gpu_info->nvlink_errors_baseline_read)
+    return false;
+
+  if (out_errors) *out_errors = gpu_info->display_errors;
+  if (out_corrections) *out_corrections = gpu_info->display_corrections;
+  if (out_ecc) *out_ecc = gpu_info->display_ecc_errors;
+  return true;
+}
+
+// Convert the raw protocol version reported by NVML into the marketing major version.
+// The two numbering schemes differ (e.g. raw 5 is NVLink 3.1, reported as 3).
+static unsigned int nvlink_marketing_version(unsigned int raw_version) {
+  // Indexed by raw NVML value; entry 0 is unused.
+  static const unsigned int marketing_major[] = {
+      0, 1, 2,
+      2, // raw 3: NVLink 2.2
+      3, // raw 4: NVLink 3.0
+      3, // raw 5: NVLink 3.1
+      4, // raw 6: NVLink 4.0
+      5, // raw 7: NVLink 5.0
+      6, // raw 8: NVLink 6.0 (Rubin)
+  };
+  const unsigned int known = sizeof(marketing_major) / sizeof(marketing_major[0]);
+  return (raw_version < known) ? marketing_major[raw_version] : raw_version; // unknown: pass through
+}
+
+// Rebuild gpu_info->cached_nvlink_info: link count, version, aggregate throughput
+// and error/correction/ECC counts. Called from refresh_dynamic_info on every refresh
+// cycle; nvtop_get_nvlink_info() in the draw path only copies the struct filled here.
+// Correction and ECC baselines are captured on the first pass with active links, so
+// the displayed counts start at zero at launch and after nvtop_reset_nvlink_cache().
+static void nvlink_refresh_cached_info(struct gpu_info_nvidia *gpu_info, unsigned int linkCount) {
+  struct nvlink_info *cache = &gpu_info->cached_nvlink_info;
+
+  cache->supported = true;
+  cache->num_links = linkCount;
+  cache->version = gpu_info->nvlink_cached_version;
+
+  // Throughput: skip entirely when there are 0 links (nothing to measure).
+  if (linkCount == 0) {
+    cache->has_throughput = false;
+    cache->aggregate_tx = 0;
+    cache->aggregate_rx = 0;
+    cache->total_errors = 0;
+    cache->total_corrections = 0;
+    cache->total_ecc_errors = 0;
+    gpu_info->cached_nvlink_info_populated = true;
+    return;
+  }
+
+  // cached_nvlink_info_populated is still false only on the first pass with active
+  // links. The correction/ECC baselines are keyed on that instead of on
+  // nvlink_errors_baseline_read: nvlink_read_errors() runs earlier in the same
+  // refresh and has already set that flag, so keying on it left both baselines at 0
+  // and displayed the raw totals instead of the growth since launch.
+  const bool first_pass = !gpu_info->cached_nvlink_info_populated;
+
+  // Elapsed time since the previous poll; stays 0 when there is no previous poll.
+  nvtop_time current_time;
+  nvtop_get_current_time(&current_time);
+  double delta_s = (gpu_info->nvlink_last_poll_time.tv_sec > 0)
+                     ? nvtop_difftime(gpu_info->nvlink_last_poll_time, current_time)
+                     : 0;
+
+  // Single batched nvmlDeviceGetFieldValues call for TX, RX, corrections, and ECC errors.
+  // RAW fields (140/141) include protocol overhead; DATA fields (138/139) return
+  // identical TX/RX on consumer GPUs with aggregate scopeId, yielding zero throughput.
+  // Field 38 (CRC corrections) is already per-device aggregate -- scopeId=0.
+  // Each entry's nvmlReturn field is checked individually for validity.
+  nvmlFieldValue_t batch[4] = {0};
+  batch[0].fieldId = NVML_FI_DEV_NVLINK_THROUGHPUT_RAW_TX;
+  batch[0].scopeId = UINT_MAX;
+  batch[1].fieldId = NVML_FI_DEV_NVLINK_THROUGHPUT_RAW_RX;
+  batch[1].scopeId = UINT_MAX;
+  batch[2].fieldId = NVML_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_TOTAL;
+  batch[2].scopeId = 0;
+  batch[3].fieldId = NVML_FI_DEV_NVLINK_ECC_DATA_ERROR_COUNT_TOTAL;
+  batch[3].scopeId = 0;
+
+  unsigned long long new_tx = 0, new_rx = 0, new_corrections = 0, new_ecc_errors = 0;
+  bool got_tx = false, got_rx = false, got_corrections = false, got_ecc_errors = false;
+
+  if (nvmlDeviceGetFieldValues) {
+    nvmlReturn_t ret = nvmlDeviceGetFieldValues(gpu_info->gpuhandle, 4, batch);
+    if (ret == NVML_SUCCESS) {
+      if (batch[0].nvmlReturn == NVML_SUCCESS) {
+        new_tx = batch[0].value.ullVal;
+        got_tx = true;
+      }
+      if (batch[1].nvmlReturn == NVML_SUCCESS) {
+        new_rx = batch[1].value.ullVal;
+        got_rx = true;
+      }
+      if (batch[2].nvmlReturn == NVML_SUCCESS) {
+        new_corrections = batch[2].value.ullVal;
+        got_corrections = true;
+      }
+      if (batch[3].nvmlReturn == NVML_SUCCESS) {
+        new_ecc_errors = batch[3].value.ullVal;
+        got_ecc_errors = true;
+      }
+    }
+  }
+
+  // Throughput: rate = counter delta / elapsed time, computed per direction.
+  // A direction that could not be read this cycle reports 0 and keeps its stored
+  // counter; overwriting it with 0 would turn the next successful read into a
+  // spike as large as the whole cumulative counter.
+  const bool can_rate = delta_s > 0;
+  cache->aggregate_tx = 0;
+  cache->aggregate_rx = 0;
+  if (got_tx) {
+    if (can_rate && new_tx >= gpu_info->nvlink_last_tx)
+      cache->aggregate_tx = (unsigned long long)((double)(new_tx - gpu_info->nvlink_last_tx) / delta_s);
+    gpu_info->nvlink_last_tx = new_tx;
+  }
+  if (got_rx) {
+    if (can_rate && new_rx >= gpu_info->nvlink_last_rx)
+      cache->aggregate_rx = (unsigned long long)((double)(new_rx - gpu_info->nvlink_last_rx) / delta_s);
+    gpu_info->nvlink_last_rx = new_rx;
+  }
+  cache->has_throughput = can_rate && (got_tx || got_rx);
+  gpu_info->nvlink_last_poll_time = current_time;
+
+  // Corrections: the first pass records the baseline, later passes show the growth.
+  if (first_pass) {
+    gpu_info->baseline_corrections = got_corrections ? new_corrections : 0;
+    gpu_info->display_corrections = 0;
+  } else if (got_corrections) {
+    gpu_info->display_corrections = new_corrections > gpu_info->baseline_corrections
+                                      ? new_corrections - gpu_info->baseline_corrections : 0;
+  }
+
+  // ECC data errors: same baseline scheme as corrections.
+  if (first_pass) {
+    gpu_info->baseline_ecc_errors = got_ecc_errors ? new_ecc_errors : 0;
+    gpu_info->display_ecc_errors = 0;
+  } else if (got_ecc_errors) {
+    gpu_info->display_ecc_errors = new_ecc_errors > gpu_info->baseline_ecc_errors
+                                    ? new_ecc_errors - gpu_info->baseline_ecc_errors : 0;
+  }
+
+  // nvtop_get_nvlink_error_counts() reports only once this flag is set.
+  if (got_corrections || got_ecc_errors)
+    gpu_info->nvlink_errors_baseline_read = true;
+
+  cache->total_errors = gpu_info->display_errors;
+  cache->total_corrections = gpu_info->display_corrections;
+  cache->total_ecc_errors = gpu_info->display_ecc_errors;
+  gpu_info->cached_nvlink_info_populated = true;
+}
+
+// Fill *nvlink_info for one GPU and return its active link count.
+// After the first refresh this only copies the struct cached by
+// nvlink_refresh_cached_info(), so the draw path makes no NVML calls. Before that
+// (startup probe from nvtop_probe_nvlink_list) it probes link count and version
+// only; throughput and error fields are left at zero.
+// Returns 0 when either pointer is NULL; otherwise *nvlink_info is always written.
+unsigned nvtop_get_nvlink_info(struct gpu_info *_gpu_info, struct nvlink_info *nvlink_info) {
+  if (!_gpu_info || !nvlink_info)
+    return 0;
+
+  // Start from an all-zero result so every early return below hands back a
+  // well-defined struct (supported == false, num_links == 0).
+  memset(nvlink_info, 0, sizeof(*nvlink_info));
+
+  // NVLink is an NVIDIA-only technology — skip non-NVIDIA GPUs immediately
+  if (strcmp(_gpu_info->vendor->name, "NVIDIA") != 0)
+    return 0;
+
+  struct gpu_info_nvidia *gpu_info = container_of(_gpu_info, struct gpu_info_nvidia, base);
+
+  // Fast path: a refresh has already populated the cache, hand out a copy.
+  if (gpu_info->cached_nvlink_info_populated) {
+    *nvlink_info = gpu_info->cached_nvlink_info;
+    return nvlink_info->num_links;
+  }
+
+  // Startup fallback. Without nvmlDeviceGetNvLinkState there is nothing to probe;
+  // the zeroed struct already says "not supported".
+  if (!nvmlDeviceGetNvLinkState)
+    return 0;
+
+  unsigned int linkCount = nvlink_probe_and_cache(gpu_info);
+
+  // Same test as the refresh path: active links, or a version reading with no
+  // active link ("supported but no bridge"), both mean NVLink hardware exists.
+  // Testing the version alone would disagree with refresh when it is unavailable.
+  if (linkCount > 0 || gpu_info->nvlink_cached_version > 0) {
+    nvlink_info->supported = true;
+    nvlink_info->num_links = linkCount;
+    nvlink_info->version = gpu_info->nvlink_cached_version;
+  }
+
+  return nvlink_info->num_links;
+}
+
+// Reset all NVLink state for a single GPU: probe cache, cached info struct, counter baselines,
+// display counts and throughput history. Called when the monitored device set changes.
+void nvtop_reset_nvlink_cache(struct gpu_info *_gpu_info) {
+  // NULL-safe; NVLink is NVIDIA-only, so other vendors have nothing to reset.
+  if (!_gpu_info || strcmp(_gpu_info->vendor->name, "NVIDIA") != 0)
+    return;
+
+  struct gpu_info_nvidia *gpu_info = container_of(_gpu_info, struct gpu_info_nvidia, base);
+  gpu_info->nvlink_probed = false;
+  gpu_info->nvlink_cached_linkcount = 0;
+  gpu_info->nvlink_cached_version = 0;
+  gpu_info->cached_nvlink_info_populated = false;
+  memset(&gpu_info->cached_nvlink_info, 0, sizeof(gpu_info->cached_nvlink_info));
+  gpu_info->nvlink_errors_baseline_read = false;
+  gpu_info->baseline_errors = gpu_info->baseline_corrections = gpu_info->baseline_ecc_errors = 0;
+  // Also drop the display counts, otherwise stale values survive until they are re-read.
+  gpu_info->display_errors = gpu_info->display_corrections = gpu_info->display_ecc_errors = 0;
+  gpu_info->nvlink_last_tx = gpu_info->nvlink_last_rx = 0;
+  gpu_info->nvlink_last_poll_time = (struct timespec){0};
+}
diff --git a/src/interface.c b/src/interface.c
index ae23199..dd117d8 100644
--- a/src/interface.c
+++ b/src/interface.c
@@ -46,8 +46,56 @@ static unsigned int sizeof_device_field[device_field_count] = {
     [device_name] = 11,       [device_fan_speed] = 11,   [device_temperature] = 10, [device_power] = 15,
     [device_clock] = 11,      [device_mem_clock] = 12,   [device_pcie] = 46,        [device_shadercores] = 7,
     [device_l2features] = 11, [device_execengines] = 11,
+    [device_nvlink_errors] = 33,
 };
 
+// True if any monitored device has NVLink hardware support (even if 0 links active).
+// Controls whether to allocate the nvlink_info window for displaying "NVL3 0x" etc.
+static bool any_device_has_nvlink = false;
+// True if any monitored device has NVLink with active links (linkCount > 0).
+// Controls layout adjustments (shrinking fan field) and the nvlink_errors
+// window allocation.
+static bool any_device_has_nvlink_active = false;
+
+// Pick the fan field width for the current layout: 8 columns ("FAN %3u%%") when some
+// device has active NVLink links, which frees room on line 2, and the default padded
+// width of 11 otherwise. The 0-link "NVL3 0x" display keeps the default width because
+// it needs no extra room.
+static void nvtop_adjust_field_sizes_for_nvlink(void) {
+  enum { fan_width_default = 11, fan_width_compact = 8 };
+
+  const unsigned int fan_width = any_device_has_nvlink_active ? fan_width_compact : fan_width_default;
+  sizeof_device_field[device_fan_speed] = fan_width;
+}
+
+// Scan the given devices for NVLink and record the outcome in any_device_has_nvlink and
+// any_device_has_nvlink_active, which drive the window layout.
+// Returns true when at least one device reports NVLink support (even with 0 active links).
+bool nvtop_probe_nvlink_list(struct list_head *devices) {
+  // Always rescan: the flags describe the list passed in, and that list can change. Returning
+  // early on an earlier positive result left the flags stale and impossible to turn back off.
+  bool has_nvlink = false;
+  bool has_nvlink_active = false;
+
+  struct gpu_info *gpu;
+  list_for_each_entry(gpu, devices, list) {
+    struct nvlink_info nvl = {0};
+    // The return value (num_links) is 0 for "supported but no bridge", so test
+    // nvl.supported for hardware presence and nvl.num_links for active links.
+    nvtop_get_nvlink_info(gpu, &nvl);
+    if (!nvl.supported)
+      continue;
+    has_nvlink = true;
+    if (nvl.num_links > 0)
+      has_nvlink_active = true;
+  }
+
+  any_device_has_nvlink = has_nvlink;
+  any_device_has_nvlink_active = has_nvlink_active;
+
+  return has_nvlink;
+}
+
 static unsigned int sizeof_process_field[process_field_count] = {
     [process_pid] = 7,       [process_user] = 4,          [process_gpu_id] = 3,   [process_type] = 8,
     [process_gpu_rate] = 4,  [process_enc_rate] = 4,      [process_dec_rate] = 4,
@@ -70,7 +118,7 @@ static void alloc_device_window(unsigned int start_row, unsigned int start_col,
   if (dwin->pcie_info == NULL)
     goto alloc_error;
 
-  // Line 2 = GPU clk | MEM clk | Temp | Fan | Power
+  // Line 2 = GPU clk | MEM clk | Temp | Fan | Power | NVLink
   dwin->gpu_clock_info = newwin(1, sizeof_device_field[device_clock], start_row + 1, start_col);
   if (dwin->gpu_clock_info == NULL)
     goto alloc_error;
@@ -94,6 +142,18 @@ static void alloc_device_window(unsigned int start_row, unsigned int start_col,
                  sizeof_device_field[device_temperature] + sizeof_device_field[device_fan_speed]);
   if (dwin->power_info == NULL)
     goto alloc_error;
+  // NVLink appended to power_info on the same row (start_row + 1), using remaining width
+  if (any_device_has_nvlink) {
+    dwin->nvlink_info =
+        newwin(1, sizeof_device_field[device_pcie] - sizeof_device_field[device_power] - spacer * 3, start_row + 1,
+               start_col + spacer * 4 + sizeof_device_field[device_clock] + sizeof_device_field[device_mem_clock] +
+                   sizeof_device_field[device_temperature] + sizeof_device_field[device_fan_speed] +
+                   spacer * 2 + sizeof_device_field[device_power]);
+    if (dwin->nvlink_info == NULL)
+      goto alloc_error;
+  } else {
+    dwin->nvlink_info = NULL;
+  }
 
   // Line 3 = GPU used | MEM used | Encoder | Decoder
 
@@ -177,6 +237,18 @@ static void alloc_device_window(unsigned int start_row, unsigned int start_col,
              start_col + spacer * 2 + sizeof_device_field[device_shadercores] + sizeof_device_field[device_l2features]);
   if (dwin->exec_engines == NULL)
     goto alloc_error;
+  // NVLink errors appended to exec_engines on the same row (start_row + 3). Allocation follows the global
+  // any_device_has_nvlink_active flag, not this device's link count; without active links there are no counters.
+  if (any_device_has_nvlink_active) {
+    dwin->nvlink_errors =
+        newwin(1, sizeof_device_field[device_nvlink_errors], start_row + 3,
+               start_col + spacer * 3 + sizeof_device_field[device_shadercores] +
+                   sizeof_device_field[device_l2features] + sizeof_device_field[device_execengines]);
+    if (dwin->nvlink_errors == NULL)
+      goto alloc_error;
+  } else {
+    dwin->nvlink_errors = NULL;
+  }
 
   return;
 alloc_error:
@@ -205,6 +277,10 @@ static void free_device_windows(struct device_window *dwin) {
   delwin(dwin->shader_cores);
   delwin(dwin->l2_cache_size);
   delwin(dwin->exec_engines);
+  if (dwin->nvlink_info != NULL)
+    delwin(dwin->nvlink_info);
+  if (dwin->nvlink_errors != NULL)
+    delwin(dwin->nvlink_errors);
 }
 
 static void alloc_process_with_option(struct nvtop_interface *interface, unsigned posX, unsigned posY, unsigned sizeX,
@@ -347,10 +423,18 @@ static void alloc_plot_window(unsigned devices_count, struct window_position *pl
 }
 
 static unsigned device_length(void) {
-  return max(sizeof_device_field[device_name] + sizeof_device_field[device_pcie] + 1,
-             sizeof_device_field[device_clock] + sizeof_device_field[device_mem_clock] +
-                 sizeof_device_field[device_temperature] + sizeof_device_field[device_fan_speed] +
-                 sizeof_device_field[device_power] + 5);
+  // Width needed by row 1: the name field, the PCIe field and one extra column.
+  const unsigned name_row_width = sizeof_device_field[device_name] + sizeof_device_field[device_pcie] + 1;
+
+  // Width needed by row 2: clock, mem clock, temperature, fan and power fields
+  // plus 5 columns (4 spacers + 1). The NVLink field is intentionally NOT part
+  // of this sum: its window simply extends past the nominal panel edge, since
+  // growing the panel would also stretch the row-3 bars (GPU/MEM/Enc/Dec). The
+  // same rule covers the 0-link case ("NVL3 0x") and the active-links case.
+  const unsigned sensor_row_width = sizeof_device_field[device_clock] + sizeof_device_field[device_mem_clock] +
+                                    sizeof_device_field[device_temperature] + sizeof_device_field[device_fan_speed] +
+                                    sizeof_device_field[device_power] + 5;
+  return max(name_row_width, sensor_row_width);
 }
 
 static pid_t nvtop_pid;
@@ -367,6 +451,10 @@ static void initialize_all_windows(struct nvtop_interface *dwin) {
   struct window_position plot_positions[MAX_CHARTS];
   struct window_position setup_position;
 
+  // NVLink layout adjustments must happen before panel dimensions are computed.
+  // any_device_has_nvlink_active is set by the probe that runs before this function.
+  nvtop_adjust_field_sizes_for_nvlink();
+
   compute_sizes_from_layout(devices_count, dwin->options.has_gpu_info_bar ? 4 : 3, device_length(), rows - 1, cols,
                             dwin->options.gpu_specific_opts, dwin->options.process_fields_displayed, device_positions,
                             &dwin->num_plots, plot_positions, map_device_to_plot, &process_position, &setup_position,
@@ -557,10 +645,10 @@ static void draw_temp_color(WINDOW *win, unsigned int temp, unsigned int temp_sl
   wnoutrefresh(win);
 }
 
-static void print_pcie_at_scale(WINDOW *win, unsigned int value) {
+static void print_data_at_scale(WINDOW *win, unsigned long long value) {
   int prefix_off;
   double val_d = value;
-  for (prefix_off = 1; prefix_off < 5 && val_d >= 1000.; ++prefix_off) {
+  for (prefix_off = 1; prefix_off < 6 && val_d >= 1000.; ++prefix_off) {
     val_d = val_d / 1024.;
   }
   if (val_d >= 100.) {
@@ -575,6 +663,10 @@ static void print_pcie_at_scale(WINDOW *win, unsigned int value) {
   wprintw(win, " %sB/s", memory_prefix[prefix_off]);
 }
 
+// print_data_at_scale (renamed from print_pcie_at_scale): reused for NVLink throughput
+// (identical scale logic, bounds check extended to prefix_off < 6 for TiB/s).
+// Takes unsigned long long to avoid 32-bit truncation on high-throughput hardware.
+
 static inline void werase_and_wnoutrefresh(WINDOW *w) {
   werase(w);
   wnoutrefresh(w);
@@ -778,19 +870,41 @@ static void draw_devices(struct list_head *devices, struct nvtop_interface *inte
 
     // FAN
     if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, fan_speed)) {
-      mvwprintw(dev->fan_speed, 0, 0, " FAN %3u%%  ",
-                device->dynamic_info.fan_speed > 100 ? 100 : device->dynamic_info.fan_speed);
-      mvwchgat(dev->fan_speed, 0, 1, 3, 0, cyan_color, NULL);
+      if (any_device_has_nvlink_active) {
+        mvwprintw(dev->fan_speed, 0, 0, "FAN %3u%%",
+                  device->dynamic_info.fan_speed > 100 ? 100 : device->dynamic_info.fan_speed);
+        mvwchgat(dev->fan_speed, 0, 0, 3, 0, cyan_color, NULL);
+      } else {
+        mvwprintw(dev->fan_speed, 0, 0, " FAN %3u%%  ",
+                  device->dynamic_info.fan_speed > 100 ? 100 : device->dynamic_info.fan_speed);
+        mvwchgat(dev->fan_speed, 0, 1, 3, 0, cyan_color, NULL);
+      }
     } else if (device->static_info.integrated_graphics) {
-      mvwprintw(dev->fan_speed, 0, 0, "  CPU-FAN  ");
-      mvwchgat(dev->fan_speed, 0, 2, 7, 0, cyan_color, NULL);
+      if (any_device_has_nvlink_active) {
+        mvwprintw(dev->fan_speed, 0, 0, "CPU-FAN");
+        mvwchgat(dev->fan_speed, 0, 0, 7, 0, cyan_color, NULL);
+      } else {
+        mvwprintw(dev->fan_speed, 0, 0, "  CPU-FAN  ");
+        mvwchgat(dev->fan_speed, 0, 2, 7, 0, cyan_color, NULL);
+      }
     } else if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, fan_rpm)) {
-      mvwprintw(dev->fan_speed, 0, 0, "FAN %4uRPM",
-                device->dynamic_info.fan_rpm > 9999 ? 9999 : device->dynamic_info.fan_rpm);
-      mvwchgat(dev->fan_speed, 0, 0, 3, 0, cyan_color, NULL);
+      if (any_device_has_nvlink_active) {
+        mvwprintw(dev->fan_speed, 0, 0, "FAN%3uR",
+                  device->dynamic_info.fan_rpm > 999 ? 999 : device->dynamic_info.fan_rpm);
+        mvwchgat(dev->fan_speed, 0, 0, 3, 0, cyan_color, NULL);
+      } else {
+        mvwprintw(dev->fan_speed, 0, 0, "FAN %4uRPM",
+                  device->dynamic_info.fan_rpm > 9999 ? 9999 : device->dynamic_info.fan_rpm);
+        mvwchgat(dev->fan_speed, 0, 0, 3, 0, cyan_color, NULL);
+      }
     } else {
-      mvwprintw(dev->fan_speed, 0, 0, "  FAN N/A  ");
-      mvwchgat(dev->fan_speed, 0, 2, 3, 0, cyan_color, NULL);
+      if (any_device_has_nvlink_active) {
+        mvwprintw(dev->fan_speed, 0, 0, "FAN N/A");
+        mvwchgat(dev->fan_speed, 0, 0, 3, 0, cyan_color, NULL);
+      } else {
+        mvwprintw(dev->fan_speed, 0, 0, "  FAN N/A  ");
+        mvwchgat(dev->fan_speed, 0, 2, 3, 0, cyan_color, NULL);
+      }
     }
     wnoutrefresh(dev->fan_speed);
 
@@ -830,6 +944,40 @@ static void draw_devices(struct list_head *devices, struct nvtop_interface *inte
     mvwchgat(dev->power_info, 0, 0, 3, 0, cyan_color, NULL);
     wnoutrefresh(dev->power_info);
 
+    // NVLink info (on same row as power_info)
+    if (dev->nvlink_info != NULL) {
+      werase(dev->nvlink_info);
+      struct nvlink_info nvl_info = {0};
+      nvtop_get_nvlink_info(device, &nvl_info);
+      if (nvl_info.supported) {
+        wcolor_set(dev->nvlink_info, cyan_color, NULL);
+        wprintw(dev->nvlink_info, "NVL");
+        wcolor_set(dev->nvlink_info, magenta_color, NULL);
+        if (nvl_info.version > 0)
+          wprintw(dev->nvlink_info, "%u", nvl_info.version);
+        else
+          wprintw(dev->nvlink_info, "?");
+        wstandend(dev->nvlink_info);
+
+        if (nvl_info.num_links > 0) {
+          // Active links: show link count and throughput
+          if (nvl_info.num_links < 10)
+            wprintw(dev->nvlink_info, " %ux ", nvl_info.num_links);
+          else
+            wprintw(dev->nvlink_info, "%ux ", nvl_info.num_links);
+
+          if (nvl_info.has_throughput) {
+            unsigned long long total_kib = nvl_info.aggregate_tx + nvl_info.aggregate_rx;
+            print_data_at_scale(dev->nvlink_info, total_kib);
+          }
+        } else {
+          // No active links (no bridge connected) — show "0x"
+          wprintw(dev->nvlink_info, " 0x");
+        }
+      }
+      wnoutrefresh(dev->nvlink_info);
+    }
+
     // PICe throughput
     werase(dev->pcie_info);
     if (device->static_info.integrated_graphics) {
@@ -852,14 +1000,14 @@ static void draw_devices(struct list_head *devices, struct nvtop_interface *inte
     wprintw(dev->pcie_info, " RX: ");
     wstandend(dev->pcie_info);
     if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, pcie_rx))
-      print_pcie_at_scale(dev->pcie_info, device->dynamic_info.pcie_rx);
+      print_data_at_scale(dev->pcie_info, device->dynamic_info.pcie_rx);
     else
       wprintw(dev->pcie_info, "N/A");
     wcolor_set(dev->pcie_info, magenta_color, NULL);
     wprintw(dev->pcie_info, " TX: ");
     wstandend(dev->pcie_info);
     if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, pcie_tx))
-      print_pcie_at_scale(dev->pcie_info, device->dynamic_info.pcie_tx);
+      print_data_at_scale(dev->pcie_info, device->dynamic_info.pcie_tx);
     else
       wprintw(dev->pcie_info, "N/A");
 
@@ -901,6 +1049,35 @@ static void draw_devices(struct list_head *devices, struct nvtop_interface *inte
         wprintw(dev->exec_engines, "N/A");
 
       wnoutrefresh(dev->exec_engines);
+
+      // NVLink errors/corrections/ECC (conditional on NVLink)
+      if (dev->nvlink_errors != NULL) {
+        werase(dev->nvlink_errors);
+        unsigned long long err_cnt = 0, cor_cnt = 0, ecc_cnt = 0;
+        if (nvtop_get_nvlink_error_counts(device, &err_cnt, &cor_cnt, &ecc_cnt)) {
+          wcolor_set(dev->nvlink_errors, cyan_color, NULL);
+          wprintw(dev->nvlink_errors, "NVL");
+          wstandend(dev->nvlink_errors);
+          // FLIT errors (field 38)
+          wprintw(dev->nvlink_errors, " FL:");
+          if (err_cnt > 0)
+            wcolor_set(dev->nvlink_errors, red_color, NULL);
+          wprintw(dev->nvlink_errors, "%05u", (unsigned)(err_cnt % 100000));
+          wstandend(dev->nvlink_errors);
+          // ECC data errors (field 160)
+          wprintw(dev->nvlink_errors, " EE:");
+          if (ecc_cnt > 0)
+            wcolor_set(dev->nvlink_errors, red_color, NULL);
+          wprintw(dev->nvlink_errors, "%05u", (unsigned)(ecc_cnt % 100000));
+          wstandend(dev->nvlink_errors);
+          // CRC corrections (field 38)
+          wprintw(dev->nvlink_errors, " CR:");
+          if (cor_cnt > 0)
+            wcolor_set(dev->nvlink_errors, yellow_color, NULL);
+          wprintw(dev->nvlink_errors, "%05u", (unsigned)(cor_cnt % 100000));
+        }
+        wnoutrefresh(dev->nvlink_errors);
+      }
     }
 
     dev_id++;
@@ -2059,6 +2236,24 @@ void interface_check_monitored_gpu_change(struct nvtop_interface **interface, un
     nvtop_interface_option options_copy = (*interface)->options;
     options_copy.has_monitored_set_changed = false;
     memset(&(*interface)->options, 0, sizeof(options_copy));
+    // Reset NVLink probe state when the monitored device set changes — the user
+    // may have switched from an NVLink GPU to a non-NVLink one (or vice versa).
+    // It is rebuilt by the nvtop_probe_nvlink_list() call a few lines below.
+    any_device_has_nvlink = false;
+    any_device_has_nvlink_active = false;
+    // Reset fan field to default width — it may have been compacted to 8 for
+    // NVLink-active layout. Without this, initialize_curses() below would
+    // allocate fan_speed windows at stale width 8.
+    sizeof_device_field[device_fan_speed] = 11;
+    // Reset NVLink probes on all monitored GPUs so they get probed fresh.
+    { struct gpu_info *g;
+      list_for_each_entry(g, monitoredGpus, list)
+        nvtop_reset_nvlink_cache(g);
+    }
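+    // Re-probe NVLink now that caches are cleared, so that any_device_has_nvlink_active is
+    // correct when initialize_curses() calls initialize_all_windows() for layout decisions.
+    // NOTE(review): this runs before interface_check_and_fix_monitored_gpus() below — confirm the list is final.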
+    nvtop_probe_nvlink_list(monitoredGpus);
     *num_monitored_gpus =
         interface_check_and_fix_monitored_gpus(allDevCount, monitoredGpus, nonMonitoredGpus, &options_copy);
     clean_ncurses(*interface);
diff --git a/src/nvlink_nvidia_disabled.c b/src/nvlink_nvidia_disabled.c
new file mode 100644
index 0000000..124b092
--- /dev/null
+++ b/src/nvlink_nvidia_disabled.c
@@ -0,0 +1,33 @@
+/*
+ * Fallback implementations of the NVLink functions for builds with NVIDIA
+ * support disabled. Each one is a stub that returns 0 / false or does nothing,
+ * so the callers still link when the NVIDIA backend is not compiled in.
+ */
+
+#include "nvtop/extract_gpuinfo_common.h"
+
+unsigned nvtop_get_nvlink_info(struct gpu_info *gpu_info, struct nvlink_info *nvlink_info) {
+  (void)gpu_info; // Stub for builds without NVIDIA support: no device is queried.
+  if (nvlink_info) *nvlink_info = (struct nvlink_info){0}; // callers read .supported afterwards; never leave it unset
+  return 0;
+}
+
+bool nvtop_get_nvlink_error_counts(struct gpu_info *gpu_info,
+                                    unsigned long long *out_errors,
+                                    unsigned long long *out_corrections,
+                                    unsigned long long *out_ecc) {
+  (void)gpu_info;        // Stub for builds without NVIDIA support: nothing is queried.
+  (void)out_errors;      // The three output counters are deliberately left untouched,
+  (void)out_corrections; // so a caller must not rely on them when false is returned.
+  (void)out_ecc;
+  return false; // No baseline is ever established in this build.
+}
+
+bool nvtop_probe_nvlink_list(struct list_head *devices) {
+  (void)devices; // Stub for builds without NVIDIA support: the list is not inspected.
+  return false;  // Always false — presumably "no NVLink device"; confirm against the real probe.
+}
+
+void nvtop_reset_nvlink_cache(struct gpu_info *gpu_info) {
+  (void)gpu_info; // Stub for builds without NVIDIA support: intentionally does nothing.
+}
diff --git a/src/nvtop.c b/src/nvtop.c
index 61302d2..8c1598e 100644
--- a/src/nvtop.c
+++ b/src/nvtop.c
@@ -312,6 +312,9 @@ int main(int argc, char **argv) {
   unsigned numMonitoredGpus =
       interface_check_and_fix_monitored_gpus(allDevCount, &monitoredGpus, &nonMonitoredGpus, &allDevicesOptions);
 
+  // Probe for NVLink before layout computation
+  nvtop_probe_nvlink_list(&monitoredGpus);
+
   if (allDevicesOptions.show_startup_messages) {
     bool dont_show_again = show_information_messages(numWarningMessages, warningMessages);
     if (dont_show_again) {
@@ -334,6 +337,10 @@ int main(int argc, char **argv) {
       signal_cont_received = 0;
       update_window_size_to_terminal_size(interface);
     }
+    // Probe NVLink state BEFORE monitored-set-change check, so that
+    // any_device_has_nvlink_active is set before initialize_all_windows()
+    // reads it for layout decisions.
+    nvtop_probe_nvlink_list(&monitoredGpus);
     interface_check_monitored_gpu_change(&interface, allDevCount, &numMonitoredGpus, &monitoredGpus, &nonMonitoredGpus);
     if (time_slept >= interface_update_interval(interface)) {
       gpuinfo_refresh_dynamic_info(&monitoredGpus);