Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ Unreleased
----------
- Added support for relocatable ELF and DWARF
- Added support for kernel module symbolization with DWARF data
- Improved DWARF symbolization performance for some debug info variants
- Bumped minimum supported Rust version to `1.88`


Expand Down
87 changes: 73 additions & 14 deletions src/dwarf/unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@

use std::cell::OnceCell;

use crate::util::OnceCellExt as _;

use super::function::Function;
use super::function::Functions;
use super::lines::Lines;
Expand Down Expand Up @@ -59,8 +61,11 @@ pub(super) struct UnitRange {
#[derive(Debug)]
pub(super) struct Unit<'dwarf> {
offset: gimli::DebugInfoOffset<<R<'dwarf> as gimli::Reader>::Offset>,
dw_unit: gimli::Unit<R<'dwarf>>,
lang: Option<gimli::DwLang>,
/// The gimli unit, lazily constructed from the header.
dw_unit: OnceCell<gimli::Result<gimli::Unit<R<'dwarf>>>>,
/// The unit header, stored for lazy construction of `dw_unit`.
header: gimli::UnitHeader<R<'dwarf>>,
lang: OnceCell<Option<gimli::DwLang>>,
lines: OnceCell<gimli::Result<Lines<'dwarf>>>,
funcs: OnceCell<gimli::Result<Functions<'dwarf>>>,
dwo: OnceCell<gimli::Result<Option<DwoUnit<'dwarf>>>>,
Expand All @@ -73,10 +78,12 @@ impl<'dwarf> Unit<'dwarf> {
lang: Option<gimli::DwLang>,
lines: OnceCell<Lines<'dwarf>>,
) -> Self {
let header = unit.header.clone();
Self {
offset,
dw_unit: unit,
lang,
dw_unit: OnceCell::from(Ok(unit)),
header,
lang: OnceCell::from(lang),
lines: lines
.into_inner()
.map(Result::Ok)
Expand All @@ -87,8 +94,57 @@ impl<'dwarf> Unit<'dwarf> {
}
}

/// Create a unit whose `gimli::Unit` has not been built yet.
///
/// Only the `offset` and the unit `header` are stored. Every lazily
/// populated member (`dw_unit`, `lang`, `lines`, `funcs`, `dwo`) starts
/// out as an empty cell, so the abbreviation and line program header
/// parsing involved in building the full `gimli::Unit` is postponed
/// until the unit is first accessed.
pub(super) fn new_deferred(
    offset: gimli::DebugInfoOffset<<R<'dwarf> as gimli::Reader>::Offset>,
    header: gimli::UnitHeader<R<'dwarf>>,
) -> Self {
    Self {
        // Data that is known up front.
        offset,
        header,
        // Everything else is filled in on demand.
        dw_unit: OnceCell::new(),
        lang: OnceCell::new(),
        lines: OnceCell::new(),
        funcs: OnceCell::new(),
        dwo: OnceCell::new(),
    }
}

/// Return the `gimli::Unit` for this unit, building it on first use.
///
/// The unit is constructed from the stored header via `units.dwarf()`
/// and the outcome (success or error) is kept in the `dw_unit` cell, so
/// later calls hand back the same result. Once a unit is available, the
/// source language is looked up from the attributes of the first DIE
/// and recorded in `lang` if that cell has not been populated yet.
///
/// # Errors
///
/// Returns the stored construction error, or any error encountered
/// while reading the first DIE's attributes during language lookup.
fn ensure_dw_unit(&self, units: &Units<'dwarf>) -> gimli::Result<&gimli::Unit<R<'dwarf>>> {
    let parsed = self
        .dw_unit
        .get_or_init(|| units.dwarf().unit(self.header.clone()));
    let dw_unit = match parsed {
        Ok(unit) => unit,
        Err(err) => return Err(*err),
    };

    // Populate the language from the first DIE if it is not known yet.
    let _lang = self.lang.get_or_try_init_(|| {
        let unit_ref = units.unit_ref(dw_unit);
        let mut entries = unit_ref.entries_raw(None)?;
        let Some(abbrev) = entries.read_abbreviation()? else {
            return gimli::Result::Ok(None)
        };

        for spec in abbrev.attributes() {
            let attr = entries.read_attribute(*spec)?;
            if attr.name() != gimli::DW_AT_language {
                continue;
            }
            // Stop at the first `DW_AT_language` attribute, whether or
            // not it carries a usable language value.
            return match attr.value() {
                gimli::AttributeValue::Language(lang) => Ok(Some(lang)),
                _ => Ok(None),
            }
        }
        gimli::Result::Ok(None)
    })?;

    Ok(dw_unit)
}

fn process_dwo(
&self,
dw_unit: &gimli::Unit<R<'dwarf>>,
dwo_dwarf: Option<gimli::Dwarf<R<'dwarf>>>,
) -> gimli::Result<Option<DwoUnit<'dwarf>>> {
let dwo_dwarf = match dwo_dwarf {
Expand All @@ -102,7 +158,7 @@ impl<'dwarf> Unit<'dwarf> {
};

let mut dwo_unit = dwo_dwarf.unit(dwo_header)?;
let () = dwo_unit.copy_relocated_attributes(&self.dw_unit);
let () = dwo_unit.copy_relocated_attributes(dw_unit);

Ok(Some(DwoUnit {
dwarf: dwo_dwarf,
Expand All @@ -114,12 +170,14 @@ impl<'dwarf> Unit<'dwarf> {
&'unit self,
units: &'unit Units<'dwarf>,
) -> gimli::Result<gimli::UnitRef<'unit, R<'dwarf>>> {
let dw_unit = self.ensure_dw_unit(units)?;

let map_dwo_result = |dwo_result: &'unit gimli::Result<Option<DwoUnit<'dwarf>>>| {
dwo_result
.as_ref()
.map(|dwo_unit| match dwo_unit {
Some(dwo_unit) => dwo_unit.unit_ref(),
None => units.unit_ref(&self.dw_unit),
None => units.unit_ref(dw_unit),
})
.map_err(|err| *err)
};
Expand All @@ -128,14 +186,14 @@ impl<'dwarf> Unit<'dwarf> {
return map_dwo_result(result)
}

let dwo_id = match self.dw_unit.dwo_id {
let dwo_id = match dw_unit.dwo_id {
Some(dwo_id) => dwo_id,
None => return map_dwo_result(self.dwo.get_or_init(|| Ok(None))),
};

let result = self
.dwo
.get_or_init(|| self.process_dwo(units.load_dwo(dwo_id)?));
.get_or_init(|| self.process_dwo(dw_unit, units.load_dwo(dwo_id)?));
map_dwo_result(result)
}

Expand Down Expand Up @@ -169,7 +227,8 @@ impl<'dwarf> Unit<'dwarf> {
&self,
units: &Units<'dwarf>,
) -> gimli::Result<Option<&Lines<'dwarf>>> {
let ilnp = match self.dw_unit.line_program {
let dw_unit = self.ensure_dw_unit(units)?;
let ilnp = match dw_unit.line_program {
Some(ref ilnp) => ilnp,
None => return Ok(None),
};
Expand All @@ -178,7 +237,7 @@ impl<'dwarf> Unit<'dwarf> {
.get_or_init(|| {
// NB: line information is always stored in the main
// debug file so this does not need to handle DWOs.
let unit = units.unit_ref(&self.dw_unit);
let unit = units.unit_ref(dw_unit);
Lines::parse(unit, ilnp.clone())
})
.as_ref()
Expand Down Expand Up @@ -249,15 +308,15 @@ impl<'dwarf> Unit<'dwarf> {
self.offset
}

/// Retrieve the underlying [`gimli::Unit`] object.
/// Retrieve the unit's [`gimli::UnitHeader`].
#[inline]
pub(super) fn dw_unit(&self) -> &gimli::Unit<R<'dwarf>> {
&self.dw_unit
pub(super) fn header(&self) -> &gimli::UnitHeader<R<'dwarf>> {
&self.header
}

/// Attempt to retrieve the compilation unit's source code language.
#[inline]
pub(super) fn language(&self) -> Option<gimli::DwLang> {
self.lang
self.lang.get().copied().flatten()
}
}
155 changes: 82 additions & 73 deletions src/dwarf/units.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,69 +88,12 @@ impl<'dwarf> Units<'dwarf> {
}
_ => true,
};
let dw_unit = match sections.unit(header) {
Ok(dw_unit) => dw_unit,
Err(_) => continue,
};

let dw_unit_ref = gimli::UnitRef::new(&sections, &dw_unit);
let mut lang = None;
// Try to get ranges from .debug_aranges first, before doing
// any expensive unit parsing (abbreviations, line program
// headers, etc.).
if need_unit_range {
let mut entries = dw_unit_ref.entries_raw(None)?;

let abbrev = match entries.read_abbreviation()? {
Some(abbrev) => abbrev,
None => continue,
};

let mut ranges = RangeAttributes::default();
for spec in abbrev.attributes() {
let attr = entries.read_attribute(*spec)?;
match attr.name() {
gimli::DW_AT_low_pc => match attr.value() {
gimli::AttributeValue::Addr(val) => ranges.low_pc = Some(val),
gimli::AttributeValue::DebugAddrIndex(index) => {
ranges.low_pc = Some(sections.address(&dw_unit, index)?);
}
_ => {}
},
gimli::DW_AT_high_pc => match attr.value() {
gimli::AttributeValue::Addr(val) => ranges.high_pc = Some(val),
gimli::AttributeValue::DebugAddrIndex(index) => {
ranges.high_pc = Some(sections.address(&dw_unit, index)?);
}
gimli::AttributeValue::Udata(val) => ranges.size = Some(val),
_ => {}
},
gimli::DW_AT_ranges => {
ranges.ranges_offset =
sections.attr_ranges_offset(&dw_unit, attr.value())?;
}
gimli::DW_AT_language => {
if let gimli::AttributeValue::Language(val) = attr.value() {
lang = Some(val);
}
}
_ => {}
}
}

// Find the address ranges for the CU, using in order of preference:
// - DW_AT_ranges
// - .debug_aranges
// - DW_AT_low_pc/DW_AT_high_pc
//
// Using DW_AT_ranges before .debug_aranges is possibly an arbitrary choice,
// but the feeling is that DW_AT_ranges is more likely to be reliable or
// complete if it is present.
//
// .debug_aranges must be used before DW_AT_low_pc/DW_AT_high_pc because
// it has been observed on macOS that DW_AT_ranges was not emitted even for
// discontiguous CUs.
let i = match ranges.ranges_offset {
Some(_) => None,
None => aranges.binary_search_by_key(&offset, |x| x.0).ok(),
};
let i = aranges.binary_search_by_key(&offset, |x| x.0).ok();
if let Some(mut i) = i {
// There should be only one set per CU, but in practice multiple
// sets have been observed. This is probably a compiler bug, but
Expand Down Expand Up @@ -182,17 +125,77 @@ impl<'dwarf> Units<'dwarf> {
}
}
}
if need_unit_range {
need_unit_range = !ranges.for_each_range(dw_unit_ref, |range| {
unit_ranges.push(UnitRange {
range,
unit_id,
max_end: 0,
});
})?;
}

// If we got ranges from aranges, we can defer the expensive
// `gimli::Unit` construction (abbreviation parsing, line
// program header parsing) until the unit is actually queried.
if !need_unit_range {
res_units.push(Unit::new_deferred(offset, header));
continue;
}

let dw_unit = match sections.unit(header) {
Ok(dw_unit) => dw_unit,
Err(_) => continue,
};

let dw_unit_ref = gimli::UnitRef::new(&sections, &dw_unit);
let mut lang = None;
let mut entries = dw_unit_ref.entries_raw(None)?;

let abbrev = match entries.read_abbreviation()? {
Some(abbrev) => abbrev,
None => continue,
};

let mut ranges = RangeAttributes::default();
for spec in abbrev.attributes() {
let attr = entries.read_attribute(*spec)?;
match attr.name() {
gimli::DW_AT_low_pc => match attr.value() {
gimli::AttributeValue::Addr(val) => ranges.low_pc = Some(val),
gimli::AttributeValue::DebugAddrIndex(index) => {
ranges.low_pc = Some(sections.address(&dw_unit, index)?);
}
_ => {}
},
gimli::DW_AT_high_pc => match attr.value() {
gimli::AttributeValue::Addr(val) => ranges.high_pc = Some(val),
gimli::AttributeValue::DebugAddrIndex(index) => {
ranges.high_pc = Some(sections.address(&dw_unit, index)?);
}
gimli::AttributeValue::Udata(val) => ranges.size = Some(val),
_ => {}
},
gimli::DW_AT_ranges => {
ranges.ranges_offset =
sections.attr_ranges_offset(&dw_unit, attr.value())?;
}
gimli::DW_AT_language => {
if let gimli::AttributeValue::Language(val) = attr.value() {
lang = Some(val);
}
}
_ => {}
}
}

// Find the address ranges for the CU, using in order of preference:
// - DW_AT_ranges
// - DW_AT_low_pc/DW_AT_high_pc
//
// .debug_aranges was already checked before constructing the
// `gimli::Unit`, so if we're here, aranges didn't have ranges
// for this CU.
need_unit_range = !ranges.for_each_range(dw_unit_ref, |range| {
unit_ranges.push(UnitRange {
range,
unit_id,
max_end: 0,
});
})?;

let lines = OnceCell::new();
if need_unit_range {
// The unit did not declare any ranges.
Expand Down Expand Up @@ -238,6 +241,12 @@ impl<'dwarf> Units<'dwarf> {
Ok(slf)
}

/// Retrieve a reference to the underlying [`gimli::Dwarf`].
#[inline]
pub(crate) fn dwarf(&self) -> &gimli::Dwarf<R<'dwarf>> {
&self.dwarf
}

pub(super) fn load_dwo(
&self,
dwo_id: gimli::DwoId,
Expand Down Expand Up @@ -269,14 +278,14 @@ impl<'dwarf> Units<'dwarf> {
{
// There is never a DIE at the unit offset or before the first unit.
Ok(_) | Err(0) => return Err(gimli::Error::NoEntryAtGivenOffset(offset.0 as u64)),
Err(i) => self.units[i - 1].dw_unit(),
Err(i) => &self.units[i - 1],
};

let unit_offset = offset
.to_unit_offset(&unit.header)
.to_unit_offset(unit.header())
.ok_or(gimli::Error::NoEntryAtGivenOffset(offset.0 as u64))?;
let unit = gimli::UnitRef::new(&self.dwarf, unit);
Ok((unit, unit_offset))
let unit_ref = unit.unit_ref(self)?;
Ok((unit_ref, unit_offset))
}

/// Finds the CUs for the function address given.
Expand Down
Loading