diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f580287..c3962297 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ Unreleased ---------- - Added support for relocatable ELF and DWARF - Added support for kernel module symbolization with DWARF data +- Improved DWARF symbolization performance for some debug info variants - Bumped minimum supported Rust version to `1.88` diff --git a/src/dwarf/unit.rs b/src/dwarf/unit.rs index ff08e1da..3234322e 100644 --- a/src/dwarf/unit.rs +++ b/src/dwarf/unit.rs @@ -27,6 +27,8 @@ use std::cell::OnceCell; +use crate::util::OnceCellExt as _; + use super::function::Function; use super::function::Functions; use super::lines::Lines; @@ -59,8 +61,11 @@ pub(super) struct UnitRange { #[derive(Debug)] pub(super) struct Unit<'dwarf> { offset: gimli::DebugInfoOffset< as gimli::Reader>::Offset>, - dw_unit: gimli::Unit>, - lang: Option, + /// The gimli unit, lazily constructed from the header. + dw_unit: OnceCell>>>, + /// The unit header, stored for lazy construction of `dw_unit`. + header: gimli::UnitHeader>, + lang: OnceCell>, lines: OnceCell>>, funcs: OnceCell>>, dwo: OnceCell>>>, @@ -73,10 +78,12 @@ impl<'dwarf> Unit<'dwarf> { lang: Option, lines: OnceCell>, ) -> Self { + let header = unit.header.clone(); Self { offset, - dw_unit: unit, - lang, + dw_unit: OnceCell::from(Ok(unit)), + header, + lang: OnceCell::from(lang), lines: lines .into_inner() .map(Result::Ok) @@ -87,8 +94,57 @@ impl<'dwarf> Unit<'dwarf> { } } + /// Create a unit with deferred `gimli::Unit` construction. + /// + /// The full `gimli::Unit` (which involves abbreviation parsing and + /// line program header parsing) is deferred until first access. + pub(super) fn new_deferred( + offset: gimli::DebugInfoOffset< as gimli::Reader>::Offset>, + header: gimli::UnitHeader>, + ) -> Self { + Self { + offset, + dw_unit: OnceCell::new(), + header, + lang: OnceCell::new(), + lines: OnceCell::new(), + funcs: OnceCell::new(), + dwo: OnceCell::new(), + } + } + + /// Get or lazily construct the `gimli::Unit`. + fn ensure_dw_unit(&self, units: &Units<'dwarf>) -> gimli::Result<&gimli::Unit>> { + let dw_unit = self + .dw_unit + .get_or_init(|| units.dwarf().unit(self.header.clone())) + .as_ref() + .map_err(|err| *err)?; + + // Lazily extract language from the root DIE if not yet known. + let _lang = self.lang.get_or_try_init_(|| { + let unit_ref = units.unit_ref(dw_unit); + let mut cursor = unit_ref.entries_raw(None)?; + if let Some(abbrev) = cursor.read_abbreviation()? { + for spec in abbrev.attributes() { + let attr = cursor.read_attribute(*spec)?; + if attr.name() == gimli::DW_AT_language { + if let gimli::AttributeValue::Language(val) = attr.value() { + return Ok(Some(val)) + } + break; + } + } + } + gimli::Result::Ok(None) + })?; + + Ok(dw_unit) + } + fn process_dwo( &self, + dw_unit: &gimli::Unit>, dwo_dwarf: Option>>, ) -> gimli::Result>> { let dwo_dwarf = match dwo_dwarf { @@ -102,7 +158,7 @@ impl<'dwarf> Unit<'dwarf> { }; let mut dwo_unit = dwo_dwarf.unit(dwo_header)?; - let () = dwo_unit.copy_relocated_attributes(&self.dw_unit); + let () = dwo_unit.copy_relocated_attributes(dw_unit); Ok(Some(DwoUnit { dwarf: dwo_dwarf, @@ -114,12 +170,14 @@ impl<'dwarf> Unit<'dwarf> { &'unit self, units: &'unit Units<'dwarf>, ) -> gimli::Result>> { + let dw_unit = self.ensure_dw_unit(units)?; + let map_dwo_result = |dwo_result: &'unit gimli::Result>>| { dwo_result .as_ref() .map(|dwo_unit| match dwo_unit { Some(dwo_unit) => dwo_unit.unit_ref(), - None => units.unit_ref(&self.dw_unit), + None => units.unit_ref(dw_unit), }) .map_err(|err| *err) }; @@ -128,14 +186,14 @@ impl<'dwarf> Unit<'dwarf> { return map_dwo_result(result) } - let dwo_id = match self.dw_unit.dwo_id { + let dwo_id = match dw_unit.dwo_id { Some(dwo_id) => dwo_id, None => return map_dwo_result(self.dwo.get_or_init(|| Ok(None))), }; let result = self .dwo - .get_or_init(|| self.process_dwo(units.load_dwo(dwo_id)?)); + .get_or_init(|| self.process_dwo(dw_unit, units.load_dwo(dwo_id)?)); map_dwo_result(result) } @@ -169,7 +227,8 @@ impl<'dwarf> Unit<'dwarf> { &self, units: &Units<'dwarf>, ) -> gimli::Result>> { - let ilnp = match self.dw_unit.line_program { + let dw_unit = self.ensure_dw_unit(units)?; + let ilnp = match dw_unit.line_program { Some(ref ilnp) => ilnp, None => return Ok(None), }; @@ -178,7 +237,7 @@ impl<'dwarf> Unit<'dwarf> { .get_or_init(|| { // NB: line information is always stored in the main // debug file so this does not need to handle DWOs. - let unit = units.unit_ref(&self.dw_unit); + let unit = units.unit_ref(dw_unit); Lines::parse(unit, ilnp.clone()) }) .as_ref() @@ -249,15 +308,15 @@ impl<'dwarf> Unit<'dwarf> { self.offset } - /// Retrieve the underlying [`gimli::Unit`] object. + /// Retrieve the underlying [`gimli::Unit`] header. #[inline] - pub(super) fn dw_unit(&self) -> &gimli::Unit> { - &self.dw_unit + pub(super) fn header(&self) -> &gimli::UnitHeader> { + &self.header } /// Attempt to retrieve the compilation unit's source code language. #[inline] pub(super) fn language(&self) -> Option { - self.lang + self.lang.get().copied().flatten() } } diff --git a/src/dwarf/units.rs b/src/dwarf/units.rs index 62674c85..bf20a4bf 100644 --- a/src/dwarf/units.rs +++ b/src/dwarf/units.rs @@ -88,69 +88,12 @@ impl<'dwarf> Units<'dwarf> { } _ => true, }; - let dw_unit = match sections.unit(header) { - Ok(dw_unit) => dw_unit, - Err(_) => continue, - }; - let dw_unit_ref = gimli::UnitRef::new(§ions, &dw_unit); - let mut lang = None; + // Try to get ranges from .debug_aranges first, before doing + // any expensive unit parsing (abbreviations, line program + // headers, etc.). if need_unit_range { - let mut entries = dw_unit_ref.entries_raw(None)?; - - let abbrev = match entries.read_abbreviation()? { - Some(abbrev) => abbrev, - None => continue, - }; - - let mut ranges = RangeAttributes::default(); - for spec in abbrev.attributes() { - let attr = entries.read_attribute(*spec)?; - match attr.name() { - gimli::DW_AT_low_pc => match attr.value() { - gimli::AttributeValue::Addr(val) => ranges.low_pc = Some(val), - gimli::AttributeValue::DebugAddrIndex(index) => { - ranges.low_pc = Some(sections.address(&dw_unit, index)?); - } - _ => {} - }, - gimli::DW_AT_high_pc => match attr.value() { - gimli::AttributeValue::Addr(val) => ranges.high_pc = Some(val), - gimli::AttributeValue::DebugAddrIndex(index) => { - ranges.high_pc = Some(sections.address(&dw_unit, index)?); - } - gimli::AttributeValue::Udata(val) => ranges.size = Some(val), - _ => {} - }, - gimli::DW_AT_ranges => { - ranges.ranges_offset = - sections.attr_ranges_offset(&dw_unit, attr.value())?; - } - gimli::DW_AT_language => { - if let gimli::AttributeValue::Language(val) = attr.value() { - lang = Some(val); - } - } - _ => {} - } - } - - // Find the address ranges for the CU, using in order of preference: - // - DW_AT_ranges - // - .debug_aranges - // - DW_AT_low_pc/DW_AT_high_pc - // - // Using DW_AT_ranges before .debug_aranges is possibly an arbitrary choice, - // but the feeling is that DW_AT_ranges is more likely to be reliable or - // complete if it is present. - // - // .debug_aranges must be used before DW_AT_low_pc/DW_AT_high_pc because - // it has been observed on macOS that DW_AT_ranges was not emitted even for - // discontiguous CUs. - let i = match ranges.ranges_offset { - Some(_) => None, - None => aranges.binary_search_by_key(&offset, |x| x.0).ok(), - }; + let i = aranges.binary_search_by_key(&offset, |x| x.0).ok(); if let Some(mut i) = i { // There should be only one set per CU, but in practice multiple // sets have been observed. This is probably a compiler bug, but @@ -182,17 +125,77 @@ impl<'dwarf> Units<'dwarf> { } } } - if need_unit_range { - need_unit_range = !ranges.for_each_range(dw_unit_ref, |range| { - unit_ranges.push(UnitRange { - range, - unit_id, - max_end: 0, - }); - })?; + } + + // If we got ranges from aranges, we can defer the expensive + // `gimli::Unit` construction (abbreviation parsing, line + // program header parsing) until the unit is actually queried. + if !need_unit_range { + res_units.push(Unit::new_deferred(offset, header)); + continue; + } + + let dw_unit = match sections.unit(header) { + Ok(dw_unit) => dw_unit, + Err(_) => continue, + }; + + let dw_unit_ref = gimli::UnitRef::new(§ions, &dw_unit); + let mut lang = None; + let mut entries = dw_unit_ref.entries_raw(None)?; + + let abbrev = match entries.read_abbreviation()? { + Some(abbrev) => abbrev, + None => continue, + }; + + let mut ranges = RangeAttributes::default(); + for spec in abbrev.attributes() { + let attr = entries.read_attribute(*spec)?; + match attr.name() { + gimli::DW_AT_low_pc => match attr.value() { + gimli::AttributeValue::Addr(val) => ranges.low_pc = Some(val), + gimli::AttributeValue::DebugAddrIndex(index) => { + ranges.low_pc = Some(sections.address(&dw_unit, index)?); + } + _ => {} + }, + gimli::DW_AT_high_pc => match attr.value() { + gimli::AttributeValue::Addr(val) => ranges.high_pc = Some(val), + gimli::AttributeValue::DebugAddrIndex(index) => { + ranges.high_pc = Some(sections.address(&dw_unit, index)?); + } + gimli::AttributeValue::Udata(val) => ranges.size = Some(val), + _ => {} + }, + gimli::DW_AT_ranges => { + ranges.ranges_offset = + sections.attr_ranges_offset(&dw_unit, attr.value())?; + } + gimli::DW_AT_language => { + if let gimli::AttributeValue::Language(val) = attr.value() { + lang = Some(val); + } + } + _ => {} } } + // Find the address ranges for the CU, using in order of preference: + // - DW_AT_ranges + // - DW_AT_low_pc/DW_AT_high_pc + // + // .debug_aranges was already checked before constructing the + // `gimli::Unit`, so if we're here, aranges didn't have ranges + // for this CU. + need_unit_range = !ranges.for_each_range(dw_unit_ref, |range| { + unit_ranges.push(UnitRange { + range, + unit_id, + max_end: 0, + }); + })?; + let lines = OnceCell::new(); if need_unit_range { // The unit did not declare any ranges. @@ -238,6 +241,12 @@ impl<'dwarf> Units<'dwarf> { Ok(slf) } + /// Retrieve a reference to the underlying [`gimli::Dwarf`]. + #[inline] + pub(crate) fn dwarf(&self) -> &gimli::Dwarf> { + &self.dwarf + } + pub(super) fn load_dwo( &self, dwo_id: gimli::DwoId, @@ -269,14 +278,14 @@ impl<'dwarf> Units<'dwarf> { { // There is never a DIE at the unit offset or before the first unit. Ok(_) | Err(0) => return Err(gimli::Error::NoEntryAtGivenOffset(offset.0 as u64)), - Err(i) => self.units[i - 1].dw_unit(), + Err(i) => &self.units[i - 1], }; let unit_offset = offset - .to_unit_offset(&unit.header) + .to_unit_offset(unit.header()) .ok_or(gimli::Error::NoEntryAtGivenOffset(offset.0 as u64))?; - let unit = gimli::UnitRef::new(&self.dwarf, unit); - Ok((unit, unit_offset)) + let unit_ref = unit.unit_ref(self)?; + Ok((unit_ref, unit_offset)) } /// Finds the CUs for the function address given.