Skip to content
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ No LibreOffice, no Chromium, no Docker — just a single binary powered by [Typs

## Features

- **DOCX** — paragraphs, inline formatting (bold/italic/underline/color), tables, images, lists, headers/footers, page setup
- **DOCX** — paragraphs, inline formatting (bold/italic/underline/color), tables, images, drawing shapes, ordered/nested lists, syntax-highlighted code, headers/footers, page setup
- **PPTX** — slides, text boxes, shapes, tables (with theme-based table styles), images, slide masters, speaker notes, gradient backgrounds, shadow/reflection effects
- **XLSX** — sheets, cell formatting, merged cells, column widths, row heights, conditional formatting (DataBar, IconSet)
- **PDF/A-2b** — archival-compliant output via `--pdf-a`
Expand Down Expand Up @@ -128,7 +128,7 @@ Available functions: `convertToPdf(data, format)`, `convertDocxToPdf(data)`, `co

| Format | Status | Key Features |
|--------|--------|-------------|
| DOCX | Supported | Text, tables, images, lists, headers/footers, page setup |
| DOCX | Supported | Text, tables, images, drawing shapes, lists, code highlighting, headers/footers, page setup |
| PPTX | Supported | Slides, text boxes, shapes, tables, images, masters, gradients, effects |
| XLSX | Supported | Sheets, formatting, merged cells, column/row sizing, conditional formatting |

Expand Down
Binary file added assets/issue-176-after.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
19 changes: 19 additions & 0 deletions crates/office2pdf/src/ir/elements.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ pub enum Block {
Image(ImageData),
FloatingImage(FloatingImage),
FloatingTextBox(FloatingTextBox),
FloatingShape(FloatingShape),
List(List),
MathEquation(MathEquation),
Chart(Chart),
Expand Down Expand Up @@ -125,6 +126,24 @@ pub struct FloatingTextBox {
pub offset_y: f64,
}

/// A floating geometric shape (rectangle, line/arrow, ellipse, …) positioned
/// with an anchor offset. Used for DrawingML word-processing shapes (`wps:wsp`)
/// that carry geometry but no text box — these have no docx-rs representation
/// and would otherwise be dropped (issue #176).
///
/// `width`/`height` describe the on-page bounding box and `offset_x`/`offset_y`
/// place it relative to the anchor reference; all four are in points.
#[derive(Debug, Clone)]
pub struct FloatingShape {
    /// The shape being placed. NOTE(review): `Shape` is declared elsewhere;
    /// presumably it carries the geometry kind and styling — confirm there.
    pub shape: Shape,
    /// On-page bounding-box width in points (from `wp:extent`).
    pub width: f64,
    /// On-page bounding-box height in points (from `wp:extent`).
    pub height: f64,
    /// Horizontal offset in points from the anchor reference.
    pub offset_x: f64,
    /// Vertical offset in points from the anchor reference.
    pub offset_y: f64,
    /// Wrap setting associated with this shape. NOTE(review): presumably
    /// controls how surrounding text flows around the shape — verify against
    /// the `WrapMode` definition.
    pub wrap_mode: WrapMode,
}

/// Vertical alignment for fixed text box content.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum TextBoxVerticalAlign {
Expand Down
86 changes: 69 additions & 17 deletions crates/office2pdf/src/parser/docx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ use crate::parser::Parser;
#[cfg(test)]
use self::contexts::scan_table_headers;
use self::contexts::{
BidiContext, ChartContext, DocxConversionContext, DrawingTextBoxContext, DrawingTextBoxInfo,
MathContext, NoteContext, SmallCapsContext, TableHeaderContext, VmlTextBoxContext,
VmlTextBoxInfo, WrapContext, build_chart_context_from_xml, build_math_context_from_xml,
build_note_context_from_xml, build_wrap_context_from_xml,
BidiContext, ChartContext, DocxConversionContext, DrawingShapeContext, DrawingTextBoxContext,
DrawingTextBoxInfo, MathContext, NoteContext, SmallCapsContext, TableHeaderContext,
VmlTextBoxContext, VmlTextBoxInfo, WrapContext, build_chart_context_from_xml,
build_math_context_from_xml, build_note_context_from_xml, build_wrap_context_from_xml,
extract_column_layout_from_section_property, is_note_reference_run, read_zip_text,
scan_column_layouts,
};
Expand All @@ -42,9 +42,9 @@ use self::styles::{
};
use self::tables::convert_table;
use self::text::{
extract_doc_default_text_style, extract_paragraph_style, extract_run_style, extract_run_text,
extract_run_text_skip_column_breaks, extract_tab_stop_overrides, is_column_break,
parse_hex_color, resolve_hyperlink_url,
extract_doc_default_text_style, extract_paragraph_style, extract_run_style,
extract_run_style_id, extract_run_text, extract_run_text_skip_column_breaks,
extract_tab_stop_overrides, is_column_break, parse_hex_color, resolve_hyperlink_url,
};
#[cfg(test)]
use self::text::{extract_tab_stops, resolve_highlight_color};
Expand Down Expand Up @@ -112,6 +112,7 @@ fn build_zip_preparse_assets(data: &[u8]) -> ZipPreParseAssets {
let notes = build_note_context_from_xml(doc_xml.as_deref(), &mut archive);
let wraps = build_wrap_context_from_xml(doc_xml.as_deref());
let drawing_text_boxes = DrawingTextBoxContext::from_xml(doc_xml.as_deref());
let drawing_shapes = DrawingShapeContext::from_xml(doc_xml.as_deref());
let table_headers = TableHeaderContext::from_xml(doc_xml.as_deref());
let vml_text_boxes = VmlTextBoxContext::from_xml(doc_xml.as_deref());
let math = build_math_context_from_xml(doc_xml.as_deref());
Expand All @@ -127,6 +128,7 @@ fn build_zip_preparse_assets(data: &[u8]) -> ZipPreParseAssets {
notes,
wraps,
drawing_text_boxes,
drawing_shapes,
table_headers,
vml_text_boxes,
bidi,
Expand All @@ -147,6 +149,7 @@ fn build_zip_preparse_assets(data: &[u8]) -> ZipPreParseAssets {
notes: NoteContext::empty(),
wraps: WrapContext::empty(),
drawing_text_boxes: DrawingTextBoxContext::from_xml(None),
drawing_shapes: DrawingShapeContext::from_xml(None),
table_headers: TableHeaderContext::from_xml(None),
vml_text_boxes: VmlTextBoxContext::from_xml(None),
bidi: BidiContext::from_xml(None),
Expand Down Expand Up @@ -386,6 +389,7 @@ fn build_text_run(
run_property: &docx_rs::RunProperty,
is_small_caps: bool,
resolved_style: Option<&ResolvedStyle>,
style_map: &StyleMap,
href: Option<String>,
) -> Option<Run> {
if text.is_empty() {
Expand All @@ -395,6 +399,14 @@ fn build_text_run(
if is_small_caps {
explicit_style.small_caps = Some(true);
}
// Layer the referenced character style (`<w:rStyle>`, e.g. a syntax
// highlighting token) beneath the run's explicit properties so its color
// and weight apply while explicit run formatting still wins (issue #176).
if let Some(char_style) = extract_run_style_id(run_property).and_then(|id| style_map.get(&id)) {
let mut combined: TextStyle = char_style.text.clone();
combined.merge_from(&explicit_style);
explicit_style = combined;
}
Some(Run {
text,
style: merge_text_style(&explicit_style, resolved_style),
Expand Down Expand Up @@ -435,6 +447,16 @@ fn extract_run_children_media(
drawing, images, hyperlinks, style_map, ctx,
));
}
// A `<w:drawing>` that docx-rs cannot classify as a picture or a text box
// (geometry-only `wps:wsp` shapes) leaves `data == None`. Pair each such
// drawing, in document order, with the geometry scanned from the raw XML
// so rectangles, lines and arrows are not dropped (issue #176).
if let docx_rs::RunChild::Drawing(drawing) = run_child
&& drawing.data.is_none()
&& let Some(shape) = ctx.drawing_shapes.consume_next()
{
text_box_blocks.push(Block::FloatingShape(shape));
}
if let docx_rs::RunChild::Shape(shape) = run_child {
let vml_text_box: VmlTextBoxInfo = ctx.vml_text_boxes.consume_next();
if let Some(floating_text_box) = extract_vml_shape_text_box(shape, &vml_text_box) {
Expand Down Expand Up @@ -465,6 +487,7 @@ fn process_hyperlink_runs(
hyperlink: &docx_rs::Hyperlink,
hyperlinks: &HyperlinkMap,
resolved_style: Option<&ResolvedStyle>,
style_map: &StyleMap,
ctx: &DocxConversionContext,
runs: &mut Vec<Run>,
) {
Expand All @@ -478,6 +501,7 @@ fn process_hyperlink_runs(
&run.run_property,
hl_small_caps,
resolved_style,
style_map,
href.clone(),
) {
runs.push(ir_run);
Expand Down Expand Up @@ -514,7 +538,9 @@ fn convert_paragraph_blocks(
// Collect text runs and detect inline images
let mut runs: Vec<Run> = Vec::new();
let mut inline_images: Vec<Block> = Vec::new();
let mut emitted_text_box_blocks: bool = false;
let mut emitted_paragraph: bool = false;
let mut emitted_media_blocks: bool = false;
let mut emitted_floating_anchor: bool = false;

for child in &para.children {
match child {
Expand Down Expand Up @@ -545,13 +571,17 @@ fn convert_paragraph_blocks(
);

if !media.text_box_blocks.is_empty() {
emitted_media_blocks = true;
emitted_floating_anchor |= media.text_box_blocks.iter().any(|block| {
matches!(block, Block::FloatingShape(_) | Block::FloatingTextBox(_))
});
if !runs.is_empty() {
out.append(&mut inline_images);
push_paragraph_from_runs(out, para, resolved_style, is_rtl, &mut runs);
emitted_paragraph = true;
} else if !inline_images.is_empty() {
out.append(&mut inline_images);
}
emitted_text_box_blocks = true;
out.extend(media.text_box_blocks);
}

Expand All @@ -560,27 +590,45 @@ fn convert_paragraph_blocks(
if !runs.is_empty() {
out.append(&mut inline_images);
push_paragraph_from_runs(out, para, resolved_style, is_rtl, &mut runs);
emitted_paragraph = true;
}
out.push(Block::ColumnBreak);

// Still extract any text from this run (after the break)
let text: String = extract_run_text_skip_column_breaks(run);
if let Some(ir_run) =
build_text_run(text, &run.run_property, is_small_caps, resolved_style, None)
{
if let Some(ir_run) = build_text_run(
text,
&run.run_property,
is_small_caps,
resolved_style,
style_map,
None,
) {
runs.push(ir_run);
}
} else {
let text: String = extract_run_text(run);
if let Some(ir_run) =
build_text_run(text, &run.run_property, is_small_caps, resolved_style, None)
{
if let Some(ir_run) = build_text_run(
text,
&run.run_property,
is_small_caps,
resolved_style,
style_map,
None,
) {
runs.push(ir_run);
}
}
}
docx_rs::ParagraphChild::Hyperlink(hyperlink) => {
process_hyperlink_runs(hyperlink, hyperlinks, resolved_style, ctx, &mut runs);
process_hyperlink_runs(
hyperlink,
hyperlinks,
resolved_style,
style_map,
ctx,
&mut runs,
);
}
_ => {}
}
Expand All @@ -589,7 +637,11 @@ fn convert_paragraph_blocks(
// Emit image blocks before the paragraph (inline images are block-level in our IR)
out.extend(inline_images);

if !runs.is_empty() || !emitted_text_box_blocks {
if !runs.is_empty() || !emitted_media_blocks || (emitted_floating_anchor && !emitted_paragraph)
{
// Keep paragraph marks for floating drawing anchors. The drawing itself
// is positioned by offsets, but the source paragraph still contributes
// to flow spacing between the drawing cluster and following content.
push_paragraph_from_runs(out, para, resolved_style, is_rtl, &mut runs);
}
}
Expand Down
Loading
Loading