Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## [Unreleased]

### Fixed

- Unicode strings with combining characters caused incorrect column tracking, leading to wrong indentation decisions. [#2945](https://github.com/fsprojects/fantomas/issues/2945)

## [8.0.0-alpha-009] - 2026-04-03

### Fixed
Expand Down
7 changes: 7 additions & 0 deletions src/Fantomas.Core.Tests/StringTests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -339,3 +339,10 @@ let ``unicode null character should be recognized as a trivia item, 2050`` () =
[<Test>]
let ``character quotes should be preserved, 3076`` () =
    // A char literal must come back with its single quotes intact.
    let formatted = formatAST false "let s = 'A'" config
    should equal "let s = 'A'\n" formatted

[<Test>]
let ``string with Unicode combining characters should not affect formatting decisions, 2945`` () =
    // U+036E, U+0312 and U+036B are combining marks: they stack on the preceding
    // character and take no column of their own, so column tracking has to count
    // grapheme clusters instead of UTF-16 code units.
    // NOTE(review): this input looks too short to reach any width-based layout decision,
    // so it presumably only checks the round trip - confirm whether a longer case is needed.
    let formatted =
        formatSourceString "let x = \"Zal\u036e\u0312\u036bgo\"" config

    should equal "let x = \"Zal\u036e\u0312\u036bgo\"\n" formatted
15 changes: 15 additions & 0 deletions src/Fantomas.Core.Tests/UtilsTests.fs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module Fantomas.Core.Tests.UtilsTests

open NUnit.Framework
open FsUnit
open Fantomas.Core
open FsCheck

Expand Down Expand Up @@ -49,3 +50,17 @@ let ``when predicate returns true until certain index`` () =
}

property |> Prop.forAll (Arb.fromGen gen) |> Check.QuickThrowOnFailure

[<Test>]
let ``String.visualWidth counts grapheme clusters not UTF-16 code units, 2945`` () =
    // Pure ASCII input: the visual width is the same as String.length.
    should equal 5 (String.visualWidth "hello")

    // "l" followed by three combining marks (U+036E, U+0312, U+036B).
    // The marks attach to the base character, so the whole thing is one grapheme cluster.
    let combining = "l\u036e\u0312\u036b"
    should equal 1 (String.visualWidth combining)

    // String.length still reports every UTF-16 code unit.
    should equal 4 (String.length combining)

    // The empty string has no width.
    should equal 0 (String.visualWidth "")
2 changes: 1 addition & 1 deletion src/Fantomas.Core/Context.fs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ module WriterModel =
| Write s
| WriteTrivia s ->
{ m with
Column = m.Column + (String.length s) }
Column = m.Column + (String.visualWidth s) }
| WriteBeforeNewline s -> { m with WriteBeforeNewline = s }
| IndentBy x ->
{ m with
Expand Down
14 changes: 14 additions & 0 deletions src/Fantomas.Core/Utils.fs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,20 @@ module String =
let isNotNullOrEmpty = String.IsNullOrEmpty >> not
let isNotNullOrWhitespace = String.IsNullOrWhiteSpace >> not

/// Width of `s` in columns, counted as Unicode text elements (grapheme clusters)
/// rather than UTF-16 code units, so combining marks do not add to the width.
/// Returns 0 for a null or empty string, matching what `String.length` reports for
/// those inputs. Every text element counts as exactly one column.
let visualWidth (s: string) =
    if String.IsNullOrEmpty s then
        // `String.length` tolerates null and yields 0; do the same here so that callers
        // switching from `String.length` never hit a NullReferenceException.
        0
    else
        // Fast path: most F# source tokens are pure ASCII, avoid allocating StringInfo.
        let mutable hasNonAscii = false
        let mutable i = 0

        // Stop scanning at the first character outside the 7-bit ASCII range.
        while not hasNonAscii && i < s.Length do
            if s.[i] > '\u007F' then
                hasNonAscii <- true

            i <- i + 1

        if hasNonAscii then
            // Only non-ASCII input pays for the text-element segmentation.
            Globalization.StringInfo(s).LengthInTextElements
        else
            s.Length

module List =
let chooseState f state l =
let mutable s = state
Expand Down
5 changes: 5 additions & 0 deletions src/Fantomas.Core/Utils.fsi
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ module String =
val empty: string
val isNotNullOrEmpty: (string -> bool)
val isNotNullOrWhitespace: (string -> bool)
/// Returns the visual column width of a string, approximated as its number of Unicode
/// text elements (grapheme clusters).
/// Unlike String.length, which counts UTF-16 code units, combining characters
/// (e.g. diacritics) that attach to a preceding character do not advance the column.
/// Each text element counts as a single column; wide characters are not given extra width.
/// Pure-ASCII strings take a fast path that returns the string length without
/// allocating a StringInfo.
val visualWidth: s: string -> int

module List =
val chooseState: f: ('a -> 'b -> 'a * 'c option) -> state: 'a -> l: 'b list -> 'c list
Expand Down
Loading