diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6701efcfc..ac734f113 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## [8.0.0-alpha-011] - 2026-04-15
+
+### Fixed
+
+- Unicode strings with combining characters caused incorrect column tracking, leading to wrong indentation decisions. [#2945](https://github.com/fsprojects/fantomas/issues/2945)
+
 ## [8.0.0-alpha-010] - 2026-04-15
 
 ### Fixed
diff --git a/src/Fantomas.Core.Tests/StringTests.fs b/src/Fantomas.Core.Tests/StringTests.fs
index 239013c5c..10e657245 100644
--- a/src/Fantomas.Core.Tests/StringTests.fs
+++ b/src/Fantomas.Core.Tests/StringTests.fs
@@ -339,3 +339,10 @@ let ``unicode null character should be recognized as a trivia item, 2050`` () =
 [<Test>]
 let ``character quotes should be preserved, 3076`` () =
     formatAST false "let s = 'A'" config |> should equal "let s = 'A'\n"
+
+[<Test>]
+let ``string with Unicode combining characters should not affect formatting decisions, 2945`` () =
+    // Combining characters (e.g. U+036E, U+0312, U+036B) have no visual width of their own.
+    // Column tracking must use grapheme clusters, not UTF-16 code units.
+    formatSourceString "let x = \"Zal\u036e\u0312\u036bgo\"" config
+    |> should equal "let x = \"Zal\u036e\u0312\u036bgo\"\n"
diff --git a/src/Fantomas.Core.Tests/UtilsTests.fs b/src/Fantomas.Core.Tests/UtilsTests.fs
index 47c296822..98867458c 100644
--- a/src/Fantomas.Core.Tests/UtilsTests.fs
+++ b/src/Fantomas.Core.Tests/UtilsTests.fs
@@ -1,6 +1,7 @@
 module Fantomas.Core.Tests.UtilsTests
 
 open NUnit.Framework
+open FsUnit
 open Fantomas.Core
 open FsCheck
 
@@ -49,3 +50,17 @@ let ``when predicate returns true until certain index`` () =
         }
 
     property |> Prop.forAll (Arb.fromGen gen) |> Check.QuickThrowOnFailure
+
+[<Test>]
+let ``String.visualWidth counts grapheme clusters not UTF-16 code units, 2945`` () =
+    // ASCII: visual width equals String.length
+    String.visualWidth "hello" |> should equal 5
+    // Combining characters attach to the preceding base character with no visual advance.
+    // U+036E, U+0312, U+036B are all combining marks.
+    // "l" + 3 combining marks = 1 grapheme cluster
+    let combining = "l\u036e\u0312\u036b"
+    String.visualWidth combining |> should equal 1
+    // String.length counts UTF-16 code units, not grapheme clusters
+    String.length combining |> should equal 4
+    // Empty string
+    String.visualWidth "" |> should equal 0
diff --git a/src/Fantomas.Core/Context.fs b/src/Fantomas.Core/Context.fs
index 75111eade..f42700341 100644
--- a/src/Fantomas.Core/Context.fs
+++ b/src/Fantomas.Core/Context.fs
@@ -93,7 +93,7 @@ module WriterModel =
             | Write s
             | WriteTrivia s ->
                 { m with
-                    Column = m.Column + (String.length s) }
+                    Column = m.Column + (String.visualWidth s) }
             | WriteBeforeNewline s -> { m with WriteBeforeNewline = s }
             | IndentBy x ->
                 { m with
diff --git a/src/Fantomas.Core/Utils.fs b/src/Fantomas.Core/Utils.fs
index 9e17f4dd7..911c58f05 100644
--- a/src/Fantomas.Core/Utils.fs
+++ b/src/Fantomas.Core/Utils.fs
@@ -16,6 +16,25 @@ module String =
     let isNotNullOrEmpty = String.IsNullOrEmpty >> not
     let isNotNullOrWhitespace = String.IsNullOrWhiteSpace >> not
 
+    let visualWidth (s: string) =
+        if String.IsNullOrEmpty s then
+            // String.length treats null as empty; keep that contract for callers that switched over.
+            0
+        else
+            // Fast path: most F# source tokens are pure ASCII, avoid allocating StringInfo.
+            let mutable isAscii = true
+            let mutable i = 0
+
+            while isAscii && i < s.Length do
+                isAscii <- s.[i] <= '\u007F'
+                i <- i + 1
+
+            if isAscii then
+                s.Length
+            else
+                // One column per text element (a base character together with its combining marks).
+                Globalization.StringInfo(s).LengthInTextElements
+
 module List =
     let chooseState f state l =
         let mutable s = state
diff --git a/src/Fantomas.Core/Utils.fsi b/src/Fantomas.Core/Utils.fsi
index 1bf4d55ff..bacab8ec1 100644
--- a/src/Fantomas.Core/Utils.fsi
+++ b/src/Fantomas.Core/Utils.fsi
@@ -7,6 +7,12 @@ module String =
     val empty: string
     val isNotNullOrEmpty: (string -> bool)
     val isNotNullOrWhitespace: (string -> bool)
+    /// Returns the visual column width of a string, counting Unicode grapheme clusters.
+    /// Unlike String.length, this correctly handles combining characters (e.g. diacritics)
+    /// which attach to a preceding character and do not advance the visual column.
+    /// Like String.length, a null or empty string has width 0.
+    /// Uses a fast path for pure-ASCII strings (no allocation).
+    val visualWidth: s: string -> int
 
 module List =
     val chooseState: f: ('a -> 'b -> 'a * 'c option) -> state: 'a -> l: 'b list -> 'c list