diff --git a/.github/workflows/continuous.yaml b/.github/workflows/continuous.yaml
index bd8cf1aa..504829ae 100644
--- a/.github/workflows/continuous.yaml
+++ b/.github/workflows/continuous.yaml
@@ -195,13 +195,14 @@ jobs:
   ####################
 
   Windows:
-    name: windows-2025 (${{ matrix.config }})
-    runs-on: windows-2025
+    name: ${{ matrix.os }} (${{ matrix.config }})
+    runs-on: ${{ matrix.os }}
     env:
       SCCACHE_GHA_ENABLED: "true"
     strategy:
       fail-fast: false
       matrix:
+        os: [windows-2025, windows-11-arm]
         config: [Release, Debug]
     steps:
       - name: Show disk space
@@ -214,9 +215,26 @@ jobs:
           fetch-depth: 0
 
       - uses: actions/setup-python@v5
+        if: matrix.os != 'windows-11-arm'
         with:
           python-version: 3.13
 
+        # On windows-11-arm the hostedtoolcache Python ships only the interpreter binary;
+        # it lacks include/ headers and libs/python3XX.lib, so CMake cannot satisfy the
+        # Development.Module component. Use uv instead: it pulls python-build-standalone
+        # distributions which include full dev files. uv defaults to x64-emulated Python
+        # on ARM64 Windows (uv PR #13724), so we must pin the aarch64 specifier.
+      - uses: astral-sh/setup-uv@v6
+        if: matrix.os == 'windows-11-arm'
+
+      - name: Install native ARM64 Python via uv
+        if: matrix.os == 'windows-11-arm'
+        shell: pwsh
+        run: |
+          uv python install cpython-3.13-windows-aarch64
+          $pyExe = (uv python find cpython-3.13-windows-aarch64).Trim()
+          echo "PYTHON_ARM64_EXE=$($pyExe -replace '\\', '/')" >> $env:GITHUB_ENV
+
       - name: Install Ninja
         uses: seanmiddleditch/gha-setup-ninja@master
 
@@ -227,10 +245,6 @@ jobs:
           # starving sccache of requests until the default 600s timeout kills the server.
           echo "SCCACHE_IDLE_TIMEOUT=0" >> ${env:GITHUB_ENV}
 
-      - name: Select embree isa (Windows)
-        if: runner.os == 'Windows'
-        run: echo "embree_max_isa=AVX2" >> ${env:GITHUB_ENV}
-
       - name: Get number of CPU cores
         uses: SimenB/github-actions-cpu-cores@v1
         id: cpu-cores
@@ -238,22 +252,54 @@ jobs:
       - name: Sccache
         uses: mozilla-actions/sccache-action@v0.0.10
 
-        # We run configure + build in the same step, since they both need to call VsDevCmd
-        # Also, cmd uses ^ to break commands into multiple lines (in powershell this is `)
-      - name: Configure and build
-        shell: cmd
+      - name: Set x64 vars  # default: same gate as setup-python and the x64 Configure step
+        if: matrix.os != 'windows-11-arm'
+        run: |
+          echo "BUILD_DIR=D:/build" >> ${env:GITHUB_ENV}
+          echo "ARCH=x64" >> ${env:GITHUB_ENV}
+
+      - name: Set arm64 vars  # BUILD_DIR/ARCH feed the MSVC setup, Configure, Build and Tests steps
+        if: matrix.os == 'windows-11-arm'
+        run: |
+          echo "BUILD_DIR=C:/build" >> ${env:GITHUB_ENV}
+          echo "ARCH=arm64" >> ${env:GITHUB_ENV}
+
+      - name: Setup MSVC Developer Command Prompt
+        uses: ilammy/msvc-dev-cmd@v1
+        with:
+          arch: ${{ env.ARCH }}
+
+        # These steps run in PowerShell, which continues a line with ` (cmd would use ^)
+      - name: Configure
+        if: matrix.os != 'windows-11-arm'
+        run: |
+          cmake --version
+          cmake -G Ninja `
+            -DCMAKE_BUILD_TYPE=${{ matrix.config }} `
+            -DLAGRANGE_JENKINS=ON `
+            -DLAGRANGE_ALL=ON `
+            -DLAGRANGE_POLYSCOPE_MOCK_BACKEND=ON `
+            -B ${{ env.BUILD_DIR }} `
+            -S .
+
+        # Point CMake at the uv-installed ARM64 Python (which ships the dev files)
+        # and skip the Windows registry so it doesn't fall back to x64 Python.
+      - name: Configure (ARM64)
+        if: matrix.os == 'windows-11-arm'
         run: |
-          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\VsDevCmd.bat" -arch=x64
           cmake --version
-          cmake -G Ninja ^
-            -DCMAKE_BUILD_TYPE=${{ matrix.config }} ^
-            -DLAGRANGE_JENKINS=ON ^
-            -DLAGRANGE_ALL=ON ^
-            -DLAGRANGE_POLYSCOPE_MOCK_BACKEND=ON ^
-            -DEMBREE_MAX_ISA=${{ env.embree_max_isa }} ^
-            -B "D:/build" ^
+          cmake -G Ninja `
+            -DCMAKE_BUILD_TYPE=${{ matrix.config }} `
+            -DLAGRANGE_JENKINS=ON `
+            -DLAGRANGE_ALL=ON `
+            -DLAGRANGE_POLYSCOPE_MOCK_BACKEND=ON `
+            -DPython_EXECUTABLE="$env:PYTHON_ARM64_EXE" `
+            -DPython_FIND_REGISTRY=NEVER `
+            -B ${{ env.BUILD_DIR }} `
             -S .
-          cmake --build "D:/build" -j ${{ steps.cpu-cores.outputs.count }}
+
+      - name: Build
+        run: cmake --build ${{ env.BUILD_DIR }} -j ${{ steps.cpu-cores.outputs.count }}
 
       - name: Sccache stats
         if: always()
@@ -265,4 +311,4 @@ jobs:
         run: Get-PSDrive
 
       - name: Tests
-        run: cd "D:/build"; ctest --verbose -j ${{ steps.cpu-cores.outputs.count }}
+        run: cd ${{ env.BUILD_DIR }}; ctest --verbose -j ${{ steps.cpu-cores.outputs.count }}
diff --git a/cmake/recipes/external/Boost.cmake b/cmake/recipes/external/Boost.cmake
index ac371ef1..fe826840 100644
--- a/cmake/recipes/external/Boost.cmake
+++ b/cmake/recipes/external/Boost.cmake
@@ -79,12 +79,15 @@ option(BOOST_IOSTREAMS_ENABLE_BZIP2 "Boost.Iostreams: Enable BZip2 support" OFF)
 option(BOOST_IOSTREAMS_ENABLE_LZMA "Boost.Iostreams: Enable LZMA support" OFF)
 option(BOOST_IOSTREAMS_ENABLE_ZSTD "Boost.Iostreams: Enable Zstd support" OFF)
 
-set(BOOST_PATCHES "")
+set(BOOST_PATCHES)
 if(EMSCRIPTEN)
     # Wasm doesn't have rounding mode control yet, so we trick Boost::interval into thinking it has.
     # https://github.com/WebAssembly/rounding-mode-control
     # https://github.com/boostorg/interval/issues/44
-    set(BOOST_PATCHES PATCHES Boost.wasm.patch)
+    list(APPEND BOOST_PATCHES Boost.wasm.patch)
+endif()
+if(WIN32) # Boost.winarm.patch: Boost.Context picks ASM_MARMASM for armasm under MSVC
+    list(APPEND BOOST_PATCHES Boost.winarm.patch)
+endif()
 
 # Modern CMake target support was added in Boost 1.82.0
@@ -95,7 +98,7 @@ CPMAddPackage(
     URL https://github.com/boostorg/boost/releases/download/boost-1.84.0/boost-1.84.0.tar.xz
     URL_HASH SHA256=2e64e5d79a738d0fa6fb546c6e5c2bd28f88d268a2a080546f74e5ff98f29d0e
     EXCLUDE_FROM_ALL ON
-    ${BOOST_PATCHES}
+    PATCHES ${BOOST_PATCHES}
 )
 
 # Due to MKL, we may require the release runtime (/MD) even when compiling in Debug mode.
diff --git a/cmake/recipes/external/Boost.winarm.patch b/cmake/recipes/external/Boost.winarm.patch
new file mode 100644
index 00000000..fc6681aa
--- /dev/null
+++ b/cmake/recipes/external/Boost.winarm.patch
@@ -0,0 +1,132 @@
+Submodule libs/context contains modified content
+diff --git i/libs/context/CMakeLists.txt w/libs/context/CMakeLists.txt
+index dca5349..bd064b1 100644
+--- i/libs/context/CMakeLists.txt
++++ w/libs/context/CMakeLists.txt
+@@ -12,7 +12,7 @@ list(APPEND CMAKE_MODULE_PATH ${boost_context_SOURCE_DIR}/cmake)
+ 
+ ## Binary format
+ 
+-if(WIN32)
++if(WIN32 OR CYGWIN)
+   set(_default_binfmt pe)
+ elseif(APPLE)
+   set(_default_binfmt mach-o)
+@@ -31,7 +31,7 @@ math(EXPR _bits "${CMAKE_SIZEOF_VOID_P}*8")
+ 
+ if(CMAKE_SYSTEM_PROCESSOR MATCHES "^[Aa][Rr][Mm]" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+   set(_default_abi aapcs)
+-elseif(WIN32)
++elseif(WIN32 OR CYGWIN)
+   set(_default_abi ms)
+ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^mips")
+   if(_bits EQUAL 32)
+@@ -60,6 +60,8 @@ elseif(_bits EQUAL 32)
+     set(_default_arch arm)
+   elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^mips")
+     set(_default_arch mips32)
++  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc32)")
++    set(_default_arch ppc32)
+   else()
+     set(_default_arch i386)
+   endif()
+@@ -69,6 +71,8 @@ else()
+     set(_default_arch arm64)
+   elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^mips")
+     set(_default_arch mips64)
++  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc64)")
++    set(_default_arch ppc64)
+   else()
+     set(_default_arch x86_64)
+   endif()
+@@ -89,18 +93,22 @@ if(MSVC)
+   else()
+     set(_default_asm masm)
+   endif()
++elseif(BOOST_CONTEXT_ARCHITECTURE STREQUAL arm64 AND MINGW)
++  set(_default_asm armclang)
+ else()
+   set(_default_asm gas)
+ endif()
+ 
+-set(BOOST_CONTEXT_ASSEMBLER "${_default_asm}" CACHE STRING "Boost.Context assembler (masm, gas, armasm)")
+-set_property(CACHE BOOST_CONTEXT_ASSEMBLER PROPERTY STRINGS masm gas armasm)
++set(BOOST_CONTEXT_ASSEMBLER "${_default_asm}" CACHE STRING "Boost.Context assembler (masm, gas, armasm, armclang)")
++set_property(CACHE BOOST_CONTEXT_ASSEMBLER PROPERTY STRINGS masm gas armasm armclang)
+ 
+ unset(_default_asm)
+ 
+ ## Assembler source suffix
+ 
+-if(BOOST_CONTEXT_BINARY_FORMAT STREQUAL pe)
++if(BOOST_CONTEXT_ASSEMBLER STREQUAL armclang)
++  set(_default_ext .S)
++elseif(BOOST_CONTEXT_BINARY_FORMAT STREQUAL pe)
+   set(_default_ext .asm)
+ elseif(BOOST_CONTEXT_ASSEMBLER STREQUAL gas)
+   set(_default_ext .S)
+@@ -133,18 +141,22 @@ message(STATUS "Boost.Context: "
+   "implementation ${BOOST_CONTEXT_IMPLEMENTATION}")
+ 
+ # Enable the right assembler
+-
++set(ASM_LANGUAGE)
+ if(BOOST_CONTEXT_IMPLEMENTATION STREQUAL "fcontext")
+-  if(BOOST_CONTEXT_ASSEMBLER STREQUAL gas)
++  if(BOOST_CONTEXT_ASSEMBLER STREQUAL gas OR BOOST_CONTEXT_ASSEMBLER STREQUAL armclang)
+     if(CMAKE_CXX_PLATFORM_ID MATCHES "Cygwin")
+-      enable_language(ASM-ATT)
++      set(ASM_LANGUAGE ASM-ATT)
+     else()
+-      enable_language(ASM)
++      set(ASM_LANGUAGE ASM)
+     endif()
+   elseif(BOOST_CONTEXT_ASSEMBLER STREQUAL armasm)
+-    enable_language(ASM_ARMASM)
++    if(MSVC)
++      set(ASM_LANGUAGE ASM_MARMASM)
++    else()
++      set(ASM_LANGUAGE ASM_ARMASM)
++    endif()
+   else()
+-    enable_language(ASM_MASM)
++    set(ASM_LANGUAGE ASM_MASM)
+   endif()
+ endif()
+ 
+@@ -170,13 +182,29 @@ if(BOOST_CONTEXT_IMPLEMENTATION STREQUAL "fcontext")
+ 
+   set(IMPL_SOURCES ${ASM_SOURCES})
+ 
+-  if(BOOST_CONTEXT_ASSEMBLER STREQUAL masm AND BOOST_CONTEXT_ARCHITECTURE STREQUAL i386)
+-      set_source_files_properties(${ASM_SOURCES} PROPERTIES COMPILE_FLAGS "/safeseh")
+-  endif()
++  if(BOOST_CONTEXT_ASSEMBLER STREQUAL masm)
+ 
+-  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+-    set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "-x" "assembler-with-cpp")
+-  endif()
++    set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "/nologo")
++
++    if(MSVC AND NOT(MSVC_VERSION LESS 1936) AND NOT(CMAKE_CXX_SIMULATE_VERSION))
++      set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "/quiet")
++    endif()
++
++    if(BOOST_CONTEXT_ARCHITECTURE STREQUAL i386)
++      set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "/safeseh")
++    endif()
++
++  # armasm doesn't support most of these options
++  elseif(NOT BOOST_CONTEXT_ASSEMBLER STREQUAL armasm) # masm
++    if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
++      set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "-x" "assembler-with-cpp")
++    elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
++      set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "-Wno-unused-command-line-argument")
++    endif()
++  endif() # masm
++
++  enable_language(${ASM_LANGUAGE})
++  set_source_files_properties(${ASM_SOURCES} PROPERTIES LANGUAGE ${ASM_LANGUAGE})
+ else()
+   set(IMPL_SOURCES
+     src/continuation.cpp
diff --git a/cmake/recipes/external/embree-winarm.patch b/cmake/recipes/external/embree-winarm.patch
new file mode 100644
index 00000000..9102a25b
--- /dev/null
+++ b/cmake/recipes/external/embree-winarm.patch
@@ -0,0 +1,24 @@
+diff --git i/common/sys/intrinsics.h w/common/sys/intrinsics.h
+--- i/common/sys/intrinsics.h
++++ w/common/sys/intrinsics.h
+@@ -92,6 +92,6 @@
+ #if defined(__X86_64__) || defined (__aarch64__) || defined(_M_ARM64)
+   __forceinline size_t bsf(size_t v) {
+-#if defined(__AVX2__) 
++#if defined(__AVX2__) && !defined(_M_ARM64)
+     return _tzcnt_u64(v);
+ #else
+     unsigned long r = 0; _BitScanForward64(&r,v); return r;
+@@ -142,5 +142,5 @@
+   __forceinline size_t bsr(size_t v) {
+-#if defined(__AVX2__) 
++#if defined(__AVX2__) && !defined(_M_ARM64)
+     return 63 -_lzcnt_u64(v);
+ #else
+     unsigned long r = 0; _BitScanReverse64(&r, v); return r;
+@@ -534,4 +534,4 @@
+-#if defined(__AVX2__) && !defined(__aarch64__)
++#if defined(__AVX2__) && !defined(__aarch64__) && !defined(_M_ARM64)
+    __forceinline unsigned int pext(unsigned int a, unsigned int b) { return _pext_u32(a, b); }
+    __forceinline unsigned int pdep(unsigned int a, unsigned int b) { return _pdep_u32(a, b); }
+ #if defined(__X86_64__)
diff --git a/cmake/recipes/external/embree.cmake b/cmake/recipes/external/embree.cmake
index a323f51c..129c9b90 100644
--- a/cmake/recipes/external/embree.cmake
+++ b/cmake/recipes/external/embree.cmake
@@ -33,8 +33,10 @@ option(EMBREE_RAY_PACKETS    "Enable the usage packed ray."
 
 # Match embree's platform detection logic for arm
 if(APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" AND CMAKE_OSX_ARCHITECTURES STREQUAL "") OR ("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES))
+    message(STATUS "Setting arm version of Embree")
     set(EMBREE_ARM ON)
 elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64")
+    message(STATUS "Setting arm version of Embree")
     set(EMBREE_ARM ON)
 endif()
 
@@ -120,9 +122,22 @@ function(embree_import_target)
         # https://github.com/RenderKit/embree/issues/486
         set(EMBREE_PATCHES PATCHES embree.patch)
     endif()
+    set(EMBREE_URL RenderKit/embree)
+    if(WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64" AND EMBREE_VERSION STREQUAL "v4.4.0")
+        message(STATUS "Testing winarm version of embree 4")
+        set(EMBREE_VERSION 03d8ec87213176a7e91c92a18d42e15a8a9bbbc8)
+        set(EMBREE_URL dousse-adobe/embree)
+        # The dousse-adobe fork guards x86 BMI/LZCNT/PEXT intrinsics with !defined(__aarch64__)
+        # (GCC/Clang macro) but misses _M_ARM64 (MSVC macro), causing build failures on Windows
+        # ARM64. We use git apply --ignore-whitespace instead of CPM's PATCHES (patch -p1) because
+        # git-cloned files may have CRLF line endings on Windows, which confuses patch.exe.
+        find_package(Git REQUIRED QUIET)
+        set(EMBREE_ARM64_PATCH "${CMAKE_CURRENT_LIST_DIR}/embree-winarm.patch")
+        set(EMBREE_PATCHES PATCH_COMMAND "${GIT_EXECUTABLE}" apply --ignore-whitespace "${EMBREE_ARM64_PATCH}")
+    endif()
     CPMAddPackage(
         NAME embree
-        GITHUB_REPOSITORY RenderKit/embree
+        GITHUB_REPOSITORY ${EMBREE_URL}
         GIT_TAG ${EMBREE_VERSION}
         ${EMBREE_PATCHES}
     )
diff --git a/cmake/recipes/external/gklib.cmake b/cmake/recipes/external/gklib.cmake
index d62b7ad5..322eb070 100644
--- a/cmake/recipes/external/gklib.cmake
+++ b/cmake/recipes/external/gklib.cmake
@@ -19,14 +19,14 @@ include(CPM)
 CPMAddPackage(
     NAME gklib
     GITHUB_REPOSITORY KarypisLab/GKlib
-    GIT_TAG 67c6e4322bb326a04727995775c3eafc47d7a252
+    GIT_TAG e2856c2f595b153ca1ce9258c5301dbabc4f39f5
     DOWNLOAD_ONLY ON
 )
 
-file(GLOB INC_FILES "${gklib_SOURCE_DIR}/*.h" )
-file(GLOB SRC_FILES "${gklib_SOURCE_DIR}/*.c" )
+file(GLOB INC_FILES "${gklib_SOURCE_DIR}/include/*.h" )
+file(GLOB SRC_FILES "${gklib_SOURCE_DIR}/src/*.c" )
 if(NOT MSVC)
-    list(REMOVE_ITEM SRC_FILES "${gklib_SOURCE_DIR}/gkregex.c")
+    list(REMOVE_ITEM SRC_FILES "${gklib_SOURCE_DIR}/src/gkregex.c")
 endif()
 
 add_library(GKlib STATIC ${INC_FILES} ${SRC_FILES})
@@ -35,11 +35,18 @@ add_library(GKlib::GKlib ALIAS GKlib)
 if(MSVC)
     target_compile_definitions(GKlib PUBLIC USE_GKREGEX)
     target_compile_definitions(GKlib PUBLIC "__thread=__declspec(thread)")
+    # gk_ms_stdint.h / gk_ms_inttypes.h are 2006-era polyfills for pre-VS2010 MSVC.
+    # Modern MSVC (VS2010+) ships <stdint.h> natively, but on ARM64 it defines
+    # int_fast16_t as 'int' (32-bit) while the polyfill defines it as 'int16_t',
+    # causing a redefinition error. Suppress the polyfills via their include guards
+    # and force-include the real system header so the types are still available.
+    target_compile_definitions(GKlib PUBLIC _MSC_STDINT_H_ _MSC_INTTYPES_H_)
+    target_compile_options(GKlib PUBLIC "/FIstdint.h" "/FIinttypes.h")
 endif()
 
 include(GNUInstallDirs)
 target_include_directories(GKlib SYSTEM PUBLIC
-    "$<BUILD_INTERFACE:${gklib_SOURCE_DIR}>"
+    "$<BUILD_INTERFACE:${gklib_SOURCE_DIR}/include>"
     "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
 )
 
diff --git a/cmake/recipes/external/simde.cmake b/cmake/recipes/external/simde.cmake
index cffa746e..9a5f6cae 100644
--- a/cmake/recipes/external/simde.cmake
+++ b/cmake/recipes/external/simde.cmake
@@ -19,14 +19,8 @@ include(CPM)
 CPMAddPackage(
     NAME simde
     GITHUB_REPOSITORY simd-everywhere/simde
-    GIT_TAG 48edfa906d835525e2061fbf6062b7c326d66840
+    GIT_TAG 1747b2482589fe894d49989159421da08c2a8bcd
 )
 
-add_library(simde::simde INTERFACE IMPORTED GLOBAL)
-target_include_directories(simde::simde INTERFACE "${simde_SOURCE_DIR}")
-
 # Enables native aliases. Not ideal but makes it easier to convert old code.
-target_compile_definitions(simde::simde INTERFACE SIMDE_ENABLE_NATIVE_ALIASES)
-
-# Uncomment this line to ensure code can be compiled without native SIMD (i.e. emulates everything)
-# target_compile_definitions(simde::simde INTERFACE SIMDE_NO_NATIVE)
+target_compile_definitions(simde INTERFACE SIMDE_ENABLE_NATIVE_ALIASES)
diff --git a/cmake/recipes/external/winding-number-winarm.patch b/cmake/recipes/external/winding-number-winarm.patch
new file mode 100644
index 00000000..23d22d36
--- /dev/null
+++ b/cmake/recipes/external/winding-number-winarm.patch
@@ -0,0 +1,50 @@
+diff --git i/VM_SSEFunc.h w/VM_SSEFunc.h
+--- i/VM_SSEFunc.h
++++ w/VM_SSEFunc.h
+@@ -39,8 +39,26 @@
+ #include <simde/x86/sse.h>
+ #include <simde/x86/sse4.1.h>
+ 
+-typedef simde__m128 v4sf;
+-typedef simde__m128i v4si;
++#if defined(_MSC_VER) && defined(_M_ARM64)
++// On MSVC ARM64, simde__m128 and simde__m128i are both __n128 (the native NEON type).
++// Plain typedefs make v4sf==v4si, breaking all overloaded functions (vm_shuffle, vm_extract,
++// etc.). Use distinct wrapper structs with implicit conversions to/from the simde types.
++struct v4sf {
++    simde__m128 _v;
++    v4sf() = default;
++    SYS_FORCE_INLINE v4sf(simde__m128 v) noexcept : _v(v) {}
++    SYS_FORCE_INLINE operator simde__m128() const noexcept { return _v; }
++};
++struct v4si {
++    simde__m128i _v;
++    v4si() = default;
++    SYS_FORCE_INLINE v4si(simde__m128i v) noexcept : _v(v) {}
++    SYS_FORCE_INLINE operator simde__m128i() const noexcept { return _v; }
++};
++#else
++typedef simde__m128 v4sf;
++typedef simde__m128i v4si;
++#endif
+ 
+ #define CPU_HAS_SIMD_INSTR	1
+ #define VM_SSE_STYLE		1
+@@ -59,7 +77,7 @@
+ // MSVC has problems casting between __m128 and __m128i, so we implement a
+ // custom casting routine specifically for windows.
+ 
+-#if defined(_MSC_VER)
++#if defined(_MSC_VER) && !defined(_M_ARM64)
+ 
+ static SYS_FORCE_INLINE v4sf
+ vm_v4sf(const v4si &a)
+@@ -249,6 +267,6 @@
+ vm_splats(float a, float b, float c, float d)
+ {
+     return vm_shuffle<0,2,0,2>(
+-	    vm_shuffle<0>(simde_mm_set_ss(a), simde_mm_set_ss(b)),
+-	    vm_shuffle<0>(simde_mm_set_ss(c), simde_mm_set_ss(d)));
++	    vm_shuffle<0>(V4SF(simde_mm_set_ss(a)), V4SF(simde_mm_set_ss(b))),
++	    vm_shuffle<0>(V4SF(simde_mm_set_ss(c)), V4SF(simde_mm_set_ss(d))));
+ }
diff --git a/cmake/recipes/external/winding_number.cmake b/cmake/recipes/external/winding_number.cmake
index 970709f4..7e703d4c 100644
--- a/cmake/recipes/external/winding_number.cmake
+++ b/cmake/recipes/external/winding_number.cmake
@@ -19,10 +19,20 @@ lagrange_find_package(TBB CONFIG REQUIRED)
 include(simde)
 
 include(CPM)
+set(WINDINGNUMBER_PATCHES "")
+if(MSVC AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64")
+    # On MSVC ARM64, simde__m128 and simde__m128i are both __n128, making plain typedefs
+    # identical and breaking all overloaded functions. Patch VM_SSEFunc.h to use distinct
+    # wrapper structs instead. Use git apply --ignore-whitespace for robust CRLF handling.
+    find_package(Git REQUIRED QUIET)
+    set(_wn_patch "${CMAKE_CURRENT_LIST_DIR}/winding-number-winarm.patch")
+    set(WINDINGNUMBER_PATCHES PATCH_COMMAND "${GIT_EXECUTABLE}" apply --ignore-whitespace "${_wn_patch}")
+endif()
 CPMAddPackage(
     NAME WindingNumber
     GITHUB_REPOSITORY jdumas/WindingNumber
     GIT_TAG a48b8f555b490afe7aab9159c7daaf83fa2cdf8e
+    ${WINDINGNUMBER_PATCHES}
 )
 
 set_target_properties(WindingNumber PROPERTIES FOLDER third_party)
diff --git a/modules/core/include/lagrange/utils/point_on_segment.h b/modules/core/include/lagrange/utils/point_on_segment.h
index ed768115..ae68e81d 100644
--- a/modules/core/include/lagrange/utils/point_on_segment.h
+++ b/modules/core/include/lagrange/utils/point_on_segment.h
@@ -19,10 +19,16 @@ namespace lagrange {
 namespace internal {
 
 /// @internal
-bool LA_CORE_API point_on_segment_2d(Eigen::Vector2d p, Eigen::Vector2d a, Eigen::Vector2d b);
+bool LA_CORE_API point_on_segment_2d(
+    const Eigen::Vector2d& p,
+    const Eigen::Vector2d& a,
+    const Eigen::Vector2d& b);
 
 /// @internal
-bool LA_CORE_API point_on_segment_3d(Eigen::Vector3d p, Eigen::Vector3d a, Eigen::Vector3d b);
+bool LA_CORE_API point_on_segment_3d(
+    const Eigen::Vector3d& p,
+    const Eigen::Vector3d& a,
+    const Eigen::Vector3d& b);
 
 } // namespace internal
 
diff --git a/modules/core/python/src/bind_utilities.h b/modules/core/python/src/bind_utilities.h
index 8a372d54..40bced17 100644
--- a/modules/core/python/src/bind_utilities.h
+++ b/modules/core/python/src/bind_utilities.h
@@ -1620,7 +1620,7 @@ oriented.
     m.def(
         "transform_mesh",
         [](MeshType& mesh,
-           Eigen::Matrix<Scalar, 4, 4> affine_transform,
+           const Eigen::Matrix<Scalar, 4, 4>& affine_transform,
            bool normalize_normals,
            bool normalize_tangents_bitangents,
            bool in_place) -> std::optional<MeshType> {
diff --git a/modules/core/src/mesh_cleanup/unflip_uv_triangles.cpp b/modules/core/src/mesh_cleanup/unflip_uv_triangles.cpp
index fe3cef9e..3b7969ef 100644
--- a/modules/core/src/mesh_cleanup/unflip_uv_triangles.cpp
+++ b/modules/core/src/mesh_cleanup/unflip_uv_triangles.cpp
@@ -59,7 +59,7 @@ void unflip_uv_triangles(SurfaceMesh<Scalar, Index>& mesh, const UnflipUVOptions
     };
 
     std::vector<Scalar> additional_uv_values;
-    auto update_uv = [&](Index fid, Index lv, Eigen::Matrix<Scalar, 1, 2> new_uv) {
+    auto update_uv = [&](Index fid, Index lv, const Eigen::Matrix<Scalar, 1, 2>& new_uv) {
         additional_uv_values.insert(additional_uv_values.end(), new_uv.data(), new_uv.data() + 2);
         Index old_id = uv_indices(fid, lv);
         uv_indices(fid, lv) = static_cast<Index>(uv_values_attr.get_num_elements()) +
diff --git a/modules/core/src/utils/point_on_segment.cpp b/modules/core/src/utils/point_on_segment.cpp
index 1cca2b4f..ad8bba26 100644
--- a/modules/core/src/utils/point_on_segment.cpp
+++ b/modules/core/src/utils/point_on_segment.cpp
@@ -17,24 +17,31 @@ namespace lagrange {
 
 namespace internal {
 
-bool point_on_segment_2d(Eigen::Vector2d p, Eigen::Vector2d a, Eigen::Vector2d b)
+bool point_on_segment_2d(
+    const Eigen::Vector2d& p,
+    const Eigen::Vector2d& a,
+    const Eigen::Vector2d& b)
 {
     ExactPredicatesShewchuk pred;
     auto res = pred.orient2D(p.data(), a.data(), b.data());
     if (res != 0) {
         return false;
     }
-    if (a.x() > b.x()) {
-        std::swap(a.x(), b.x());
+    double lo_x = a.x(), hi_x = b.x(); // x-extent of ab, put in order below
+    double lo_y = a.y(), hi_y = b.y(); // y-extent of ab, put in order below
+    if (lo_x > hi_x) {
+        std::swap(lo_x, hi_x);
     }
-    if (a.y() > b.y()) {
-        std::swap(a.y(), b.y());
+    if (lo_y > hi_y) {
+        std::swap(lo_y, hi_y);
     }
-    auto ret = (a.x() <= p.x() && p.x() <= b.x() && a.y() <= p.y() && p.y() <= b.y());
-    return ret;
+    return lo_x <= p.x() && p.x() <= hi_x && lo_y <= p.y() && p.y() <= hi_y;
 }
 
-bool point_on_segment_3d(Eigen::Vector3d p, Eigen::Vector3d a, Eigen::Vector3d b)
+bool point_on_segment_3d(
+    const Eigen::Vector3d& p,
+    const Eigen::Vector3d& a,
+    const Eigen::Vector3d& b)
 {
     for (int d = 0; d < 3; ++d) {
         Eigen::Vector2d p2d(p(d), p((d + 1) % 3));
diff --git a/modules/primitive/src/primitive_utils.h b/modules/primitive/src/primitive_utils.h
index d16a1e85..125d0fe1 100644
--- a/modules/primitive/src/primitive_utils.h
+++ b/modules/primitive/src/primitive_utils.h
@@ -92,8 +92,8 @@ void add_semantic_label(
 template <typename Scalar, typename Index>
 void normalize_uv(
     SurfaceMesh<Scalar, Index>& mesh,
-    Eigen::Matrix<Scalar, 1, 2> min_uv,
-    Eigen::Matrix<Scalar, 1, 2> max_uv)
+    const Eigen::Matrix<Scalar, 1, 2>& min_uv,
+    const Eigen::Matrix<Scalar, 1, 2>& max_uv)
 {
     auto uv_mesh = uv_mesh_ref(mesh);
     auto uvs = vertex_ref(uv_mesh);
diff --git a/modules/solver/CMakeLists.txt b/modules/solver/CMakeLists.txt
index f7a2dd56..8c6a65e1 100644
--- a/modules/solver/CMakeLists.txt
+++ b/modules/solver/CMakeLists.txt
@@ -21,13 +21,17 @@ if(NOT EMSCRIPTEN AND (NOT LAGRANGE_NO_INTERNAL OR NOT SKBUILD))
     # Note: For now we avoid using MKL in our open-source Python bindings, to avoid bloating up the size of
     # the uploaded wheels. The long-term solution is to depend on the PyPI package for MKL at build-time.
 
-    include(blas) # Accelerate on macOS, MKL on other platforms
-    if(APPLE)
-        target_compile_definitions(lagrange_solver INTERFACE LA_SOLVER_ACCELERATE)
-    else()
-        target_compile_definitions(lagrange_solver INTERFACE LA_SOLVER_MKL)
+    # Intel MKL has no Windows ARM64 support; fall back to Eigen's SimplicialLDLT on that platform
+    # (DirectSolver.h uses SimplicialLDLT when LA_SOLVER_MKL is not defined).
+    if(NOT (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64"))
+        include(blas) # Accelerate on macOS, MKL on other platforms
+        if(APPLE)
+            target_compile_definitions(lagrange_solver INTERFACE LA_SOLVER_ACCELERATE)
+        else()
+            target_compile_definitions(lagrange_solver INTERFACE LA_SOLVER_MKL)
+        endif()
+        target_link_libraries(lagrange_solver INTERFACE BLAS::BLAS)
     endif()
-    target_link_libraries(lagrange_solver INTERFACE BLAS::BLAS)
 endif()
 
 if(USE_SANITIZER MATCHES "([Tt]hread)")
diff --git a/modules/ui/include/lagrange/ui/types/AABB.h b/modules/ui/include/lagrange/ui/types/AABB.h
index a66f7132..6d2b2ec9 100644
--- a/modules/ui/include/lagrange/ui/types/AABB.h
+++ b/modules/ui/include/lagrange/ui/types/AABB.h
@@ -61,8 +61,8 @@ class LA_UI_API AABB : public Eigen::AlignedBox3f
     AABB transformed(const Eigen::Affine3f& transform) const;
 
     bool intersects_ray(
-        Eigen::Vector3f origin,
-        Eigen::Vector3f dir,
+        const Eigen::Vector3f& origin,
+        const Eigen::Vector3f& dir,
         float* tmin_out = nullptr,
         float* tmax_out = nullptr) const;
 
diff --git a/modules/ui/include/lagrange/ui/types/Camera.h b/modules/ui/include/lagrange/ui/types/Camera.h
index 3e518b38..e1e7a1a9 100644
--- a/modules/ui/include/lagrange/ui/types/Camera.h
+++ b/modules/ui/include/lagrange/ui/types/Camera.h
@@ -199,7 +199,7 @@ class LA_UI_API Camera
     void rotate_turntable(
         float yaw_delta,
         float pitch_delta,
-        Eigen::Vector3f primary_axis = Eigen::Vector3f::Zero());
+        const Eigen::Vector3f& primary_axis = Eigen::Vector3f::Zero());
 
 
     void rotate_arcball(
@@ -227,7 +227,7 @@ class LA_UI_API Camera
     ///
     /// @param viewport Orthographic rectangle
     ///
-    void set_ortho_viewport(Eigen::Vector4f viewport);
+    void set_ortho_viewport(const Eigen::Vector4f& viewport);
 
     Eigen::Vector4f get_ortho_viewport() const;
 
@@ -314,7 +314,7 @@ class LA_UI_API Camera
     ///
     /// @return Frustum planes of a region
     ///
-    Frustum get_frustum(Eigen::Vector2f min, Eigen::Vector2f max) const;
+    Frustum get_frustum(const Eigen::Vector2f& min, const Eigen::Vector2f& max) const;
 
 protected:
     void update_view();
diff --git a/modules/ui/include/lagrange/ui/types/Shader.h b/modules/ui/include/lagrange/ui/types/Shader.h
index 9eab156d..f3c5628c 100644
--- a/modules/ui/include/lagrange/ui/types/Shader.h
+++ b/modules/ui/include/lagrange/ui/types/Shader.h
@@ -153,15 +153,15 @@ struct LA_UI_API ShaderValue
     int size;
     GLenum type;
     ShaderInterface shaderInterface;
-    const ShaderValue& operator=(Eigen::Vector2f val) const;
-    const ShaderValue& operator=(Eigen::Vector3f val) const;
-    const ShaderValue& operator=(Eigen::Vector4f val) const;
-
-    const ShaderValue& operator=(Eigen::Matrix2f val) const;
-    const ShaderValue& operator=(Eigen::Matrix3f val) const;
-    const ShaderValue& operator=(Eigen::Matrix4f val) const;
-    const ShaderValue& operator=(Eigen::Affine3f val) const;
-    const ShaderValue& operator=(Eigen::Projective3f val) const;
+    const ShaderValue& operator=(const Eigen::Vector2f& val) const;
+    const ShaderValue& operator=(const Eigen::Vector3f& val) const;
+    const ShaderValue& operator=(const Eigen::Vector4f& val) const;
+
+    const ShaderValue& operator=(const Eigen::Matrix2f& val) const;
+    const ShaderValue& operator=(const Eigen::Matrix3f& val) const;
+    const ShaderValue& operator=(const Eigen::Matrix4f& val) const;
+    const ShaderValue& operator=(const Eigen::Affine3f& val) const;
+    const ShaderValue& operator=(const Eigen::Projective3f& val) const;
 
 
     const ShaderValue& operator=(double val) const;
diff --git a/modules/ui/include/lagrange/ui/utils/render.h b/modules/ui/include/lagrange/ui/utils/render.h
index 426f72e9..4cd421dc 100644
--- a/modules/ui/include/lagrange/ui/utils/render.h
+++ b/modules/ui/include/lagrange/ui/utils/render.h
@@ -54,7 +54,7 @@ LA_UI_API void set_render_pass_defaults(GLScope& scope);
 ///
 /// Returns a pair of orthogonal directions, that together with direction form a orthogonal basis
 LA_UI_API std::pair<Eigen::Vector3f, Eigen::Vector3f> compute_perpendicular_plane(
-    Eigen::Vector3f direction);
+    const Eigen::Vector3f& direction);
 
 
 } // namespace render
diff --git a/modules/ui/src/types/AABB.cpp b/modules/ui/src/types/AABB.cpp
index 158d657f..6093919d 100644
--- a/modules/ui/src/types/AABB.cpp
+++ b/modules/ui/src/types/AABB.cpp
@@ -43,8 +43,8 @@ Eigen::Affine3f AABB::get_normalization_transform(bool preserve_aspect) const
 }
 
 bool AABB::intersects_ray(
-    Eigen::Vector3f origin,
-    Eigen::Vector3f dir,
+    const Eigen::Vector3f& origin,
+    const Eigen::Vector3f& dir,
     float* tmin_out /* = nullptr*/,
     float* tmax_out /* = nullptr*/) const
 {
diff --git a/modules/ui/src/types/Camera.cpp b/modules/ui/src/types/Camera.cpp
index c8ce0f7a..4ce4c85a 100644
--- a/modules/ui/src/types/Camera.cpp
+++ b/modules/ui/src/types/Camera.cpp
@@ -311,7 +311,10 @@ void Camera::rotate_tumble(float yaw_delta, float pitch_delta)
 }
 
 
-void Camera::rotate_turntable(float yaw_delta, float pitch_delta, Eigen::Vector3f primary_axis)
+void Camera::rotate_turntable(
+    float yaw_delta,
+    float pitch_delta,
+    const Eigen::Vector3f& primary_axis)
 {
     if (primary_axis.x() != 0 || primary_axis.y() != 0 || primary_axis.z() != 0) {
         set_up(primary_axis);
@@ -400,7 +403,7 @@ void Camera::move_up(float delta)
     update_view();
 }
 
-void Camera::set_ortho_viewport(Eigen::Vector4f viewport)
+void Camera::set_ortho_viewport(const Eigen::Vector4f& viewport)
 {
     if (std::isnan(viewport.x()) || std::isnan(viewport.y()) || std::isnan(viewport.z()) ||
         std::isnan(viewport.w()))
@@ -514,7 +517,7 @@ Frustum Camera::get_frustum() const
     return get_frustum(Eigen::Vector2f(0), Eigen::Vector2f(get_window_size()));
 }
 
-Frustum Camera::get_frustum(Eigen::Vector2f min, Eigen::Vector2f max) const
+Frustum Camera::get_frustum(const Eigen::Vector2f& min, const Eigen::Vector2f& max) const
 {
     auto ray_bottom_left = cast_ray(min);
     auto ray_top_left = cast_ray({min.x(), max.y()});
diff --git a/modules/ui/src/types/Shader.cpp b/modules/ui/src/types/Shader.cpp
index 4c4b9c4a..c0ae2009 100644
--- a/modules/ui/src/types/Shader.cpp
+++ b/modules/ui/src/types/Shader.cpp
@@ -783,7 +783,7 @@ ShaderValue::set_matrices(const Eigen::Affine3f* data, int n, bool transpose /*=
 ShaderValue ShaderValue::none = {-1, 0, 0, SHADER_INTERFACE_NONE};
 
 
-const ShaderValue& ShaderValue::operator=(Eigen::Vector2f val) const
+const ShaderValue& ShaderValue::operator=(const Eigen::Vector2f& val) const
 {
     if (location == -1) return *this;
     assert(type == GL_FLOAT_VEC2);
@@ -797,7 +797,7 @@ const ShaderValue& ShaderValue::operator=(Eigen::Vector2f val) const
     return *this;
 }
 
-const ShaderValue& ShaderValue::operator=(Eigen::Vector3f val) const
+const ShaderValue& ShaderValue::operator=(const Eigen::Vector3f& val) const
 {
     if (location == -1) return *this;
     assert(type == GL_FLOAT_VEC3);
@@ -811,7 +811,7 @@ const ShaderValue& ShaderValue::operator=(Eigen::Vector3f val) const
     return *this;
 }
 
-const ShaderValue& ShaderValue::operator=(Eigen::Vector4f val) const
+const ShaderValue& ShaderValue::operator=(const Eigen::Vector4f& val) const
 {
     if (location == -1) return *this;
 
@@ -837,7 +837,7 @@ const ShaderValue& ShaderValue::operator=(Eigen::Vector4f val) const
     return *this;
 }
 
-const ShaderValue& ShaderValue::operator=(Eigen::Matrix2f val) const
+const ShaderValue& ShaderValue::operator=(const Eigen::Matrix2f& val) const
 {
     if (location == -1) return *this;
     assert(type == GL_FLOAT_MAT2 && shaderInterface == SHADER_INTERFACE_UNIFORM);
@@ -845,7 +845,7 @@ const ShaderValue& ShaderValue::operator=(Eigen::Matrix2f val) const
     return *this;
 }
 
-const ShaderValue& ShaderValue::operator=(Eigen::Matrix3f val) const
+const ShaderValue& ShaderValue::operator=(const Eigen::Matrix3f& val) const
 {
     if (location == -1) return *this;
     assert(type == GL_FLOAT_MAT3 && shaderInterface == SHADER_INTERFACE_UNIFORM);
@@ -853,7 +853,7 @@ const ShaderValue& ShaderValue::operator=(Eigen::Matrix3f val) const
     return *this;
 }
 
-const ShaderValue& ShaderValue::operator=(Eigen::Matrix4f val) const
+const ShaderValue& ShaderValue::operator=(const Eigen::Matrix4f& val) const
 {
     if (location == -1) return *this;
     assert(type == GL_FLOAT_MAT4 && shaderInterface == SHADER_INTERFACE_UNIFORM);
@@ -861,13 +861,13 @@ const ShaderValue& ShaderValue::operator=(Eigen::Matrix4f val) const
     return *this;
 }
 
-const ShaderValue& ShaderValue::operator=(Eigen::Affine3f val) const
+const ShaderValue& ShaderValue::operator=(const Eigen::Affine3f& val) const
 {
     // defer to Matrix4f
     return ((*this) = val.matrix());
 }
 
-const ShaderValue& ShaderValue::operator=(Eigen::Projective3f val) const
+const ShaderValue& ShaderValue::operator=(const Eigen::Projective3f& val) const
 {
     // defer to Matrix4f
     return ((*this) = val.matrix());
diff --git a/modules/ui/src/utils/render.cpp b/modules/ui/src/utils/render.cpp
index f0d80bc7..720b548e 100644
--- a/modules/ui/src/utils/render.cpp
+++ b/modules/ui/src/utils/render.cpp
@@ -25,9 +25,10 @@ namespace ui {
 namespace utils {
 namespace render {
 
-std::pair<Eigen::Vector3f, Eigen::Vector3f> compute_perpendicular_plane(Eigen::Vector3f direction)
+std::pair<Eigen::Vector3f, Eigen::Vector3f> compute_perpendicular_plane(
+    const Eigen::Vector3f& direction)
 {
-    Eigen::Vector3f& u1 = direction;
+    const Eigen::Vector3f& u1 = direction;
     Eigen::Vector3f v2;
 
     if (std::abs(direction.x()) == 1.0f && direction.y() == 0.0f && direction.z() == 0.0f) {