diff --git a/.github/workflows/continuous.yaml b/.github/workflows/continuous.yaml index bd8cf1aa..504829ae 100644 --- a/.github/workflows/continuous.yaml +++ b/.github/workflows/continuous.yaml @@ -195,13 +195,14 @@ jobs: #################### Windows: - name: windows-2025 (${{ matrix.config }}) - runs-on: windows-2025 + name: ${{ matrix.os }} (${{ matrix.config }}) + runs-on: ${{ matrix.os }} env: SCCACHE_GHA_ENABLED: "true" strategy: fail-fast: false matrix: + os: [windows-2025, windows-11-arm] config: [Release, Debug] steps: - name: Show disk space @@ -214,9 +215,26 @@ jobs: fetch-depth: 0 - uses: actions/setup-python@v5 + if: matrix.os != 'windows-11-arm' with: python-version: 3.13 + # On windows-11-arm the hostedtoolcache Python ships only the interpreter binary; + # it lacks include/ headers and libs/python3XX.lib, so CMake cannot satisfy the + # Development.Module component. Use uv instead: it pulls python-build-standalone + # distributions which include full dev files. uv defaults to x64-emulated Python + # on ARM64 Windows (uv PR #13724), so we must pin the aarch64 specifier. + - uses: astral-sh/setup-uv@v6 + if: matrix.os == 'windows-11-arm' + + - name: Install native ARM64 Python via uv + if: matrix.os == 'windows-11-arm' + shell: pwsh + run: | + uv python install cpython-3.13-windows-aarch64 + $pyExe = (uv python find cpython-3.13-windows-aarch64).Trim() + echo "PYTHON_ARM64_EXE=$($pyExe -replace '\\', '/')" >> $env:GITHUB_ENV + - name: Install Ninja uses: seanmiddleditch/gha-setup-ninja@master @@ -227,10 +245,6 @@ jobs: # starving sccache of requests until the default 600s timeout kills the server. echo "SCCACHE_IDLE_TIMEOUT=0" >> ${env:GITHUB_ENV} - - name: Select embree isa (Windows) - if: runner.os == 'Windows' - run: echo "embree_max_isa=AVX2" >> ${env:GITHUB_ENV} - - name: Get number of CPU cores uses: SimenB/github-actions-cpu-cores@v1 id: cpu-cores @@ -238,22 +252,54 @@ jobs: - name: Sccache uses: mozilla-actions/sccache-action@v0.0.10 - # We run configure + build in the same step, since they both need to call VsDevCmd - # Also, cmd uses ^ to break commands into multiple lines (in powershell this is `) - - name: Configure and build - shell: cmd + - name: Set x64 vars + if: matrix.os == 'windows-2025' + run: | + echo "BUILD_DIR=D:/build" >> ${env:GITHUB_ENV} + echo "ARCH=x64" >> ${env:GITHUB_ENV} + + - name: Set arm64 vars + if: matrix.os == 'windows-11-arm' + run: | + echo "BUILD_DIR=C:/build" >> ${env:GITHUB_ENV} + echo "ARCH=arm64" >> ${env:GITHUB_ENV} + + - name: Setup MSVC Developer Command Prompt + uses: ilammy/msvc-dev-cmd@v1 + with: + arch: ${{ env.ARCH }} + + # Cmd uses ^ to break commands into multiple lines, powershell uses ` + - name: Configure + if: matrix.os != 'windows-11-arm' + run: | + cmake --version + cmake -G Ninja ` + -DCMAKE_BUILD_TYPE=${{ matrix.config }} ` + -DLAGRANGE_JENKINS=ON ` + -DLAGRANGE_ALL=ON ` + -DLAGRANGE_POLYSCOPE_MOCK_BACKEND=ON ` + -B ${{ env.BUILD_DIR }} ` + -S . + + # Force CMake to use the ARM64 Python (now complete with dev files) + # and skip the Windows registry so it doesn't fall back to x64 Python. + - name: Configure (ARM64) + if: matrix.os == 'windows-11-arm' run: | - call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\VsDevCmd.bat" -arch=x64 cmake --version - cmake -G Ninja ^ - -DCMAKE_BUILD_TYPE=${{ matrix.config }} ^ - -DLAGRANGE_JENKINS=ON ^ - -DLAGRANGE_ALL=ON ^ - -DLAGRANGE_POLYSCOPE_MOCK_BACKEND=ON ^ - -DEMBREE_MAX_ISA=${{ env.embree_max_isa }} ^ - -B "D:/build" ^ + cmake -G Ninja ` + -DCMAKE_BUILD_TYPE=${{ matrix.config }} ` + -DLAGRANGE_JENKINS=ON ` + -DLAGRANGE_ALL=ON ` + -DLAGRANGE_POLYSCOPE_MOCK_BACKEND=ON ` + -DPython_EXECUTABLE="$env:PYTHON_ARM64_EXE" ` + -DPython_FIND_REGISTRY=NEVER ` + -B ${{ env.BUILD_DIR }} ` -S . - cmake --build "D:/build" -j ${{ steps.cpu-cores.outputs.count }} + + - name: Build + run: cmake --build ${{ env.BUILD_DIR }} -j ${{ steps.cpu-cores.outputs.count }} - name: Sccache stats if: always() @@ -265,4 +311,4 @@ jobs: run: Get-PSDrive - name: Tests - run: cd "D:/build"; ctest --verbose -j ${{ steps.cpu-cores.outputs.count }} + run: cd ${{ env.BUILD_DIR }}; ctest --verbose -j ${{ steps.cpu-cores.outputs.count }} diff --git a/cmake/recipes/external/Boost.cmake b/cmake/recipes/external/Boost.cmake index ac371ef1..fe826840 100644 --- a/cmake/recipes/external/Boost.cmake +++ b/cmake/recipes/external/Boost.cmake @@ -79,12 +79,15 @@ option(BOOST_IOSTREAMS_ENABLE_BZIP2 "Boost.Iostreams: Enable BZip2 support" OFF) option(BOOST_IOSTREAMS_ENABLE_LZMA "Boost.Iostreams: Enable LZMA support" OFF) option(BOOST_IOSTREAMS_ENABLE_ZSTD "Boost.Iostreams: Enable Zstd support" OFF) -set(BOOST_PATCHES "") +set(BOOST_PATCHES) if(EMSCRIPTEN) # Wasm doesn't have rounding mode control yet, so we trick Boost::interval into thinking it has. # https://github.com/WebAssembly/rounding-mode-control # https://github.com/boostorg/interval/issues/44 - set(BOOST_PATCHES PATCHES Boost.wasm.patch) + list(APPEND BOOST_PATCHES Boost.wasm.patch) +endif() +if(WIN32) + list(APPEND BOOST_PATCHES Boost.winarm.patch) endif() # Modern CMake target support was added in Boost 1.82.0 @@ -95,7 +98,7 @@ CPMAddPackage( URL https://github.com/boostorg/boost/releases/download/boost-1.84.0/boost-1.84.0.tar.xz URL_HASH SHA256=2e64e5d79a738d0fa6fb546c6e5c2bd28f88d268a2a080546f74e5ff98f29d0e EXCLUDE_FROM_ALL ON - ${BOOST_PATCHES} + PATCHES ${BOOST_PATCHES} ) # Due to MKL, we may require the release runtime (/MD) even when compiling in Debug mode. diff --git a/cmake/recipes/external/Boost.winarm.patch b/cmake/recipes/external/Boost.winarm.patch new file mode 100644 index 00000000..fc6681aa --- /dev/null +++ b/cmake/recipes/external/Boost.winarm.patch @@ -0,0 +1,132 @@ +Submodule libs/context contains modified content +diff --git i/libs/context/CMakeLists.txt w/libs/context/CMakeLists.txt +index dca5349..bd064b1 100644 +--- i/libs/context/CMakeLists.txt ++++ w/libs/context/CMakeLists.txt +@@ -12,7 +12,7 @@ list(APPEND CMAKE_MODULE_PATH ${boost_context_SOURCE_DIR}/cmake) + + ## Binary format + +-if(WIN32) ++if(WIN32 OR CYGWIN) + set(_default_binfmt pe) + elseif(APPLE) + set(_default_binfmt mach-o) +@@ -31,7 +31,7 @@ math(EXPR _bits "${CMAKE_SIZEOF_VOID_P}*8") + + if(CMAKE_SYSTEM_PROCESSOR MATCHES "^[Aa][Rr][Mm]" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + set(_default_abi aapcs) +-elseif(WIN32) ++elseif(WIN32 OR CYGWIN) + set(_default_abi ms) + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^mips") + if(_bits EQUAL 32) +@@ -60,6 +60,8 @@ elseif(_bits EQUAL 32) + set(_default_arch arm) + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^mips") + set(_default_arch mips32) ++ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc32)") ++ set(_default_arch ppc32) + else() + set(_default_arch i386) + endif() +@@ -69,6 +71,8 @@ else() + set(_default_arch arm64) + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^mips") + set(_default_arch mips64) ++ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc64)") ++ set(_default_arch ppc64) + else() + set(_default_arch x86_64) + endif() +@@ -89,18 +93,22 @@ if(MSVC) + else() + set(_default_asm masm) + endif() ++elseif(BOOST_CONTEXT_ARCHITECTURE STREQUAL arm64 AND MINGW) ++ set(_default_asm armclang) + else() + set(_default_asm gas) + endif() + +-set(BOOST_CONTEXT_ASSEMBLER "${_default_asm}" CACHE STRING "Boost.Context assembler (masm, gas, armasm)") +-set_property(CACHE BOOST_CONTEXT_ASSEMBLER PROPERTY STRINGS masm gas armasm) ++set(BOOST_CONTEXT_ASSEMBLER "${_default_asm}" CACHE STRING "Boost.Context assembler (masm, gas, armasm, armclang)") ++set_property(CACHE BOOST_CONTEXT_ASSEMBLER PROPERTY STRINGS masm gas armasm armclang) + + unset(_default_asm) + + ## Assembler source suffix + +-if(BOOST_CONTEXT_BINARY_FORMAT STREQUAL pe) ++if(BOOST_CONTEXT_ASSEMBLER STREQUAL armclang) ++ set(_default_ext .S) ++elseif(BOOST_CONTEXT_BINARY_FORMAT STREQUAL pe) + set(_default_ext .asm) + elseif(BOOST_CONTEXT_ASSEMBLER STREQUAL gas) + set(_default_ext .S) +@@ -133,18 +141,22 @@ message(STATUS "Boost.Context: " + "implementation ${BOOST_CONTEXT_IMPLEMENTATION}") + + # Enable the right assembler +- ++set(ASM_LANGUAGE) + if(BOOST_CONTEXT_IMPLEMENTATION STREQUAL "fcontext") +- if(BOOST_CONTEXT_ASSEMBLER STREQUAL gas) ++ if(BOOST_CONTEXT_ASSEMBLER STREQUAL gas OR BOOST_CONTEXT_ASSEMBLER STREQUAL armclang) + if(CMAKE_CXX_PLATFORM_ID MATCHES "Cygwin") +- enable_language(ASM-ATT) ++ set(ASM_LANGUAGE ASM-ATT) + else() +- enable_language(ASM) ++ set(ASM_LANGUAGE ASM) + endif() + elseif(BOOST_CONTEXT_ASSEMBLER STREQUAL armasm) +- enable_language(ASM_ARMASM) ++ if(MSVC) ++ set(ASM_LANGUAGE ASM_MARMASM) ++ else() ++ set(ASM_LANGUAGE ASM_ARMASM) ++ endif() + else() +- enable_language(ASM_MASM) ++ set(ASM_LANGUAGE ASM_MASM) + endif() + endif() + +@@ -170,13 +182,29 @@ if(BOOST_CONTEXT_IMPLEMENTATION STREQUAL "fcontext") + + set(IMPL_SOURCES ${ASM_SOURCES}) + +- if(BOOST_CONTEXT_ASSEMBLER STREQUAL masm AND BOOST_CONTEXT_ARCHITECTURE STREQUAL i386) +- set_source_files_properties(${ASM_SOURCES} PROPERTIES COMPILE_FLAGS "/safeseh") +- endif() ++ if(BOOST_CONTEXT_ASSEMBLER STREQUAL masm) + +- if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") +- set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "-x" "assembler-with-cpp") +- endif() ++ set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "/nologo") ++ ++ if(MSVC AND NOT(MSVC_VERSION LESS 1936) AND NOT(CMAKE_CXX_SIMULATE_VERSION)) ++ set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "/quiet") ++ endif() ++ ++ if(BOOST_CONTEXT_ARCHITECTURE STREQUAL i386) ++ set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "/safeseh") ++ endif() ++ ++ # armasm doesn't support most of these options ++ elseif(NOT BOOST_CONTEXT_ASSEMBLER STREQUAL armasm) # masm ++ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") ++ set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "-x" "assembler-with-cpp") ++ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") ++ set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "-Wno-unused-command-line-argument") ++ endif() ++ endif() # masm ++ ++ enable_language(${ASM_LANGUAGE}) ++ set_source_files_properties(${ASM_SOURCES} PROPERTIES LANGUAGE ${ASM_LANGUAGE}) + else() + set(IMPL_SOURCES + src/continuation.cpp diff --git a/cmake/recipes/external/embree-winarm.patch b/cmake/recipes/external/embree-winarm.patch new file mode 100644 index 00000000..9102a25b --- /dev/null +++ b/cmake/recipes/external/embree-winarm.patch @@ -0,0 +1,24 @@ +diff --git i/common/sys/intrinsics.h w/common/sys/intrinsics.h +--- i/common/sys/intrinsics.h ++++ w/common/sys/intrinsics.h +@@ -92,6 +92,6 @@ + #if defined(__X86_64__) || defined (__aarch64__) || defined(_M_ARM64) + __forceinline size_t bsf(size_t v) { +-#if defined(__AVX2__) ++#if defined(__AVX2__) && !defined(_M_ARM64) + return _tzcnt_u64(v); + #else + unsigned long r = 0; _BitScanForward64(&r,v); return r; +@@ -142,5 +142,5 @@ + __forceinline size_t bsr(size_t v) { +-#if defined(__AVX2__) ++#if defined(__AVX2__) && !defined(_M_ARM64) + return 63 -_lzcnt_u64(v); + #else + unsigned long r = 0; _BitScanReverse64(&r, v); return r; +@@ -534,4 +534,4 @@ +-#if defined(__AVX2__) && !defined(__aarch64__) ++#if defined(__AVX2__) && !defined(__aarch64__) && !defined(_M_ARM64) + __forceinline unsigned int pext(unsigned int a, unsigned int b) { return _pext_u32(a, b); } + __forceinline unsigned int pdep(unsigned int a, unsigned int b) { return _pdep_u32(a, b); } + #if defined(__X86_64__) diff --git a/cmake/recipes/external/embree.cmake b/cmake/recipes/external/embree.cmake index a323f51c..129c9b90 100644 --- a/cmake/recipes/external/embree.cmake +++ b/cmake/recipes/external/embree.cmake @@ -33,8 +33,10 @@ option(EMBREE_RAY_PACKETS "Enable the usage packed ray." # Match embree's platform detection logic for arm if(APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" AND CMAKE_OSX_ARCHITECTURES STREQUAL "") OR ("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)) + message(STATUS "Setting arm version of Embree") set(EMBREE_ARM ON) elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64") + message(STATUS "Setting arm version of Embree") set(EMBREE_ARM ON) endif() @@ -120,9 +122,22 @@ function(embree_import_target) # https://github.com/RenderKit/embree/issues/486 set(EMBREE_PATCHES PATCHES embree.patch) endif() + set(EMBREE_URL RenderKit/embree) + if(WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64" AND EMBREE_VERSION STREQUAL "v4.4.0") + message(STATUS "Testing winarm version of embree 4") + set(EMBREE_VERSION 03d8ec87213176a7e91c92a18d42e15a8a9bbbc8) + set(EMBREE_URL dousse-adobe/embree) + # The dousse-adobe fork guards x86 BMI/LZCNT/PEXT intrinsics with !defined(__aarch64__) + # (GCC/Clang macro) but misses _M_ARM64 (MSVC macro), causing build failures on Windows + # ARM64. We use git apply --ignore-whitespace instead of CPM's PATCHES (patch -p1) because + # git-cloned files may have CRLF line endings on Windows, which confuses patch.exe. + find_package(Git REQUIRED QUIET) + set(EMBREE_ARM64_PATCH "${CMAKE_CURRENT_LIST_DIR}/embree-winarm.patch") + set(EMBREE_PATCHES PATCH_COMMAND "${GIT_EXECUTABLE}" apply --ignore-whitespace "${EMBREE_ARM64_PATCH}") + endif() CPMAddPackage( NAME embree - GITHUB_REPOSITORY RenderKit/embree + GITHUB_REPOSITORY ${EMBREE_URL} GIT_TAG ${EMBREE_VERSION} ${EMBREE_PATCHES} ) diff --git a/cmake/recipes/external/gklib.cmake b/cmake/recipes/external/gklib.cmake index d62b7ad5..322eb070 100644 --- a/cmake/recipes/external/gklib.cmake +++ b/cmake/recipes/external/gklib.cmake @@ -19,14 +19,14 @@ include(CPM) CPMAddPackage( NAME gklib GITHUB_REPOSITORY KarypisLab/GKlib - GIT_TAG 67c6e4322bb326a04727995775c3eafc47d7a252 + GIT_TAG e2856c2f595b153ca1ce9258c5301dbabc4f39f5 DOWNLOAD_ONLY ON ) -file(GLOB INC_FILES "${gklib_SOURCE_DIR}/*.h" ) -file(GLOB SRC_FILES "${gklib_SOURCE_DIR}/*.c" ) +file(GLOB INC_FILES "${gklib_SOURCE_DIR}/include/*.h" ) +file(GLOB SRC_FILES "${gklib_SOURCE_DIR}/src/*.c" ) if(NOT MSVC) - list(REMOVE_ITEM SRC_FILES "${gklib_SOURCE_DIR}/gkregex.c") + list(REMOVE_ITEM SRC_FILES "${gklib_SOURCE_DIR}/src/gkregex.c") endif() add_library(GKlib STATIC ${INC_FILES} ${SRC_FILES}) @@ -35,11 +35,18 @@ add_library(GKlib::GKlib ALIAS GKlib) if(MSVC) target_compile_definitions(GKlib PUBLIC USE_GKREGEX) target_compile_definitions(GKlib PUBLIC "__thread=__declspec(thread)") + # gk_ms_stdint.h / gk_ms_inttypes.h are 2006-era polyfills for pre-VS2010 MSVC. + # Modern MSVC (VS2010+) ships natively, but on ARM64 it defines + # int_fast16_t as 'int' (32-bit) while the polyfill defines it as 'int16_t', + # causing a redefinition error. Suppress the polyfills via their include guards + # and force-include the real system header so the types are still available. + target_compile_definitions(GKlib PUBLIC _MSC_STDINT_H_ _MSC_INTTYPES_H_) + target_compile_options(GKlib PUBLIC "/FIstdint.h" "/FIinttypes.h") endif() include(GNUInstallDirs) target_include_directories(GKlib SYSTEM PUBLIC - "$" + "$" "$" ) diff --git a/cmake/recipes/external/simde.cmake b/cmake/recipes/external/simde.cmake index cffa746e..9a5f6cae 100644 --- a/cmake/recipes/external/simde.cmake +++ b/cmake/recipes/external/simde.cmake @@ -19,14 +19,8 @@ include(CPM) CPMAddPackage( NAME simde GITHUB_REPOSITORY simd-everywhere/simde - GIT_TAG 48edfa906d835525e2061fbf6062b7c326d66840 + GIT_TAG 1747b2482589fe894d49989159421da08c2a8bcd ) -add_library(simde::simde INTERFACE IMPORTED GLOBAL) -target_include_directories(simde::simde INTERFACE "${simde_SOURCE_DIR}") - # Enables native aliases. Not ideal but makes it easier to convert old code. -target_compile_definitions(simde::simde INTERFACE SIMDE_ENABLE_NATIVE_ALIASES) - -# Uncomment this line to ensure code can be compiled without native SIMD (i.e. emulates everything) -# target_compile_definitions(simde::simde INTERFACE SIMDE_NO_NATIVE) +target_compile_definitions(simde INTERFACE SIMDE_ENABLE_NATIVE_ALIASES) diff --git a/cmake/recipes/external/winding-number-winarm.patch b/cmake/recipes/external/winding-number-winarm.patch new file mode 100644 index 00000000..23d22d36 --- /dev/null +++ b/cmake/recipes/external/winding-number-winarm.patch @@ -0,0 +1,50 @@ +diff --git i/VM_SSEFunc.h w/VM_SSEFunc.h +--- i/VM_SSEFunc.h ++++ w/VM_SSEFunc.h +@@ -39,8 +39,26 @@ + #include + #include + +-typedef simde__m128 v4sf; +-typedef simde__m128i v4si; ++#if defined(_MSC_VER) && defined(_M_ARM64) ++// On MSVC ARM64, simde__m128 and simde__m128i are both __n128 (the native NEON type). ++// Plain typedefs make v4sf==v4si, breaking all overloaded functions (vm_shuffle, vm_extract, ++// etc.). Use distinct wrapper structs with implicit conversions to/from the simde types. ++struct v4sf { ++ simde__m128 _v; ++ v4sf() = default; ++ SYS_FORCE_INLINE v4sf(simde__m128 v) noexcept : _v(v) {} ++ SYS_FORCE_INLINE operator simde__m128() const noexcept { return _v; } ++}; ++struct v4si { ++ simde__m128i _v; ++ v4si() = default; ++ SYS_FORCE_INLINE v4si(simde__m128i v) noexcept : _v(v) {} ++ SYS_FORCE_INLINE operator simde__m128i() const noexcept { return _v; } ++}; ++#else ++typedef simde__m128 v4sf; ++typedef simde__m128i v4si; ++#endif + + #define CPU_HAS_SIMD_INSTR 1 + #define VM_SSE_STYLE 1 +@@ -59,7 +77,7 @@ + // MSVC has problems casting between __m128 and __m128i, so we implement a + // custom casting routine specifically for windows. + +-#if defined(_MSC_VER) ++#if defined(_MSC_VER) && !defined(_M_ARM64) + + static SYS_FORCE_INLINE v4sf + vm_v4sf(const v4si &a) +@@ -249,6 +267,6 @@ + vm_splats(float a, float b, float c, float d) + { + return vm_shuffle<0,2,0,2>( +- vm_shuffle<0>(simde_mm_set_ss(a), simde_mm_set_ss(b)), +- vm_shuffle<0>(simde_mm_set_ss(c), simde_mm_set_ss(d))); ++ vm_shuffle<0>(V4SF(simde_mm_set_ss(a)), V4SF(simde_mm_set_ss(b))), ++ vm_shuffle<0>(V4SF(simde_mm_set_ss(c)), V4SF(simde_mm_set_ss(d)))); + } diff --git a/cmake/recipes/external/winding_number.cmake b/cmake/recipes/external/winding_number.cmake index 970709f4..7e703d4c 100644 --- a/cmake/recipes/external/winding_number.cmake +++ b/cmake/recipes/external/winding_number.cmake @@ -19,10 +19,20 @@ lagrange_find_package(TBB CONFIG REQUIRED) include(simde) include(CPM) +set(WINDINGNUMBER_PATCHES "") +if(MSVC AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64") + # On MSVC ARM64, simde__m128 and simde__m128i are both __n128, making plain typedefs + # identical and breaking all overloaded functions. Patch VM_SSEFunc.h to use distinct + # wrapper structs instead. Use git apply --ignore-whitespace for robust CRLF handling. + find_package(Git REQUIRED QUIET) + set(_wn_patch "${CMAKE_CURRENT_LIST_DIR}/winding-number-winarm.patch") + set(WINDINGNUMBER_PATCHES PATCH_COMMAND "${GIT_EXECUTABLE}" apply --ignore-whitespace "${_wn_patch}") +endif() CPMAddPackage( NAME WindingNumber GITHUB_REPOSITORY jdumas/WindingNumber GIT_TAG a48b8f555b490afe7aab9159c7daaf83fa2cdf8e + ${WINDINGNUMBER_PATCHES} ) set_target_properties(WindingNumber PROPERTIES FOLDER third_party) diff --git a/modules/core/include/lagrange/utils/point_on_segment.h b/modules/core/include/lagrange/utils/point_on_segment.h index ed768115..ae68e81d 100644 --- a/modules/core/include/lagrange/utils/point_on_segment.h +++ b/modules/core/include/lagrange/utils/point_on_segment.h @@ -19,10 +19,16 @@ namespace lagrange { namespace internal { /// @internal -bool LA_CORE_API point_on_segment_2d(Eigen::Vector2d p, Eigen::Vector2d a, Eigen::Vector2d b); +bool LA_CORE_API point_on_segment_2d( + const Eigen::Vector2d& p, + const Eigen::Vector2d& a, + const Eigen::Vector2d& b); /// @internal -bool LA_CORE_API point_on_segment_3d(Eigen::Vector3d p, Eigen::Vector3d a, Eigen::Vector3d b); +bool LA_CORE_API point_on_segment_3d( + const Eigen::Vector3d& p, + const Eigen::Vector3d& a, + const Eigen::Vector3d& b); } // namespace internal diff --git a/modules/core/python/src/bind_utilities.h b/modules/core/python/src/bind_utilities.h index 8a372d54..40bced17 100644 --- a/modules/core/python/src/bind_utilities.h +++ b/modules/core/python/src/bind_utilities.h @@ -1620,7 +1620,7 @@ oriented. m.def( "transform_mesh", [](MeshType& mesh, - Eigen::Matrix affine_transform, + const Eigen::Matrix& affine_transform, bool normalize_normals, bool normalize_tangents_bitangents, bool in_place) -> std::optional { diff --git a/modules/core/src/mesh_cleanup/unflip_uv_triangles.cpp b/modules/core/src/mesh_cleanup/unflip_uv_triangles.cpp index fe3cef9e..3b7969ef 100644 --- a/modules/core/src/mesh_cleanup/unflip_uv_triangles.cpp +++ b/modules/core/src/mesh_cleanup/unflip_uv_triangles.cpp @@ -59,7 +59,7 @@ void unflip_uv_triangles(SurfaceMesh& mesh, const UnflipUVOptions }; std::vector additional_uv_values; - auto update_uv = [&](Index fid, Index lv, Eigen::Matrix new_uv) { + auto update_uv = [&](Index fid, Index lv, const Eigen::Matrix& new_uv) { additional_uv_values.insert(additional_uv_values.end(), new_uv.data(), new_uv.data() + 2); Index old_id = uv_indices(fid, lv); uv_indices(fid, lv) = static_cast(uv_values_attr.get_num_elements()) + diff --git a/modules/core/src/utils/point_on_segment.cpp b/modules/core/src/utils/point_on_segment.cpp index 1cca2b4f..ad8bba26 100644 --- a/modules/core/src/utils/point_on_segment.cpp +++ b/modules/core/src/utils/point_on_segment.cpp @@ -17,24 +17,31 @@ namespace lagrange { namespace internal { -bool point_on_segment_2d(Eigen::Vector2d p, Eigen::Vector2d a, Eigen::Vector2d b) +bool point_on_segment_2d( + const Eigen::Vector2d& p, + const Eigen::Vector2d& a, + const Eigen::Vector2d& b) { ExactPredicatesShewchuk pred; auto res = pred.orient2D(p.data(), a.data(), b.data()); if (res != 0) { return false; } - if (a.x() > b.x()) { - std::swap(a.x(), b.x()); + double ax = a.x(), ay = a.y(); + double bx = b.x(), by = b.y(); + if (ax > bx) { + std::swap(ax, bx); } - if (a.y() > b.y()) { - std::swap(a.y(), b.y()); + if (ay > by) { + std::swap(ay, by); } - auto ret = (a.x() <= p.x() && p.x() <= b.x() && a.y() <= p.y() && p.y() <= b.y()); - return ret; + return (ax <= p.x() && p.x() <= bx && ay <= p.y() && p.y() <= by); } -bool point_on_segment_3d(Eigen::Vector3d p, Eigen::Vector3d a, Eigen::Vector3d b) +bool point_on_segment_3d( + const Eigen::Vector3d& p, + const Eigen::Vector3d& a, + const Eigen::Vector3d& b) { for (int d = 0; d < 3; ++d) { Eigen::Vector2d p2d(p(d), p((d + 1) % 3)); diff --git a/modules/primitive/src/primitive_utils.h b/modules/primitive/src/primitive_utils.h index d16a1e85..125d0fe1 100644 --- a/modules/primitive/src/primitive_utils.h +++ b/modules/primitive/src/primitive_utils.h @@ -92,8 +92,8 @@ void add_semantic_label( template void normalize_uv( SurfaceMesh& mesh, - Eigen::Matrix min_uv, - Eigen::Matrix max_uv) + const Eigen::Matrix& min_uv, + const Eigen::Matrix& max_uv) { auto uv_mesh = uv_mesh_ref(mesh); auto uvs = vertex_ref(uv_mesh); diff --git a/modules/solver/CMakeLists.txt b/modules/solver/CMakeLists.txt index f7a2dd56..8c6a65e1 100644 --- a/modules/solver/CMakeLists.txt +++ b/modules/solver/CMakeLists.txt @@ -21,13 +21,17 @@ if(NOT EMSCRIPTEN AND (NOT LAGRANGE_NO_INTERNAL OR NOT SKBUILD)) # Note: For now we avoid using MKL in our open-source Python bindings, to avoid bloating up the size of # the uploaded wheels. The long-term solution is to depend on the PyPI package for MKL at build-time. - include(blas) # Accelerate on macOS, MKL on other platforms - if(APPLE) - target_compile_definitions(lagrange_solver INTERFACE LA_SOLVER_ACCELERATE) - else() - target_compile_definitions(lagrange_solver INTERFACE LA_SOLVER_MKL) + # Intel MKL has no Windows ARM64 support; fall back to Eigen's SimplicialLDLT on that platform + # (DirectSolver.h uses SimplicialLDLT when LA_SOLVER_MKL is not defined). + if(NOT (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64")) + include(blas) # Accelerate on macOS, MKL on other platforms + if(APPLE) + target_compile_definitions(lagrange_solver INTERFACE LA_SOLVER_ACCELERATE) + else() + target_compile_definitions(lagrange_solver INTERFACE LA_SOLVER_MKL) + endif() + target_link_libraries(lagrange_solver INTERFACE BLAS::BLAS) endif() - target_link_libraries(lagrange_solver INTERFACE BLAS::BLAS) endif() if(USE_SANITIZER MATCHES "([Tt]hread)") diff --git a/modules/ui/include/lagrange/ui/types/AABB.h b/modules/ui/include/lagrange/ui/types/AABB.h index a66f7132..6d2b2ec9 100644 --- a/modules/ui/include/lagrange/ui/types/AABB.h +++ b/modules/ui/include/lagrange/ui/types/AABB.h @@ -61,8 +61,8 @@ class LA_UI_API AABB : public Eigen::AlignedBox3f AABB transformed(const Eigen::Affine3f& transform) const; bool intersects_ray( - Eigen::Vector3f origin, - Eigen::Vector3f dir, + const Eigen::Vector3f& origin, + const Eigen::Vector3f& dir, float* tmin_out = nullptr, float* tmax_out = nullptr) const; diff --git a/modules/ui/include/lagrange/ui/types/Camera.h b/modules/ui/include/lagrange/ui/types/Camera.h index 3e518b38..e1e7a1a9 100644 --- a/modules/ui/include/lagrange/ui/types/Camera.h +++ b/modules/ui/include/lagrange/ui/types/Camera.h @@ -199,7 +199,7 @@ class LA_UI_API Camera void rotate_turntable( float yaw_delta, float pitch_delta, - Eigen::Vector3f primary_axis = Eigen::Vector3f::Zero()); + const Eigen::Vector3f& primary_axis = Eigen::Vector3f::Zero()); void rotate_arcball( @@ -227,7 +227,7 @@ class LA_UI_API Camera /// /// @param viewport Orthographic rectangle /// - void set_ortho_viewport(Eigen::Vector4f viewport); + void set_ortho_viewport(const Eigen::Vector4f& viewport); Eigen::Vector4f get_ortho_viewport() const; @@ -314,7 +314,7 @@ class LA_UI_API Camera /// /// @return Frustum planes of a region /// - Frustum get_frustum(Eigen::Vector2f min, Eigen::Vector2f max) const; + Frustum get_frustum(const Eigen::Vector2f& min, const Eigen::Vector2f& max) const; protected: void update_view(); diff --git a/modules/ui/include/lagrange/ui/types/Shader.h b/modules/ui/include/lagrange/ui/types/Shader.h index 9eab156d..f3c5628c 100644 --- a/modules/ui/include/lagrange/ui/types/Shader.h +++ b/modules/ui/include/lagrange/ui/types/Shader.h @@ -153,15 +153,15 @@ struct LA_UI_API ShaderValue int size; GLenum type; ShaderInterface shaderInterface; - const ShaderValue& operator=(Eigen::Vector2f val) const; - const ShaderValue& operator=(Eigen::Vector3f val) const; - const ShaderValue& operator=(Eigen::Vector4f val) const; - - const ShaderValue& operator=(Eigen::Matrix2f val) const; - const ShaderValue& operator=(Eigen::Matrix3f val) const; - const ShaderValue& operator=(Eigen::Matrix4f val) const; - const ShaderValue& operator=(Eigen::Affine3f val) const; - const ShaderValue& operator=(Eigen::Projective3f val) const; + const ShaderValue& operator=(const Eigen::Vector2f& val) const; + const ShaderValue& operator=(const Eigen::Vector3f& val) const; + const ShaderValue& operator=(const Eigen::Vector4f& val) const; + + const ShaderValue& operator=(const Eigen::Matrix2f& val) const; + const ShaderValue& operator=(const Eigen::Matrix3f& val) const; + const ShaderValue& operator=(const Eigen::Matrix4f& val) const; + const ShaderValue& operator=(const Eigen::Affine3f& val) const; + const ShaderValue& operator=(const Eigen::Projective3f& val) const; const ShaderValue& operator=(double val) const; diff --git a/modules/ui/include/lagrange/ui/utils/render.h b/modules/ui/include/lagrange/ui/utils/render.h index 426f72e9..4cd421dc 100644 --- a/modules/ui/include/lagrange/ui/utils/render.h +++ b/modules/ui/include/lagrange/ui/utils/render.h @@ -54,7 +54,7 @@ LA_UI_API void set_render_pass_defaults(GLScope& scope); /// /// Returns a pair of orthogonal directions, that together with direction form a orthogonal basis LA_UI_API std::pair compute_perpendicular_plane( - Eigen::Vector3f direction); + const Eigen::Vector3f& direction); } // namespace render diff --git a/modules/ui/src/types/AABB.cpp b/modules/ui/src/types/AABB.cpp index 158d657f..6093919d 100644 --- a/modules/ui/src/types/AABB.cpp +++ b/modules/ui/src/types/AABB.cpp @@ -43,8 +43,8 @@ Eigen::Affine3f AABB::get_normalization_transform(bool preserve_aspect) const } bool AABB::intersects_ray( - Eigen::Vector3f origin, - Eigen::Vector3f dir, + const Eigen::Vector3f& origin, + const Eigen::Vector3f& dir, float* tmin_out /* = nullptr*/, float* tmax_out /* = nullptr*/) const { diff --git a/modules/ui/src/types/Camera.cpp b/modules/ui/src/types/Camera.cpp index c8ce0f7a..4ce4c85a 100644 --- a/modules/ui/src/types/Camera.cpp +++ b/modules/ui/src/types/Camera.cpp @@ -311,7 +311,10 @@ void Camera::rotate_tumble(float yaw_delta, float pitch_delta) } -void Camera::rotate_turntable(float yaw_delta, float pitch_delta, Eigen::Vector3f primary_axis) +void Camera::rotate_turntable( + float yaw_delta, + float pitch_delta, + const Eigen::Vector3f& primary_axis) { if (primary_axis.x() != 0 || primary_axis.y() != 0 || primary_axis.z() != 0) { set_up(primary_axis); @@ -400,7 +403,7 @@ void Camera::move_up(float delta) update_view(); } -void Camera::set_ortho_viewport(Eigen::Vector4f viewport) +void Camera::set_ortho_viewport(const Eigen::Vector4f& viewport) { if (std::isnan(viewport.x()) || std::isnan(viewport.y()) || std::isnan(viewport.z()) || std::isnan(viewport.w())) @@ -514,7 +517,7 @@ Frustum Camera::get_frustum() const return get_frustum(Eigen::Vector2f(0), Eigen::Vector2f(get_window_size())); } -Frustum Camera::get_frustum(Eigen::Vector2f min, Eigen::Vector2f max) const +Frustum Camera::get_frustum(const Eigen::Vector2f& min, const Eigen::Vector2f& max) const { auto ray_bottom_left = cast_ray(min); auto ray_top_left = cast_ray({min.x(), max.y()}); diff --git a/modules/ui/src/types/Shader.cpp b/modules/ui/src/types/Shader.cpp index 4c4b9c4a..c0ae2009 100644 --- a/modules/ui/src/types/Shader.cpp +++ b/modules/ui/src/types/Shader.cpp @@ -783,7 +783,7 @@ ShaderValue::set_matrices(const Eigen::Affine3f* data, int n, bool transpose /*= ShaderValue ShaderValue::none = {-1, 0, 0, SHADER_INTERFACE_NONE}; -const ShaderValue& ShaderValue::operator=(Eigen::Vector2f val) const +const ShaderValue& ShaderValue::operator=(const Eigen::Vector2f& val) const { if (location == -1) return *this; assert(type == GL_FLOAT_VEC2); @@ -797,7 +797,7 @@ const ShaderValue& ShaderValue::operator=(Eigen::Vector2f val) const return *this; } -const ShaderValue& ShaderValue::operator=(Eigen::Vector3f val) const +const ShaderValue& ShaderValue::operator=(const Eigen::Vector3f& val) const { if (location == -1) return *this; assert(type == GL_FLOAT_VEC3); @@ -811,7 +811,7 @@ const ShaderValue& ShaderValue::operator=(Eigen::Vector3f val) const return *this; } -const ShaderValue& ShaderValue::operator=(Eigen::Vector4f val) const +const ShaderValue& ShaderValue::operator=(const Eigen::Vector4f& val) const { if (location == -1) return *this; @@ -837,7 +837,7 @@ const ShaderValue& ShaderValue::operator=(Eigen::Vector4f val) const return *this; } -const ShaderValue& ShaderValue::operator=(Eigen::Matrix2f val) const +const ShaderValue& ShaderValue::operator=(const Eigen::Matrix2f& val) const { if (location == -1) return *this; assert(type == GL_FLOAT_MAT2 && shaderInterface == SHADER_INTERFACE_UNIFORM); @@ -845,7 +845,7 @@ const ShaderValue& ShaderValue::operator=(Eigen::Matrix2f val) const return *this; } -const ShaderValue& ShaderValue::operator=(Eigen::Matrix3f val) const +const ShaderValue& ShaderValue::operator=(const Eigen::Matrix3f& val) const { if (location == -1) return *this; assert(type == GL_FLOAT_MAT3 && shaderInterface == SHADER_INTERFACE_UNIFORM); @@ -853,7 +853,7 @@ const ShaderValue& ShaderValue::operator=(Eigen::Matrix3f val) const return *this; } -const ShaderValue& ShaderValue::operator=(Eigen::Matrix4f val) const +const ShaderValue& ShaderValue::operator=(const Eigen::Matrix4f& val) const { if (location == -1) return *this; assert(type == GL_FLOAT_MAT4 && shaderInterface == SHADER_INTERFACE_UNIFORM); @@ -861,13 +861,13 @@ const ShaderValue& ShaderValue::operator=(Eigen::Matrix4f val) const return *this; } -const ShaderValue& ShaderValue::operator=(Eigen::Affine3f val) const +const ShaderValue& ShaderValue::operator=(const Eigen::Affine3f& val) const { // defer to Matrix4f return ((*this) = val.matrix()); } -const ShaderValue& ShaderValue::operator=(Eigen::Projective3f val) const +const ShaderValue& ShaderValue::operator=(const Eigen::Projective3f& val) const { // defer to Matrix4f return ((*this) = val.matrix()); diff --git a/modules/ui/src/utils/render.cpp b/modules/ui/src/utils/render.cpp index f0d80bc7..720b548e 100644 --- a/modules/ui/src/utils/render.cpp +++ b/modules/ui/src/utils/render.cpp @@ -25,9 +25,10 @@ namespace ui { namespace utils { namespace render { -std::pair compute_perpendicular_plane(Eigen::Vector3f direction) +std::pair compute_perpendicular_plane( + const Eigen::Vector3f& direction) { - Eigen::Vector3f& u1 = direction; + const Eigen::Vector3f& u1 = direction; Eigen::Vector3f v2; if (std::abs(direction.x()) == 1.0f && direction.y() == 0.0f && direction.z() == 0.0f) {