-
Notifications
You must be signed in to change notification settings - Fork 178
Improved error handling of long paths for EXE installers #1228
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
5d68300
85c1bb5
dbd5521
ddcb0b0
c66ac79
28f6004
5add07a
dec710e
d0e5679
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,10 +13,11 @@ | |
| import sys | ||
| import tempfile | ||
| from collections import defaultdict | ||
| from collections.abc import Iterable | ||
| from itertools import groupby | ||
| from os.path import abspath, expanduser, isdir, join | ||
| from subprocess import check_call | ||
| from typing import TYPE_CHECKING | ||
| from typing import TYPE_CHECKING, Literal | ||
|
|
||
| from constructor.utils import filename_dist | ||
|
|
||
|
|
@@ -144,15 +145,43 @@ def _fetch(download_dir, precs): | |
| return list(dict.fromkeys(PrefixGraph(pc.iter_records()).graph)) | ||
|
|
||
|
|
||
| def check_duplicates_files(pc_recs, platform, duplicate_files="error"): | ||
| def check_duplicates_files( | ||
| pc_recs: Iterable["PackageCacheRecord"], | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you add |
||
| platform: str, | ||
| duplicate_files: Literal["error", "warn", "skip"] = "error", | ||
| env_prefixes: dict["PackageCacheRecord", str] | None = None, | ||
| ) -> tuple[int, int, int]: | ||
| """ | ||
| Check for duplicate files across packages and compute size/path metrics. | ||
|
|
||
| Iterates through all files in the provided package cache records to: | ||
| 1. Detect duplicate files (same path in multiple packages) | ||
| 2. Compute approximate tarball and extracted sizes | ||
| 3. Track the longest relative file path (for MAX_PATH validation on Windows) | ||
|
|
||
| Args: | ||
| pc_recs: Package cache records to check. | ||
| platform: Target platform string (e.g., "win-64", "linux-64"). | ||
| duplicate_files: How to handle duplicates - "error", "warn", or "skip". | ||
| env_prefixes: Optional dict mapping PackageCacheRecord -> path prefix string. | ||
| Used to account for extra_envs paths which are installed under | ||
| "envs/<name>/" rather than the base install directory. Records not | ||
| in this dict are assumed to be in the base environment (no prefix). | ||
|
|
||
| Returns: | ||
| Tuple of (approx_tarball_size, approx_extracted_size, max_relative_path_length) | ||
| """ | ||
| assert duplicate_files in ("warn", "skip", "error") | ||
| if env_prefixes is None: | ||
| env_prefixes = {} | ||
|
|
||
| map_members_scase = defaultdict(set) | ||
| map_members_icase = defaultdict(lambda: {"files": set(), "fns": set()}) | ||
|
|
||
| # Keep a min, 50MB buffer size | ||
| total_tarball_size = 52428800 | ||
| total_extracted_pkgs_size = 52428800 | ||
| max_relative_path_length = 0 | ||
|
|
||
| for pc_rec in pc_recs: | ||
| fn = pc_rec.fn | ||
|
|
@@ -161,8 +190,12 @@ def check_duplicates_files(pc_recs, platform, duplicate_files="error"): | |
| total_tarball_size += int(pc_rec.get("size", 0)) | ||
|
|
||
| paths_data = read_paths_json(extracted_package_dir).paths | ||
| env_prefix_len = len(env_prefixes.get(pc_rec, "")) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does the |
||
| for path_data in paths_data: | ||
| short_path = path_data.path | ||
| max_relative_path_length = max( | ||
| max_relative_path_length, env_prefix_len + len(short_path) | ||
| ) | ||
| try: | ||
| size = path_data.size_in_bytes or getsize(join(extracted_package_dir, short_path)) | ||
| except AttributeError: | ||
|
|
@@ -176,7 +209,7 @@ def check_duplicates_files(pc_recs, platform, duplicate_files="error"): | |
| map_members_icase[short_path_lower]["fns"].add(fn) | ||
|
|
||
| if duplicate_files == "skip": | ||
| return total_tarball_size, total_extracted_pkgs_size | ||
| return total_tarball_size, total_extracted_pkgs_size, max_relative_path_length | ||
|
|
||
| logger.info("Checking for duplicate files ...") | ||
| for member in map_members_scase: | ||
|
|
@@ -201,7 +234,7 @@ def check_duplicates_files(pc_recs, platform, duplicate_files="error"): | |
| else: | ||
| sys.exit(f"Error: {msg_str}") | ||
|
|
||
| return total_tarball_size, total_extracted_pkgs_size | ||
| return total_tarball_size, total_extracted_pkgs_size, max_relative_path_length | ||
|
|
||
|
|
||
| def _precs_from_environment(environment, input_dir): | ||
|
|
@@ -443,18 +476,24 @@ def _main( | |
| input_dir=input_dir, | ||
| ) | ||
| if dry_run: | ||
| return None, None, None, None, None, None, None, None | ||
| return None, None, None, None, None, None, None, None, None | ||
| pc_recs, _urls, dists, has_conda = _fetch_precs( | ||
| precs, download_dir, transmute_file_type=transmute_file_type | ||
| ) | ||
| all_pc_recs = pc_recs.copy() | ||
|
|
||
| extra_envs_data = {} | ||
| env_prefixes = {} # Maps pc_rec -> "envs/<name>/" prefix for max path calculation | ||
| for env_name, env_precs in extra_envs_precs.items(): | ||
| env_pc_recs, env_urls, env_dists, _ = _fetch_precs( | ||
| env_precs, download_dir, transmute_file_type=transmute_file_type | ||
| ) | ||
| extra_envs_data[env_name] = {"_urls": env_urls, "_dists": env_dists, "_records": env_precs} | ||
| env_prefix = f"envs/{env_name}/" | ||
| for pc_rec in env_pc_recs: | ||
| existing_prefix = env_prefixes.get(pc_rec, "") | ||
| if len(env_prefix) > len(existing_prefix): | ||
| env_prefixes[pc_rec] = env_prefix | ||
| all_pc_recs += env_pc_recs | ||
|
|
||
| duplicate_files = "warn" if ignore_duplicate_files else "error" | ||
|
|
@@ -463,8 +502,12 @@ def _main( | |
| duplicate_files = "skip" | ||
|
|
||
| all_pc_recs = list({rec: None for rec in all_pc_recs}) # deduplicate | ||
| approx_tarballs_size, approx_pkgs_size = check_duplicates_files( | ||
| pc_recs, platform, duplicate_files=duplicate_files | ||
| # Pass all_pc_recs (base + extra_envs) to check_duplicates_files: | ||
| # - When extra_envs exists, duplicate_files="skip" so only sizes and max path are computed | ||
| # - When no extra_envs, all_pc_recs == pc_recs | ||
| # - env_prefixes dict ensures max path accounts for "envs/<name>/" prefix in extra_envs | ||
| approx_tarballs_size, approx_pkgs_size, max_relative_path_length = check_duplicates_files( | ||
| all_pc_recs, platform, duplicate_files=duplicate_files, env_prefixes=env_prefixes | ||
| ) | ||
|
|
||
| return ( | ||
|
|
@@ -476,6 +519,7 @@ def _main( | |
| approx_pkgs_size, | ||
| has_conda, | ||
| extra_envs_data, | ||
| max_relative_path_length, | ||
| ) | ||
|
|
||
|
|
||
|
|
@@ -529,6 +573,7 @@ def main(info, verbose=True, dry_run=False, conda_exe="conda.exe"): | |
| approx_pkgs_size, | ||
| has_conda, | ||
| extra_envs_info, | ||
| max_relative_path_length, | ||
| ) = _main( | ||
| name, | ||
| version, | ||
|
|
@@ -561,3 +606,4 @@ def main(info, verbose=True, dry_run=False, conda_exe="conda.exe"): | |
| info["_has_conda"] = has_conda | ||
| # contains {env_name: [_dists, _urls, _records]} for each extra environment | ||
| info["_extra_envs_info"] = extra_envs_info | ||
| info["_max_relative_path_length"] = max_relative_path_length | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -130,6 +130,7 @@ ${Using:StrFunc} StrStr | |
| # are not written into install.log. STEP_LOG creates an intermittent file | ||
| # that is output into these streams after the commands finish. | ||
| !define STEP_LOG "$INSTDIR\.step.log" | ||
| !define MAX_RELATIVE_PATH_LENGTH {{ max_relative_path_length }} | ||
|
|
||
| var /global INIT_CONDA | ||
| var /global REG_PY | ||
|
|
@@ -1115,6 +1116,26 @@ Function OnDirectoryLeave | |
| ${EndIf} | ||
| ${EndIf} | ||
|
|
||
| # MAX_PATH check - installation will fail if path exceeds 260 characters | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The path-length handling logic before this section needs to be trimmed. We can get rid of all the |
||
| ${If} $LongPathsEnabled == "0" | ||
| IntOp $0 $InstDirLen + ${MAX_RELATIVE_PATH_LENGTH} | ||
| IntOp $0 $0 + 1 # Account for path separator between install dir and relative path | ||
| ${If} $0 > 260 | ||
| IntOp $1 260 - ${MAX_RELATIVE_PATH_LENGTH} | ||
| IntOp $1 $1 - 1 # Account for path separator | ||
| ${If} ${Silent} | ||
| ${Print} "::error:: The installation path is too long. Your path is $InstDirLen characters, but must be at most $1 characters." | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Installers automatically enable long paths if they have administrative privileges. We should keep it that way.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good point, see 5add07a |
||
| ${Else} | ||
| MessageBox MB_OK|MB_ICONSTOP \ | ||
| "The installation path is too long.$\n$\n\ | ||
| Your path is $InstDirLen characters, but must be at most $1 characters.$\n$\n\ | ||
| Please choose a shorter installation path." \ | ||
| /SD IDOK | ||
| ${EndIf} | ||
| Abort | ||
| ${EndIf} | ||
| ${EndIf} | ||
|
|
||
| # Call the CheckForSpaces function. | ||
| Push $INSTDIR # Input string (install path). | ||
| Call CheckForSpaces | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,6 @@ | ||
| pytest | ||
| pytest-cov | ||
| pytest-mock | ||
| coverage | ||
| jinja2 | ||
| ruamel.yaml |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,7 @@ | |
|
|
||
| import pytest | ||
|
|
||
| from constructor.fcp import check_duplicates, exclude_packages | ||
| from constructor.fcp import check_duplicates, check_duplicates_files, exclude_packages | ||
|
|
||
|
|
||
| class GenericObject: | ||
|
|
@@ -64,3 +64,101 @@ def test_exclude_packages(values: tuple[..., GenericObject], expected_value): | |
| with context: | ||
| packages = exclude_packages(*values) | ||
| assert packages == expected_value | ||
|
|
||
|
|
||
| class MockPathData: | ||
| """Represents a file path entry.""" | ||
|
|
||
| def __init__(self, path): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's add typing to new code where we can.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done 5add07a |
||
| self.path = path | ||
| self.size_in_bytes = 1 # must be non-zero to avoid filesystem access | ||
|
|
||
|
|
||
| class MockPathsJson: | ||
| """Wraps a list of MockPathData entries.""" | ||
|
|
||
| def __init__(self, paths): | ||
| self.paths = [MockPathData(p) for p in paths] | ||
|
|
||
|
|
||
| class MockPackageCacheRecord: | ||
| """Represents a package record.""" | ||
|
|
||
| def __init__(self, fn, extracted_package_dir, paths): | ||
| self.fn = fn | ||
| self.extracted_package_dir = extracted_package_dir | ||
| self._paths = paths | ||
|
|
||
| def get(self, key, default=None): | ||
| return default | ||
|
|
||
|
|
||
| def test_check_duplicates_files_returns_max_path_length(mocker): | ||
| """Verify function returns max path length as third tuple element.""" | ||
| pc_rec1 = MockPackageCacheRecord( | ||
| fn="pkg1-1.0.tar.bz2", | ||
| extracted_package_dir="/cache/pkg1", | ||
| paths=["lib/short.py"], # 12 chars | ||
| ) | ||
| pc_rec2 = MockPackageCacheRecord( | ||
| fn="pkg2-1.0.tar.bz2", | ||
| extracted_package_dir="/cache/pkg2", | ||
| paths=["lib/python3.10/site-packages/longer.py"], # 38 chars | ||
| ) | ||
|
|
||
| def read_paths_side_effect(extracted_dir): | ||
| if extracted_dir == "/cache/pkg1": | ||
| return MockPathsJson(pc_rec1._paths) | ||
| return MockPathsJson(pc_rec2._paths) | ||
|
|
||
| mock_read_paths = mocker.patch("constructor.fcp.read_paths_json") | ||
| mock_read_paths.side_effect = read_paths_side_effect | ||
|
|
||
| result = check_duplicates_files([pc_rec1, pc_rec2], "win-64", duplicate_files="skip") | ||
|
|
||
| assert len(result) == 3 | ||
| _, _, max_path_len = result | ||
| assert max_path_len == 38 | ||
|
|
||
|
|
||
| def test_check_duplicates_files_empty_packages(mocker): | ||
| """Verify returns 0 when no packages provided.""" | ||
| mock_read_paths = mocker.patch("constructor.fcp.read_paths_json") | ||
|
|
||
| result = check_duplicates_files([], "win-64", duplicate_files="skip") | ||
|
|
||
| assert len(result) == 3 | ||
| assert result[2] == 0 | ||
| mock_read_paths.assert_not_called() | ||
|
|
||
|
|
||
| def test_check_duplicates_files_with_env_prefixes(mocker): | ||
| """Verify env_prefixes adds prefix length to max path calculation.""" | ||
| pc_rec_base = MockPackageCacheRecord( | ||
| fn="base-pkg-1.0.tar.bz2", | ||
| extracted_package_dir="/cache/base-pkg", | ||
| paths=["lib/short.py"], # 12 chars, no prefix -> 12 | ||
| ) | ||
| pc_rec_env = MockPackageCacheRecord( | ||
| fn="env-pkg-1.0.tar.bz2", | ||
| extracted_package_dir="/cache/env-pkg", | ||
| paths=["lib/short.py"], # 12 chars, with "envs/myenv/" prefix (11) -> 23 | ||
| ) | ||
|
|
||
| def read_paths_side_effect(extracted_dir): | ||
| if extracted_dir == "/cache/base-pkg": | ||
| return MockPathsJson(pc_rec_base._paths) | ||
| return MockPathsJson(pc_rec_env._paths) | ||
|
|
||
| mock_read_paths = mocker.patch("constructor.fcp.read_paths_json") | ||
| mock_read_paths.side_effect = read_paths_side_effect | ||
|
|
||
| env_prefixes = {pc_rec_env: "envs/myenv/"} | ||
| result = check_duplicates_files( | ||
| [pc_rec_base, pc_rec_env], "win-64", duplicate_files="skip", env_prefixes=env_prefixes | ||
| ) | ||
|
|
||
| assert len(result) == 3 | ||
| _, _, max_path_len = result | ||
| # "envs/myenv/" (11) + "lib/short.py" (12) = 23 | ||
| assert max_path_len == 23 | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this is deprecated, we should warn about it. In fact, I would remove that template parameter entirely form the NSIS template.