Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions CONSTRUCT.md
Original file line number Diff line number Diff line change
Expand Up @@ -552,11 +552,11 @@ interactive installation. (Windows only).

Only applies if `register_python` is true.

### `check_path_length`
### ~~`check_path_length`~~

Check the length of the path where the distribution is installed to ensure nodejs
can be installed. Raise a message to request shorter paths (less than 46 character)
or enable long paths on windows > 10 (require admin right). Default is False. (Windows only).
_Deprecated_. Path length validation is now always performed using the computed
maximum relative path length from the package contents. This option will be
removed in a future version. (Windows only).

### `check_path_spaces`

Expand Down
8 changes: 4 additions & 4 deletions constructor/_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,11 +721,11 @@ class ConstructorConfiguration(BaseModel):

Only applies if `register_python` is true.
"""
check_path_length: bool = False
check_path_length: bool = Field(False, deprecated=True)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is deprecated, we should warn about it. In fact, I would remove that template parameter entirely form the NSIS template.

"""
Check the length of the path where the distribution is installed to ensure nodejs
can be installed. Raise a message to request shorter paths (less than 46 character)
or enable long paths on windows > 10 (require admin right). Default is False. (Windows only).
_Deprecated_. Path length validation is now always performed using the computed
maximum relative path length from the package contents. This option will be
removed in a future version. (Windows only).
"""
check_path_spaces: bool = True
"""
Expand Down
3 changes: 2 additions & 1 deletion constructor/data/construct.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,8 @@
},
"check_path_length": {
"default": false,
"description": "Check the length of the path where the distribution is installed to ensure nodejs can be installed. Raise a message to request shorter paths (less than 46 character) or enable long paths on windows > 10 (require admin right). Default is False. (Windows only).",
"deprecated": true,
"description": "_Deprecated_. Path length validation is now always performed using the computed maximum relative path length from the package contents. This option will be removed in a future version. (Windows only).",
"title": "Check Path Length",
"type": "boolean"
},
Expand Down
60 changes: 53 additions & 7 deletions constructor/fcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@
import sys
import tempfile
from collections import defaultdict
from collections.abc import Iterable
from itertools import groupby
from os.path import abspath, expanduser, isdir, join
from subprocess import check_call
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Literal

from constructor.utils import filename_dist

Expand Down Expand Up @@ -144,15 +145,43 @@ def _fetch(download_dir, precs):
return list(dict.fromkeys(PrefixGraph(pc.iter_records()).graph))


def check_duplicates_files(pc_recs, platform, duplicate_files="error"):
def check_duplicates_files(
pc_recs: Iterable["PackageCacheRecord"],
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you add from __future__ import annotations, you don't need the strings representation. You can then place Literal and Iterable into the type checking block.

platform: str,
duplicate_files: Literal["error", "warn", "skip"] = "error",
env_prefixes: dict["PackageCacheRecord", str] | None = None,
) -> tuple[int, int, int]:
"""
Check for duplicate files across packages and compute size/path metrics.

Iterates through all files in the provided package cache records to:
1. Detect duplicate files (same path in multiple packages)
2. Compute approximate tarball and extracted sizes
3. Track the longest relative file path (for MAX_PATH validation on Windows)

Args:
pc_recs: Package cache records to check.
platform: Target platform string (e.g., "win-64", "linux-64").
duplicate_files: How to handle duplicates - "error", "warn", or "skip".
env_prefixes: Optional dict mapping PackageCacheRecord -> path prefix string.
Used to account for extra_envs paths which are installed under
"envs/<name>/" rather than the base install directory. Records not
in this dict are assumed to be in the base environment (no prefix).

Returns:
Tuple of (approx_tarball_size, approx_extracted_size, max_relative_path_length)
"""
assert duplicate_files in ("warn", "skip", "error")
if env_prefixes is None:
env_prefixes = {}

map_members_scase = defaultdict(set)
map_members_icase = defaultdict(lambda: {"files": set(), "fns": set()})

# Keep a min, 50MB buffer size
total_tarball_size = 52428800
total_extracted_pkgs_size = 52428800
max_relative_path_length = 0

for pc_rec in pc_recs:
fn = pc_rec.fn
Expand All @@ -161,8 +190,12 @@ def check_duplicates_files(pc_recs, platform, duplicate_files="error"):
total_tarball_size += int(pc_rec.get("size", 0))

paths_data = read_paths_json(extracted_package_dir).paths
env_prefix_len = len(env_prefixes.get(pc_rec, ""))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the env_prefix contain the trailing (back)slash? Might be worth documenting as a comment.

for path_data in paths_data:
short_path = path_data.path
max_relative_path_length = max(
max_relative_path_length, env_prefix_len + len(short_path)
)
try:
size = path_data.size_in_bytes or getsize(join(extracted_package_dir, short_path))
except AttributeError:
Expand All @@ -176,7 +209,7 @@ def check_duplicates_files(pc_recs, platform, duplicate_files="error"):
map_members_icase[short_path_lower]["fns"].add(fn)

if duplicate_files == "skip":
return total_tarball_size, total_extracted_pkgs_size
return total_tarball_size, total_extracted_pkgs_size, max_relative_path_length

logger.info("Checking for duplicate files ...")
for member in map_members_scase:
Expand All @@ -201,7 +234,7 @@ def check_duplicates_files(pc_recs, platform, duplicate_files="error"):
else:
sys.exit(f"Error: {msg_str}")

return total_tarball_size, total_extracted_pkgs_size
return total_tarball_size, total_extracted_pkgs_size, max_relative_path_length


def _precs_from_environment(environment, input_dir):
Expand Down Expand Up @@ -443,18 +476,24 @@ def _main(
input_dir=input_dir,
)
if dry_run:
return None, None, None, None, None, None, None, None
return None, None, None, None, None, None, None, None, None
pc_recs, _urls, dists, has_conda = _fetch_precs(
precs, download_dir, transmute_file_type=transmute_file_type
)
all_pc_recs = pc_recs.copy()

extra_envs_data = {}
env_prefixes = {} # Maps pc_rec -> "envs/<name>/" prefix for max path calculation
for env_name, env_precs in extra_envs_precs.items():
env_pc_recs, env_urls, env_dists, _ = _fetch_precs(
env_precs, download_dir, transmute_file_type=transmute_file_type
)
extra_envs_data[env_name] = {"_urls": env_urls, "_dists": env_dists, "_records": env_precs}
env_prefix = f"envs/{env_name}/"
for pc_rec in env_pc_recs:
existing_prefix = env_prefixes.get(pc_rec, "")
if len(env_prefix) > len(existing_prefix):
env_prefixes[pc_rec] = env_prefix
all_pc_recs += env_pc_recs

duplicate_files = "warn" if ignore_duplicate_files else "error"
Expand All @@ -463,8 +502,12 @@ def _main(
duplicate_files = "skip"

all_pc_recs = list({rec: None for rec in all_pc_recs}) # deduplicate
approx_tarballs_size, approx_pkgs_size = check_duplicates_files(
pc_recs, platform, duplicate_files=duplicate_files
# Pass all_pc_recs (base + extra_envs) to check_duplicates_files:
# - When extra_envs exists, duplicate_files="skip" so only sizes and max path are computed
# - When no extra_envs, all_pc_recs == pc_recs
# - env_prefixes dict ensures max path accounts for "envs/<name>/" prefix in extra_envs
approx_tarballs_size, approx_pkgs_size, max_relative_path_length = check_duplicates_files(
all_pc_recs, platform, duplicate_files=duplicate_files, env_prefixes=env_prefixes
)

return (
Expand All @@ -476,6 +519,7 @@ def _main(
approx_pkgs_size,
has_conda,
extra_envs_data,
max_relative_path_length,
)


Expand Down Expand Up @@ -529,6 +573,7 @@ def main(info, verbose=True, dry_run=False, conda_exe="conda.exe"):
approx_pkgs_size,
has_conda,
extra_envs_info,
max_relative_path_length,
) = _main(
name,
version,
Expand Down Expand Up @@ -561,3 +606,4 @@ def main(info, verbose=True, dry_run=False, conda_exe="conda.exe"):
info["_has_conda"] = has_conda
# contains {env_name: [_dists, _urls, _records]} for each extra environment
info["_extra_envs_info"] = extra_envs_info
info["_max_relative_path_length"] = max_relative_path_length
21 changes: 21 additions & 0 deletions constructor/nsis/main.nsi.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ ${Using:StrFunc} StrStr
# are not written into install.log. STEP_LOG creates an intermittent file
# that is output into these streams after the commands finish.
!define STEP_LOG "$INSTDIR\.step.log"
!define MAX_RELATIVE_PATH_LENGTH {{ max_relative_path_length }}

var /global INIT_CONDA
var /global REG_PY
Expand Down Expand Up @@ -1115,6 +1116,26 @@ Function OnDirectoryLeave
${EndIf}
${EndIf}

# MAX_PATH check - installation will fail if path exceeds 260 characters
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The path-length handling logic before this section needs to be trimmed. We can get rid of all the $CheckPathLength logic.

${If} $LongPathsEnabled == "0"
IntOp $0 $InstDirLen + ${MAX_RELATIVE_PATH_LENGTH}
IntOp $0 $0 + 1 # Account for path separator between install dir and relative path
${If} $0 > 260
IntOp $1 260 - ${MAX_RELATIVE_PATH_LENGTH}
IntOp $1 $1 - 1 # Account for path separator
${If} ${Silent}
${Print} "::error:: The installation path is too long. Your path is $InstDirLen characters, but must be at most $1 characters."
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Installers automatically enable long paths if they have administrative privileges. We should keep it that way.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, see 5add07a

${Else}
MessageBox MB_OK|MB_ICONSTOP \
"The installation path is too long.$\n$\n\
Your path is $InstDirLen characters, but must be at most $1 characters.$\n$\n\
Please choose a shorter installation path." \
/SD IDOK
${EndIf}
Abort
${EndIf}
${EndIf}

# Call the CheckForSpaces function.
Push $INSTDIR # Input string (install path).
Call CheckForSpaces
Expand Down
1 change: 1 addition & 0 deletions constructor/winexe.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ def make_nsi(
variables["initialize_by_default"] = info.get("initialize_by_default", None)
variables["check_path_length"] = info.get("check_path_length", False)
variables["check_path_spaces"] = info.get("check_path_spaces", True)
variables["max_relative_path_length"] = info.get("_max_relative_path_length", 0)
variables["keep_pkgs"] = info.get("keep_pkgs") or False
variables["pre_install_exists"] = bool(info.get("pre_install"))
variables["post_install_exists"] = bool(info.get("post_install"))
Expand Down
8 changes: 4 additions & 4 deletions docs/source/construct-yaml.md
Original file line number Diff line number Diff line change
Expand Up @@ -552,11 +552,11 @@ interactive installation. (Windows only).

Only applies if `register_python` is true.

### `check_path_length`
### ~~`check_path_length`~~

Check the length of the path where the distribution is installed to ensure nodejs
can be installed. Raise a message to request shorter paths (less than 46 character)
or enable long paths on windows > 10 (require admin right). Default is False. (Windows only).
_Deprecated_. Path length validation is now always performed using the computed
maximum relative path length from the package contents. This option will be
removed in a future version. (Windows only).

### `check_path_spaces`

Expand Down
1 change: 1 addition & 0 deletions tests/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pytest
pytest-cov
pytest-mock
coverage
jinja2
ruamel.yaml
100 changes: 99 additions & 1 deletion tests/test_fcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

from constructor.fcp import check_duplicates, exclude_packages
from constructor.fcp import check_duplicates, check_duplicates_files, exclude_packages


class GenericObject:
Expand Down Expand Up @@ -64,3 +64,101 @@ def test_exclude_packages(values: tuple[..., GenericObject], expected_value):
with context:
packages = exclude_packages(*values)
assert packages == expected_value


class MockPathData:
"""Represents a file path entry."""

def __init__(self, path):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add typing to new code where we can.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done 5add07a

self.path = path
self.size_in_bytes = 1 # must be non-zero to avoid filesystem access


class MockPathsJson:
"""Wraps a list of MockPathData entries."""

def __init__(self, paths):
self.paths = [MockPathData(p) for p in paths]


class MockPackageCacheRecord:
"""Represents a package record."""

def __init__(self, fn, extracted_package_dir, paths):
self.fn = fn
self.extracted_package_dir = extracted_package_dir
self._paths = paths

def get(self, key, default=None):
return default


def test_check_duplicates_files_returns_max_path_length(mocker):
"""Verify function returns max path length as third tuple element."""
pc_rec1 = MockPackageCacheRecord(
fn="pkg1-1.0.tar.bz2",
extracted_package_dir="/cache/pkg1",
paths=["lib/short.py"], # 12 chars
)
pc_rec2 = MockPackageCacheRecord(
fn="pkg2-1.0.tar.bz2",
extracted_package_dir="/cache/pkg2",
paths=["lib/python3.10/site-packages/longer.py"], # 38 chars
)

def read_paths_side_effect(extracted_dir):
if extracted_dir == "/cache/pkg1":
return MockPathsJson(pc_rec1._paths)
return MockPathsJson(pc_rec2._paths)

mock_read_paths = mocker.patch("constructor.fcp.read_paths_json")
mock_read_paths.side_effect = read_paths_side_effect

result = check_duplicates_files([pc_rec1, pc_rec2], "win-64", duplicate_files="skip")

assert len(result) == 3
_, _, max_path_len = result
assert max_path_len == 38


def test_check_duplicates_files_empty_packages(mocker):
"""Verify returns 0 when no packages provided."""
mock_read_paths = mocker.patch("constructor.fcp.read_paths_json")

result = check_duplicates_files([], "win-64", duplicate_files="skip")

assert len(result) == 3
assert result[2] == 0
mock_read_paths.assert_not_called()


def test_check_duplicates_files_with_env_prefixes(mocker):
"""Verify env_prefixes adds prefix length to max path calculation."""
pc_rec_base = MockPackageCacheRecord(
fn="base-pkg-1.0.tar.bz2",
extracted_package_dir="/cache/base-pkg",
paths=["lib/short.py"], # 12 chars, no prefix -> 12
)
pc_rec_env = MockPackageCacheRecord(
fn="env-pkg-1.0.tar.bz2",
extracted_package_dir="/cache/env-pkg",
paths=["lib/short.py"], # 12 chars, with "envs/myenv/" prefix (11) -> 23
)

def read_paths_side_effect(extracted_dir):
if extracted_dir == "/cache/base-pkg":
return MockPathsJson(pc_rec_base._paths)
return MockPathsJson(pc_rec_env._paths)

mock_read_paths = mocker.patch("constructor.fcp.read_paths_json")
mock_read_paths.side_effect = read_paths_side_effect

env_prefixes = {pc_rec_env: "envs/myenv/"}
result = check_duplicates_files(
[pc_rec_base, pc_rec_env], "win-64", duplicate_files="skip", env_prefixes=env_prefixes
)

assert len(result) == 3
_, _, max_path_len = result
# "envs/myenv/" (11) + "lib/short.py" (12) = 23
assert max_path_len == 23
Loading