diff --git a/.changes/unreleased/added-20260319-095618.yaml b/.changes/unreleased/added-20260319-095618.yaml new file mode 100644 index 00000000..a97ce140 --- /dev/null +++ b/.changes/unreleased/added-20260319-095618.yaml @@ -0,0 +1,9 @@ +kind: added +body: Add `get_changed_items()` utility function to detect Fabric items changed via git diff for use with selective deployment +time: 2026-03-19T09:56:18.0000000+00:00 +custom: + Author: vipulb91 + AuthorLink: https://github.com/vipulb91 + Issue: "865" + IssueLink: https://github.com/microsoft/fabric-cicd/issues/865 + diff --git a/docs/how_to/optional_feature.md b/docs/how_to/optional_feature.md index 6add96d0..44e5e80f 100644 --- a/docs/how_to/optional_feature.md +++ b/docs/how_to/optional_feature.md @@ -99,6 +99,43 @@ Shortcuts are items associated with Lakehouse items and can be selectively publi **Note:** This feature can be applied along with the other selective deployment features — please be cautious when using to avoid unexpected results. +## Git-Based Change Detection + +`get_changed_items()` is a public utility function that uses `git diff` to detect which Fabric items have been added, modified, or renamed relative to a given git reference. It returns a list of strings in `"item_name.item_type"` format that can be passed directly to `items_to_include` in `publish_all_items()`. + +While `get_changed_items()` itself requires no feature flags, passing its output to `items_to_include` requires the experimental feature flags. + +**Important:** If `get_changed_items()` returns an empty list (no changes detected), do not call `publish_all_items()` without an explicit `items_to_include` list, as this would default to a full deployment. Always guard against the empty-list case: + +```python +from fabric_cicd import FabricWorkspace, publish_all_items, get_changed_items, append_feature_flag + +append_feature_flag("enable_experimental_features") +append_feature_flag("enable_items_to_include") + +workspace = FabricWorkspace( + workspace_id="your-workspace-id", + repository_directory="/path/to/repo", + item_type_in_scope=["Notebook", "DataPipeline"], + token_credential=token_credential, +) + +changed = get_changed_items(workspace.repository_directory) + +if changed: + publish_all_items(workspace, items_to_include=changed) +else: + print("No changed items detected — skipping deployment.") +``` + +To compare against a branch or a specific commit instead of the previous commit, pass a custom `git_compare_ref`: + +```python +changed = get_changed_items(workspace.repository_directory, git_compare_ref="main") +``` + +**Note:** `get_changed_items()` returns only items that were **modified or added** (i.e., candidates for publishing). It does not return deleted items. Passing `items_to_include` to `publish_all_items()` requires enabling the `enable_experimental_features` and `enable_items_to_include` feature flags. + ## Debugging If an error arises, or you want full transparency to all calls being made outside the library, enable debugging. Enabling debugging will write all API calls to the terminal. The logs can also be found in the `fabric_cicd.error.log` file. diff --git a/src/fabric_cicd/__init__.py b/src/fabric_cicd/__init__.py index dfd1a4f2..e708f194 100644 --- a/src/fabric_cicd/__init__.py +++ b/src/fabric_cicd/__init__.py @@ -9,6 +9,7 @@ import fabric_cicd.constants as constants from fabric_cicd._common._check_utils import check_version from fabric_cicd._common._deployment_result import DeploymentResult, DeploymentStatus +from fabric_cicd._common._git_diff_utils import get_changed_items from fabric_cicd._common._logging import configure_logger, exception_handler, get_file_handler from fabric_cicd.constants import FeatureFlag, ItemType from fabric_cicd.fabric_workspace import FabricWorkspace @@ -148,6 +149,7 @@ def disable_file_logging() -> None: "configure_external_file_logging", "deploy_with_config", "disable_file_logging", + "get_changed_items", "publish_all_items", "unpublish_all_orphan_items", ] diff --git a/src/fabric_cicd/_common/_git_diff_utils.py b/src/fabric_cicd/_common/_git_diff_utils.py new file mode 100644 index 00000000..29cee989 --- /dev/null +++ b/src/fabric_cicd/_common/_git_diff_utils.py @@ -0,0 +1,235 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Utility functions for detecting Fabric items changed via git diff.""" + +import json +import logging +import subprocess +from pathlib import Path +from typing import Optional + +logger = logging.getLogger(__name__) + + +def _find_platform_item(file_path: Path, repo_root: Path) -> Optional[tuple[str, str]]: + """ + Walk up from file_path towards repo_root looking for a .platform file. + + The .platform file marks the boundary of a Fabric item directory. + Its JSON content contains ``metadata.type`` (item type) and + ``metadata.displayName`` (item name). + + Returns: + A ``(item_name, item_type)`` tuple, or ``None`` if not found. + """ + current = file_path.parent + while True: + platform_file = current / ".platform" + if platform_file.exists(): + try: + data = json.loads(platform_file.read_text(encoding="utf-8")) + metadata = data.get("metadata", {}) + item_type = metadata.get("type") + item_name = metadata.get("displayName") or current.name + if item_type: + return item_name, item_type + except Exception as exc: + logger.debug(f"Could not parse .platform file at '{platform_file}': {exc}") + # Stop if we have reached the repository root or the filesystem root + if current == repo_root or current == current.parent: + break + current = current.parent + return None + + +def _resolve_git_diff_path( + file_path_str: str, + git_root: Path, + repository_directory: Path, +) -> Optional[Path]: + """ + Resolve and validate a file path from git diff output. + + Follows the same resolve → boundary-check → reject contract as + ``_resolve_file_path`` in ``_parameter/_utils.py``, adapted for + paths that are relative to a git root with containment checked + against a (potentially different) repository subdirectory. + + Args: + file_path_str: Relative path string from git diff output. + git_root: Resolved absolute path of the git repository root. + repository_directory: Resolved absolute path of the configured + repository directory (may be a subdirectory of git_root). + + Returns: + Resolved absolute Path if valid and within boundary, None otherwise. + """ + raw_path = Path(file_path_str) + + # Reject absolute paths — git diff should only produce relative paths + if raw_path.is_absolute(): + logger.debug(f"get_changed_items: skipping absolute path '{file_path_str}'") + return None + + # Reject traversal sequences before resolution (mirrors _validate_wildcard_syntax) + if ".." in raw_path.parts: + logger.debug(f"get_changed_items: skipping path with traversal '{file_path_str}'") + return None + + # Reject null bytes + if "\x00" in file_path_str: + logger.debug("get_changed_items: skipping path with null bytes") + return None + + # Step 1: Resolve relative to git root (analogous to _resolve_file_path Step 1) + resolved_path = (git_root / file_path_str).resolve() + + # Step 2: Boundary check against repository_directory (analogous to _resolve_file_path Step 2) + try: + resolved_path.relative_to(repository_directory) + except ValueError: + return None + + # Note: No Step 3 (existence check) — deleted files won't exist on disk + return resolved_path + + +def get_changed_items( + repository_directory: Path, + git_compare_ref: str = "HEAD~1", +) -> list[str]: + """ + Return the list of Fabric items that were added, modified, or renamed relative to ``git_compare_ref``. + + The returned list is in ``"item_name.item_type"`` format and can be passed directly + to the ``items_to_include`` parameter of :func:`publish_all_items` to deploy only + what has changed since the last commit. + + Args: + repository_directory: Path to the local git repository directory + (e.g. ``FabricWorkspace.repository_directory``). + git_compare_ref: Git ref to compare against. Defaults to ``"HEAD~1"``. + + Returns: + List of strings in ``"item_name.item_type"`` format. Returns an empty list when + no changes are detected, the git root cannot be found, or git is unavailable. + + Examples: + Deploy only changed items + >>> from azure.identity import AzureCliCredential + >>> from fabric_cicd import FabricWorkspace, publish_all_items, get_changed_items + >>> workspace = FabricWorkspace( + ... workspace_id="your-workspace-id", + ... repository_directory="/path/to/repo", + ... item_type_in_scope=["Notebook", "DataPipeline"], + ... token_credential=AzureCliCredential() + ... ) + >>> changed = get_changed_items(workspace.repository_directory) + >>> if changed: + ... publish_all_items(workspace, items_to_include=changed) + + With a custom git ref + >>> changed = get_changed_items(workspace.repository_directory, git_compare_ref="main") + >>> if changed: + ... publish_all_items(workspace, items_to_include=changed) + """ + changed, _ = _resolve_changed_items(Path(repository_directory), git_compare_ref) + return changed + + +def _resolve_changed_items( + repository_directory: Path, + git_compare_ref: str, +) -> tuple[list[str], list[str]]: + """ + Use ``git diff --name-status`` to detect Fabric items that changed or were + deleted relative to *git_compare_ref*. + + Args: + repository_directory: Absolute path to the local repository directory + (as stored on ``FabricWorkspace.repository_directory``). + git_compare_ref: Git ref to diff against (e.g. ``"HEAD~1"``). + + Returns: + A two-element tuple ``(changed_items, deleted_items)`` where each + element is a list of strings in ``"item_name.item_type"`` format. + Both lists are empty when the git root cannot be found or git fails. + """ + from fabric_cicd._common._config_validator import _find_git_root + from fabric_cicd._common._validate_input import validate_git_compare_ref + + validate_git_compare_ref(git_compare_ref) + + git_root = _find_git_root(repository_directory) + if git_root is None: + logger.warning("get_changed_items: could not locate a git repository root — returning empty list.") + return [], [] + + try: + result = subprocess.run( + ["git", "diff", "--name-status", git_compare_ref], + cwd=str(git_root), + capture_output=True, + text=True, + check=True, + timeout=30, + ) + except subprocess.CalledProcessError as exc: + logger.warning(f"get_changed_items: 'git diff' failed ({exc.stderr.strip()}) — returning empty list.") + return [], [] + except subprocess.TimeoutExpired: + logger.warning("get_changed_items: 'git diff' timed out — returning empty list.") + return [], [] + + changed_items: set[str] = set() + deleted_items: set[str] = set() + + git_root_resolved = git_root.resolve() + repo_dir_resolved = repository_directory.resolve() + + for line in result.stdout.splitlines(): + line = line.strip() + if not line: + continue + + parts = line.split("\t") + status = parts[0].strip() + + # Renames produce three tab-separated fields: R\told\tnew + if status.startswith("R") and len(parts) >= 3: + file_path_str = parts[2] + elif len(parts) >= 2: + file_path_str = parts[1] + else: + continue + + abs_path = _resolve_git_diff_path(file_path_str, git_root_resolved, repo_dir_resolved) + if abs_path is None: + continue + + if status == "D": + if abs_path.name == ".platform": + try: + show_result = subprocess.run( + ["git", "show", f"{git_compare_ref}:{file_path_str}"], + cwd=str(git_root_resolved), + capture_output=True, + text=True, + check=True, + timeout=30, + ) + data = json.loads(show_result.stdout) + metadata = data.get("metadata", {}) + item_type = metadata.get("type") + item_name = metadata.get("displayName") or abs_path.parent.name + if item_type and item_name: + deleted_items.add(f"{item_name}.{item_type}") + except Exception as exc: + logger.debug(f"get_changed_items: could not read deleted .platform '{file_path_str}': {exc}") + else: + item_info = _find_platform_item(abs_path, repo_dir_resolved) + if item_info: + changed_items.add(f"{item_info[0]}.{item_info[1]}") + + return list(changed_items), list(deleted_items) diff --git a/src/fabric_cicd/_common/_validate_input.py b/src/fabric_cicd/_common/_validate_input.py index ae5d68a3..52e3efdf 100644 --- a/src/fabric_cicd/_common/_validate_input.py +++ b/src/fabric_cicd/_common/_validate_input.py @@ -278,3 +278,31 @@ def validate_shortcut_exclude_regex(shortcut_exclude_regex: Optional[str]) -> No warning_message="Shortcut exclusion is enabled.", risk_warning="Using shortcut_exclude_regex will selectively exclude shortcuts from being deployed to lakehouses. Use with caution.", ) + + +def validate_git_compare_ref(git_compare_ref: str) -> str: + """ + Validate the git_compare_ref parameter to prevent git flag injection. + + Args: + git_compare_ref: The git ref to compare against. + + Raises: + InputError: If the ref is empty, starts with '-', or contains invalid characters. + """ + validate_data_type("string", "git_compare_ref", git_compare_ref) + + if not git_compare_ref.strip(): + msg = "git_compare_ref must not be an empty string." + raise InputError(msg, logger) + + if git_compare_ref.startswith("-"): + msg = "git_compare_ref must not start with '-' to prevent git flag injection." + raise InputError(msg, logger) + + # Allow only characters valid in git refs: alphanumeric, /, ., ~, ^, -, _ + if not re.match(r"^[a-zA-Z0-9/_.\-~^@{}]+$", git_compare_ref): + msg = f"git_compare_ref '{git_compare_ref}' contains invalid characters." + raise InputError(msg, logger) + + return git_compare_ref diff --git a/src/fabric_cicd/_items/_base_publisher.py b/src/fabric_cicd/_items/_base_publisher.py index 24eaf200..8b3011e4 100644 --- a/src/fabric_cicd/_items/_base_publisher.py +++ b/src/fabric_cicd/_items/_base_publisher.py @@ -357,11 +357,26 @@ def get_items_to_publish(self) -> dict[str, "Item"]: Get the items to publish for this item type. Returns: - Dictionary mapping item names to Item objects. + Dictionary mapping item names to Item objects, pre-filtered by + items_to_include when set so that only relevant items are iterated. Subclasses can override to filter or transform the items. - """ - return self.fabric_workspace_obj.repository_items.get(self.item_type, {}) + + Note: + The base implementation applies ``FabricWorkspace.items_to_include`` filtering. + To override this method and preserve this behavior, call ``super().get_items_to_publish()`` + to keep ``items_to_include`` support, then apply any additional selection logic. + """ + all_items = self.fabric_workspace_obj.repository_items.get(self.item_type, {}) + items_to_include = self.fabric_workspace_obj.items_to_include + if not items_to_include: + return all_items + normalized_include_set = {i.lower() for i in items_to_include} + return { + name: item + for name, item in all_items.items() + if f"{name}.{self.item_type}".lower() in normalized_include_set + } def get_unpublish_order(self, items_to_unpublish: list[str]) -> list[str]: """ diff --git a/src/fabric_cicd/publish.py b/src/fabric_cicd/publish.py index abc1e950..f19f6877 100644 --- a/src/fabric_cicd/publish.py +++ b/src/fabric_cicd/publish.py @@ -177,6 +177,19 @@ def publish_all_items( >>> # Access individual item response (dict with "header", "body", "status_code" keys) >>> notebook_response = workspace.responses["Notebook"]["Hello World"] >>> print(notebook_response["status_code"]) # e.g., 200 + + With get_changed_items (deploy only git-changed items) + >>> from fabric_cicd import FabricWorkspace, publish_all_items, get_changed_items + >>> from azure.identity import AzureCliCredential + >>> workspace = FabricWorkspace( + ... workspace_id="your-workspace-id", + ... repository_directory="/path/to/repo", + ... item_type_in_scope=["Notebook", "DataPipeline"], + ... token_credential=AzureCliCredential() # or any other TokenCredential + ... ) + >>> changed = get_changed_items(workspace.repository_directory) + >>> if changed: + ... publish_all_items(workspace, items_to_include=changed) """ fabric_workspace_obj = validate_fabric_workspace_obj(fabric_workspace_obj) responses_enabled = FeatureFlag.ENABLE_RESPONSE_COLLECTION.value in constants.FEATURE_FLAG @@ -340,6 +353,7 @@ def unpublish_all_orphan_items( >>> print(notebook_response["status_code"]) # e.g., 200 """ fabric_workspace_obj = validate_fabric_workspace_obj(fabric_workspace_obj) + validate_items_to_include(items_to_include, operation=constants.OperationType.UNPUBLISH) responses_enabled = FeatureFlag.ENABLE_RESPONSE_COLLECTION.value in constants.FEATURE_FLAG diff --git a/tests/test_git_diff_utils.py b/tests/test_git_diff_utils.py new file mode 100644 index 00000000..d10068d7 --- /dev/null +++ b/tests/test_git_diff_utils.py @@ -0,0 +1,363 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Tests for git diff utilities: get_changed_items() and validate_git_compare_ref().""" + +from unittest.mock import patch + +import pytest + +import fabric_cicd._common._git_diff_utils as git_utils +from fabric_cicd._common._exceptions import InputError +from fabric_cicd._common._validate_input import validate_git_compare_ref + +# ============================================================================= +# Tests for validate_git_compare_ref() +# ============================================================================= + + +class TestValidateGitCompareRef: + def test_accepts_common_valid_refs(self): + assert validate_git_compare_ref("HEAD~1") == "HEAD~1" + assert validate_git_compare_ref("main") == "main" + assert validate_git_compare_ref("feature/my_branch") == "feature/my_branch" + assert validate_git_compare_ref("release/v1.2.3") == "release/v1.2.3" + + def test_rejects_empty_string(self): + with pytest.raises(InputError): + validate_git_compare_ref("") + + def test_rejects_whitespace_only(self): + with pytest.raises(InputError): + validate_git_compare_ref(" ") + + def test_rejects_dash_prefixed(self): + with pytest.raises(InputError): + validate_git_compare_ref("-n") + with pytest.raises(InputError): + validate_git_compare_ref("--help") + + def test_rejects_invalid_characters(self): + with pytest.raises(InputError): + validate_git_compare_ref("ref;rm -rf /") + + def test_rejects_shell_metacharacters(self): + """Prevent shell injection via backticks, pipes, dollar signs, etc.""" + for ref in ["$(whoami)", "`id`", "ref|cat /etc/passwd", "ref&echo bad", "ref\nnewline"]: + with pytest.raises(InputError): + validate_git_compare_ref(ref) + + def test_rejects_non_string_input(self): + """Non-string types must be rejected.""" + with pytest.raises(InputError): + validate_git_compare_ref(123) + with pytest.raises(InputError): + validate_git_compare_ref(None) + + def test_accepts_advanced_git_ref_syntax(self): + """Valid git ref syntax including caret, tilde, and reflog notation.""" + assert validate_git_compare_ref("HEAD^") == "HEAD^" + assert validate_git_compare_ref("HEAD~3") == "HEAD~3" + assert validate_git_compare_ref("main@{1}") == "main@{1}" + assert validate_git_compare_ref("origin/main") == "origin/main" + assert validate_git_compare_ref("v2.0.0") == "v2.0.0" + assert validate_git_compare_ref("abc123def") == "abc123def" + + +# ============================================================================= +# Tests for _resolve_git_diff_path() +# ============================================================================= + + +class TestResolveGitDiffPath: + """Tests for path validation/resolution from git diff output.""" + + def test_rejects_absolute_paths(self, tmp_path): + result = git_utils._resolve_git_diff_path("/etc/passwd", tmp_path, tmp_path) + assert result is None + + def test_rejects_path_traversal(self, tmp_path): + result = git_utils._resolve_git_diff_path("../../../etc/passwd", tmp_path, tmp_path) + assert result is None + + def test_rejects_null_bytes(self, tmp_path): + result = git_utils._resolve_git_diff_path("file\x00.txt", tmp_path, tmp_path) + assert result is None + + def test_accepts_valid_relative_path(self, tmp_path): + sub = tmp_path / "MyItem" + sub.mkdir() + result = git_utils._resolve_git_diff_path("MyItem/file.py", tmp_path, tmp_path) + assert result is not None + assert result.name == "file.py" + + def test_rejects_path_outside_repo_directory(self, tmp_path): + """A file under git root but outside the repo subdirectory is rejected.""" + repo_sub = tmp_path / "workspace" + repo_sub.mkdir() + result = git_utils._resolve_git_diff_path("other/file.py", tmp_path, repo_sub) + assert result is None + + +# ============================================================================= +# Tests for _find_platform_item() +# ============================================================================= + + +class TestFindPlatformItem: + """Tests for .platform file discovery and parsing.""" + + def test_finds_platform_in_same_directory(self, tmp_path): + item_dir = tmp_path / "MyItem.Notebook" + item_dir.mkdir() + (item_dir / ".platform").write_text( + '{"metadata": {"type": "Notebook", "displayName": "MyItem"}}', encoding="utf-8" + ) + file_path = item_dir / "notebook.py" + file_path.touch() + result = git_utils._find_platform_item(file_path, tmp_path) + assert result == ("MyItem", "Notebook") + + def test_returns_none_when_no_platform_file(self, tmp_path): + item_dir = tmp_path / "NoItem" + item_dir.mkdir() + file_path = item_dir / "file.py" + file_path.touch() + result = git_utils._find_platform_item(file_path, tmp_path) + assert result is None + + def test_returns_none_for_malformed_platform_json(self, tmp_path): + item_dir = tmp_path / "BadItem" + item_dir.mkdir() + (item_dir / ".platform").write_text("not valid json", encoding="utf-8") + file_path = item_dir / "file.py" + file_path.touch() + result = git_utils._find_platform_item(file_path, tmp_path) + assert result is None + + def test_returns_none_when_metadata_missing_type(self, tmp_path): + item_dir = tmp_path / "NoType" + item_dir.mkdir() + (item_dir / ".platform").write_text( + '{"metadata": {"displayName": "NoType"}}', encoding="utf-8" + ) + file_path = item_dir / "file.py" + file_path.touch() + result = git_utils._find_platform_item(file_path, tmp_path) + assert result is None + + +# ============================================================================= +# Tests for get_changed_items() +# ============================================================================= + + +class TestGetChangedItems: + """Tests for the public get_changed_items() utility function.""" + + def _make_git_diff_output(self, lines: list[str]) -> str: + return "\n".join(lines) + + def test_returns_changed_items_from_git_diff(self, tmp_path): + """Returns items detected as modified/added by git diff.""" + # Set up a fake item directory with a .platform file + item_dir = tmp_path / "MyNotebook.Notebook" + item_dir.mkdir() + platform = item_dir / ".platform" + platform.write_text( + '{"metadata": {"type": "Notebook", "displayName": "MyNotebook"}}', + encoding="utf-8", + ) + changed_file = item_dir / "notebook.py" + changed_file.write_text("print('hello')", encoding="utf-8") + + diff_output = self._make_git_diff_output(["M\tMyNotebook.Notebook/notebook.py"]) + + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with ( + patch(git_root_patch, return_value=tmp_path), + patch("subprocess.run") as mock_run, + ): + mock_run.return_value.stdout = diff_output + mock_run.return_value.returncode = 0 + + result = git_utils.get_changed_items(tmp_path) + + assert result == ["MyNotebook.Notebook"] + + def test_returns_empty_list_when_no_changes(self, tmp_path): + """Returns an empty list when git diff reports no changed files.""" + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with ( + patch(git_root_patch, return_value=tmp_path), + patch("subprocess.run") as mock_run, + ): + mock_run.return_value.stdout = "" + mock_run.return_value.returncode = 0 + + result = git_utils.get_changed_items(tmp_path) + + assert result == [] + + def test_returns_empty_list_when_git_root_not_found(self, tmp_path): + """Returns an empty list and logs a warning when no git root is found.""" + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with patch(git_root_patch, return_value=None): + result = git_utils.get_changed_items(tmp_path) + + assert result == [] + + def test_returns_empty_list_when_git_diff_fails(self, tmp_path): + """Returns an empty list and logs a warning when git diff fails.""" + import subprocess + + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with ( + patch(git_root_patch, return_value=tmp_path), + patch("subprocess.run", side_effect=subprocess.CalledProcessError(1, "git", stderr="bad ref")), + ): + result = git_utils.get_changed_items(tmp_path) + + assert result == [] + + def test_uses_custom_git_compare_ref(self, tmp_path): + """Passes the custom git_compare_ref to the underlying git command.""" + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with ( + patch(git_root_patch, return_value=tmp_path), + patch("subprocess.run") as mock_run, + ): + mock_run.return_value.stdout = "" + mock_run.return_value.returncode = 0 + + git_utils.get_changed_items(tmp_path, git_compare_ref="main") + + call_args = mock_run.call_args[0][0] + assert call_args == ["git", "diff", "--name-status", "main"] + + def test_excludes_files_outside_repository_directory(self, tmp_path): + """Files changed outside the configured repository_directory are ignored.""" + outside_dir = tmp_path / "other_repo" / "SomeItem.Notebook" + outside_dir.mkdir(parents=True) + + diff_output = self._make_git_diff_output(["M\tother_repo/SomeItem.Notebook/item.py"]) + + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with ( + patch(git_root_patch, return_value=tmp_path), + patch("subprocess.run") as mock_run, + ): + mock_run.return_value.stdout = diff_output + mock_run.return_value.returncode = 0 + + # Use a subdirectory as the repository_directory so "other_repo" is out of scope + repo_subdir = tmp_path / "my_workspace" + repo_subdir.mkdir() + result = git_utils.get_changed_items(repo_subdir) + + assert result == [] + + def test_deduplicates_multiple_files_in_same_item(self, tmp_path): + """Multiple changed files in the same item should produce a single entry.""" + item_dir = tmp_path / "MyNotebook.Notebook" + item_dir.mkdir() + (item_dir / ".platform").write_text( + '{"metadata": {"type": "Notebook", "displayName": "MyNotebook"}}', + encoding="utf-8", + ) + (item_dir / "file1.py").write_text("a", encoding="utf-8") + (item_dir / "file2.py").write_text("b", encoding="utf-8") + + diff_output = self._make_git_diff_output([ + "M\tMyNotebook.Notebook/file1.py", + "M\tMyNotebook.Notebook/file2.py", + ]) + + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with ( + patch(git_root_patch, return_value=tmp_path), + patch("subprocess.run") as mock_run, + ): + mock_run.return_value.stdout = diff_output + mock_run.return_value.returncode = 0 + result = git_utils.get_changed_items(tmp_path) + + assert result == ["MyNotebook.Notebook"] + + def test_handles_renamed_files(self, tmp_path): + """Renamed files (R status) should be detected via the new path.""" + item_dir = tmp_path / "Renamed.Notebook" + item_dir.mkdir() + (item_dir / ".platform").write_text( + '{"metadata": {"type": "Notebook", "displayName": "Renamed"}}', + encoding="utf-8", + ) + (item_dir / "new_name.py").write_text("x", encoding="utf-8") + + diff_output = self._make_git_diff_output(["R100\tOld.Notebook/old.py\tRenamed.Notebook/new_name.py"]) + + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with ( + patch(git_root_patch, return_value=tmp_path), + patch("subprocess.run") as mock_run, + ): + mock_run.return_value.stdout = diff_output + mock_run.return_value.returncode = 0 + result = git_utils.get_changed_items(tmp_path) + + assert result == ["Renamed.Notebook"] + + def test_returns_empty_list_on_timeout(self, tmp_path): + """A git diff timeout should return an empty list gracefully.""" + import subprocess + + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with ( + patch(git_root_patch, return_value=tmp_path), + patch("subprocess.run", side_effect=subprocess.TimeoutExpired("git", 30)), + ): + result = git_utils.get_changed_items(tmp_path) + + assert result == [] + + def test_multiple_distinct_items(self, tmp_path): + """Changes across multiple items should all be returned.""" + for name, item_type in [("NB1", "Notebook"), ("Pipeline1", "DataPipeline")]: + item_dir = tmp_path / f"{name}.{item_type}" + item_dir.mkdir() + (item_dir / ".platform").write_text( + f'{{"metadata": {{"type": "{item_type}", "displayName": "{name}"}}}}', + encoding="utf-8", + ) + (item_dir / "file.py").write_text("content", encoding="utf-8") + + diff_output = self._make_git_diff_output([ + "M\tNB1.Notebook/file.py", + "A\tPipeline1.DataPipeline/file.py", + ]) + + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with ( + patch(git_root_patch, return_value=tmp_path), + patch("subprocess.run") as mock_run, + ): + mock_run.return_value.stdout = diff_output + mock_run.return_value.returncode = 0 + result = git_utils.get_changed_items(tmp_path) + + assert sorted(result) == ["NB1.Notebook", "Pipeline1.DataPipeline"] + + def test_rejects_dangerous_git_compare_ref(self, tmp_path): + """Passing an invalid git_compare_ref should raise InputError before running git.""" + with pytest.raises(InputError): + git_utils.get_changed_items(tmp_path, git_compare_ref="--exec=whoami") diff --git a/tests/test_publish.py b/tests/test_publish.py index 252aaf26..2342940d 100644 --- a/tests/test_publish.py +++ b/tests/test_publish.py @@ -805,7 +805,9 @@ def test_folder_exclusion_with_items_to_include(mock_endpoint, temp_workspace_di assert workspace.repository_items["Notebook"]["ImportantNotebook"].skip_publish is True assert workspace.repository_items["Notebook"]["StandaloneNotebook"].skip_publish is False - assert workspace.repository_items["Notebook"]["OtherNotebook"].skip_publish is True + # OtherNotebook is pre-filtered in get_items_to_publish() because it is not in + # items_to_include, so _publish_item is never called and skip_publish stays False. + assert workspace.repository_items["Notebook"]["OtherNotebook"].skip_publish is False @pytest.mark.usefixtures("experimental_feature_flags") @@ -866,8 +868,10 @@ def test_folder_inclusion_with_items_to_include(mock_endpoint, temp_workspace_di ) assert workspace.repository_items["Notebook"]["Notebook1"].skip_publish is False - assert workspace.repository_items["Notebook"]["Notebook2"].skip_publish is True - assert workspace.repository_items["Notebook"]["ArchivedNotebook"].skip_publish is True + # Notebook2 and ArchivedNotebook are pre-filtered in get_items_to_publish() + # because they are not in items_to_include, so skip_publish stays False. + assert workspace.repository_items["Notebook"]["Notebook2"].skip_publish is False + assert workspace.repository_items["Notebook"]["ArchivedNotebook"].skip_publish is False @pytest.mark.usefixtures("experimental_feature_flags") @@ -899,10 +903,13 @@ def test_all_filters_combined(mock_endpoint, temp_workspace_dir): items_to_include=["TargetNotebook.Notebook"], ) - assert workspace.repository_items["Notebook"]["DebugNotebook"].skip_publish is True + # DebugNotebook, OtherNotebook, and ArchivedNotebook are pre-filtered in + # get_items_to_publish() because they are not in items_to_include, so + # _publish_item is never called and skip_publish stays False. + assert workspace.repository_items["Notebook"]["DebugNotebook"].skip_publish is False assert workspace.repository_items["Notebook"]["TargetNotebook"].skip_publish is False - assert workspace.repository_items["Notebook"]["OtherNotebook"].skip_publish is True - assert workspace.repository_items["Notebook"]["ArchivedNotebook"].skip_publish is True + assert workspace.repository_items["Notebook"]["OtherNotebook"].skip_publish is False + assert workspace.repository_items["Notebook"]["ArchivedNotebook"].skip_publish is False # ============================================================================= @@ -1021,3 +1028,4 @@ def test_files_sorted_with_unknown_extension(self, publisher): assert mock_item.item_files[1].file_path.name == "notebook.py" assert mock_item.item_files[2].file_path.name == "readme.md" assert mock_item.item_files[3].file_path.name == "notebook.json" +