diff --git a/CHANGELOG.md b/CHANGELOG.md index 32c7c446..b655db27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Unreleased - Added `**kwargs` to `AzureBlobFileSystem.exists()` - Populate `AzureBlobFile.version_id` on write when `version_aware` is enabled. - Fixed issue where unawaitable Credential types were incorrectly awaited (#431) +- Fixed bug where `fs.ls(detail=True)` returned Etag formatted without double quotes.[#544](https://github.com/fsspec/adlfs/pull/544) 2026.2.0 -------- diff --git a/adlfs/spec.py b/adlfs/spec.py index 0c0a8625..6d72da8c 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -156,6 +156,27 @@ def _create_aio_blob_service_client_from_connection_string( ) +def _strip_quotes_from_etag(etag: str) -> Optional[str]: + """ + Normalizes blob etag values to always be returned wrapped with double quotes regardless + of the format gotten from the SDK. + + Parameters + ---------- + etag: str + Raw etag value from Azure + + Returns + ------- + str + Returns the normalized etag + """ + if etag is None: + return None + double_quote = '"' + return f'"{etag.strip(double_quote)}"' + + class AzureBlobFileSystem(AsyncFileSystem): """ Access Azure Datalake Gen2 and Azure Storage if it were a file system using Multiprotocol Access @@ -897,6 +918,9 @@ async def _details( for key in FORWARDED_BLOB_PROPERTIES if content.has_key(key) # NOQA } + # Return a string with double quotes for consistency + if data.get("etag") is not None: + data["etag"] = _strip_quotes_from_etag(data["etag"]) if self.version_aware: data.update( (key, content[key]) diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index e98f47fc..5cb734e1 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -20,6 +20,7 @@ from pandas.testing import assert_frame_equal from adlfs import AzureBlobFile, AzureBlobFileSystem +from adlfs.spec import _strip_quotes_from_etag from adlfs.tests.constants import ( ACCOUNT_NAME, CONN_STR, @@ -2571,3 +2572,41 @@ class 
def test_etag_normalized_form(storage):
    """
    Tests a consistent quoted etag format with ls() and info() calls.
    """
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
    )
    path = "data/root/a/file.txt"
    # Get etag info from ls(detail=True) and info()
    ls_results = fs.ls(path, detail=True, refresh=True)
    ls_etags = [entry["etag"] for entry in ls_results if entry["name"] == path]
    # Fail with a readable message instead of an IndexError when the listing
    # does not contain the requested path.
    assert ls_etags, f"{path} not found in ls() results"
    ls_etag = ls_etags[0]

    info_etag = fs.info(path, refresh=True)["etag"]

    assert info_etag == ls_etag
    for etag in (ls_etag, info_etag):
        # Validate the etag is quoted
        assert etag.startswith('"') and etag.endswith('"')
        # Validate the etag is not double quoted
        assert not etag.startswith('""') and not etag.endswith('""')


@pytest.mark.parametrize(
    "input_etag,expected_etag",
    [
        pytest.param("0xA123456", '"0xA123456"', id="bug"),
        pytest.param('"0xA123456"', '"0xA123456"', id="normal"),
        pytest.param('""0xA123456""', '"0xA123456"', id="double-double-quotes"),
        pytest.param('"0xA123456', '"0xA123456"', id="leading-double-quote"),
        pytest.param('0xA123456"', '"0xA123456"', id="trailing-double-quote"),
        pytest.param(None, None, id="none"),
    ],
)
def test_striping_etag(input_etag, expected_etag):
    """
    Tests that the etag helper returns exactly one pair of surrounding double
    quotes for each raw input form, and passes None through unchanged.
    """
    assert _strip_quotes_from_etag(input_etag) == expected_etag