From 7f7a684d2439136b16b58e15aed7d53fc7db6292 Mon Sep 17 00:00:00 2001 From: Jeffrey Wagner Date: Tue, 15 Jul 2025 12:03:45 -0700 Subject: [PATCH 01/13] mock fetching for more tests to avoid rate limiting --- openff/nagl_models/__init__.py | 2 +- .../nagl_models/tests/test_dynamic_fetch.py | 71 +++++++++++++------ 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/openff/nagl_models/__init__.py b/openff/nagl_models/__init__.py index 93c1baf..6638f5b 100644 --- a/openff/nagl_models/__init__.py +++ b/openff/nagl_models/__init__.py @@ -10,7 +10,7 @@ load_nagl_model_directory_entry_points, validate_nagl_model_path, list_available_nagl_models, - get_models_by_type + get_models_by_type, ) from openff.nagl_models._dynamic_fetch import get_model diff --git a/openff/nagl_models/tests/test_dynamic_fetch.py b/openff/nagl_models/tests/test_dynamic_fetch.py index 149ff08..330a6c0 100644 --- a/openff/nagl_models/tests/test_dynamic_fetch.py +++ b/openff/nagl_models/tests/test_dynamic_fetch.py @@ -170,32 +170,57 @@ def test_all_models_loadable(model, monkeypatch): GNNModel.load(get_model(model), eval_mode=True) -def test_get_model_by_doi_and_hash(hide_cache): +def test_get_model_by_doi_and_hash(hide_cache, monkeypatch): # This test uses a Zenodo sandbox DOI (10.5072 prefix) and the corresponding # SHA256 hash of the test file uploaded to that sandbox record - get_model( - "my_favorite_model.pt", - doi="10.5072/zenodo.278300", - file_hash="127eb0b9512f22546f8b455582bcd85b2521866d32b86d231fee26d4771b1d81", - ) - - -def test_get_model_by_doi_no_hash(hide_cache): - get_model("my_favorite_model.pt", doi="10.5072/zenodo.278300") - + with monkeypatch.context() as m: + m.setattr( + openff.nagl_models._dynamic_fetch, + "get_release_metadata", + mocked_get_release_metadata, + ) -def test_get_model_hash_comparison_fails(): - with pytest.raises(HashComparisonFailedException): get_model( "my_favorite_model.pt", doi="10.5072/zenodo.278300", - file_hash="wrong_hash", + file_hash="127eb0b9512f22546f8b455582bcd85b2521866d32b86d231fee26d4771b1d81", + ) + +def test_get_model_by_doi_no_hash(hide_cache, monkeypatch): + with monkeypatch.context() as m: + m.setattr( + openff.nagl_models._dynamic_fetch, + "get_release_metadata", + mocked_get_release_metadata, + ) + + get_model("my_favorite_model.pt", doi="10.5072/zenodo.278300") + + +def test_get_model_hash_comparison_fails(monkeypatch): + with monkeypatch.context() as m: + m.setattr( + openff.nagl_models._dynamic_fetch, + "get_release_metadata", + mocked_get_release_metadata, ) + with pytest.raises(HashComparisonFailedException): + get_model( + "my_favorite_model.pt", + doi="10.5072/zenodo.278300", + file_hash="wrong_hash", + ) -def test_user_provided_hash_conflicts_with_known_hash(): - with pytest.raises(HashComparisonFailedException): - get_model("openff-gnn-am1bcc-0.1.0-rc.3.pt", file_hash="wrong_hash") +def test_user_provided_hash_conflicts_with_known_hash(monkeypatch): + with monkeypatch.context() as m: + m.setattr( + openff.nagl_models._dynamic_fetch, + "get_release_metadata", + mocked_get_release_metadata, + ) + with pytest.raises(HashComparisonFailedException): + get_model("openff-gnn-am1bcc-0.1.0-rc.3.pt", file_hash="wrong_hash") def test_malformed_doi(monkeypatch, hide_cache): @@ -215,6 +240,12 @@ def test_malformed_doi(monkeypatch, hide_cache): get_model("my_favorite_model.pt", doi="zenodo.278300") -def test_no_matching_file_at_doi(): - with pytest.raises(FileNotFoundError, match="sandbox.zenodo"): - get_model("file_that_doesnt_exist.pt", doi="10.5072/zenodo.278300") +def test_no_matching_file_at_doi(monkeypatch): + with monkeypatch.context() as m: + m.setattr( + openff.nagl_models._dynamic_fetch, + "get_release_metadata", + mocked_get_release_metadata, + ) + with pytest.raises(FileNotFoundError, match="sandbox.zenodo"): + get_model("file_that_doesnt_exist.pt", doi="10.5072/zenodo.278300") From bf2b5b37ccf8064547ae376f4aeab225fe6d7cf6 Mon Sep 17 00:00:00 2001 From: Jeffrey Wagner Date: Tue, 15 Jul 2025 13:44:56 -0700 Subject: [PATCH 02/13] move pytest-socket usage into tests that require it --- openff/nagl_models/tests/test_dynamic_fetch.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openff/nagl_models/tests/test_dynamic_fetch.py b/openff/nagl_models/tests/test_dynamic_fetch.py index 330a6c0..2a2e04f 100644 --- a/openff/nagl_models/tests/test_dynamic_fetch.py +++ b/openff/nagl_models/tests/test_dynamic_fetch.py @@ -6,7 +6,6 @@ import platformdirs import pytest -from pytest_socket import SocketBlockedError, disable_socket import openff.nagl_models._dynamic_fetch from openff.nagl_models import __file__ as root @@ -84,6 +83,8 @@ def hide_cache(): def test_access_internet_with_empty_cache(hide_cache): + from pytest_socket import SocketBlockedError, disable_socket + cache_path = platformdirs.user_cache_path() / "OPENFF_NAGL_MODELS" disable_socket() @@ -99,6 +100,8 @@ def test_access_internet_with_empty_cache(hide_cache): def test_file_exists_in_cache_without_internet(monkeypatch): + from pytest_socket import disable_socket + # since tests can run in different orders, make sure the file exists already with monkeypatch.context() as m: m.setattr( From 1441cffedaa60e7daa8c54d4aa7a263e271d66ab Mon Sep 17 00:00:00 2001 From: Jeffrey Wagner Date: Thu, 17 Jul 2025 08:19:26 -0700 Subject: [PATCH 03/13] add a new type of exception for improper model file suffix (should quickly fail for requests for eg. am1bcc) --- openff/nagl_models/_dynamic_fetch.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/openff/nagl_models/_dynamic_fetch.py b/openff/nagl_models/_dynamic_fetch.py index a8f7616..0554cf8 100644 --- a/openff/nagl_models/_dynamic_fetch.py +++ b/openff/nagl_models/_dynamic_fetch.py @@ -27,6 +27,11 @@ class HashComparisonFailedException(Exception): class UnableToParseDOIException(Exception): """Exception raised when a Zenodo DOI is unable to be parsed according to the expected pattern.""" +class BadFileSuffixError(Exception): + """Exception raised when a model file with an incorrect suffix is requested (this will happen a + lot with the current working of the ToolkitRegistry.call method, where things like "am1bcc" will + be requested from get_model due to toolkit precedence.""" + def get_release_metadata() -> list[dict]: return json.loads(urllib.request.urlopen(RELEASES_URL).read().decode("utf-8")) @@ -75,7 +80,9 @@ def get_model( HashComparisonFailedException FileNotFoundError """ - + if not(filename.endswith(".pt")): + raise BadFileSuffixError(f"NAGLToolkitWrapper does not recognize file path extension " + f"on {filename=}, expected '.pt' suffix") pathlib.Path(CACHE_DIR).mkdir(exist_ok=True) cached_path = CACHE_DIR / filename From 1b21fe591121131406f020f95bdb895e57808e3d Mon Sep 17 00:00:00 2001 From: Jeffrey Wagner Date: Thu, 17 Jul 2025 09:56:30 -0700 Subject: [PATCH 04/13] avoid errors caused by rate limiting by not trying to retrieve from GH release assets at all --- openff/nagl_models/_dynamic_fetch.py | 30 +-- .../nagl_models/tests/test_dynamic_fetch.py | 218 ++++++++++-------- 2 files changed, 135 insertions(+), 113 deletions(-) diff --git a/openff/nagl_models/_dynamic_fetch.py b/openff/nagl_models/_dynamic_fetch.py index 0554cf8..e4444c4 100644 --- a/openff/nagl_models/_dynamic_fetch.py +++ b/openff/nagl_models/_dynamic_fetch.py @@ -96,21 +96,21 @@ def get_model( return cached_path.as_posix() - release_metadata = get_release_metadata() - - # tags with "v" prefix can't easily be sorted, but the result of passing through Version - # are not necessarily 1:1 with the metadata in the releases, keep both and map between - releases: dict[Version:str] = { - Version(release["tag_name"]): release for release in release_metadata - } - - for version in reversed(sorted(releases)): - release = releases[version] - for file in release["assets"]: - if file["name"] == filename: - return _download_and_verify_file( - file["browser_download_url"], cached_path, file_hash - ) + # release_metadata = get_release_metadata() + # + # # tags with "v" prefix can't easily be sorted, but the result of passing through Version + # # are not necessarily 1:1 with the metadata in the releases, keep both and map between + # releases: dict[Version:str] = { + # Version(release["tag_name"]): release for release in release_metadata + # } + # + # for version in reversed(sorted(releases)): + # release = releases[version] + # for file in release["assets"]: + # if file["name"] == filename: + # return _download_and_verify_file( + # file["browser_download_url"], cached_path, file_hash + # ) if doi: try: diff --git a/openff/nagl_models/tests/test_dynamic_fetch.py b/openff/nagl_models/tests/test_dynamic_fetch.py index 2a2e04f..ed3bcc8 100644 --- a/openff/nagl_models/tests/test_dynamic_fetch.py +++ b/openff/nagl_models/tests/test_dynamic_fetch.py @@ -13,6 +13,7 @@ get_model, HashComparisonFailedException, UnableToParseDOIException, + BadFileSuffixError, ) @@ -28,39 +29,39 @@ def mocked_urlretrieve(url, filename): return new, None -def mocked_get_release_metadata(): - # can regenerate this file with - # $ wget https://api.github.com/repos/openforcefield/openff-nagl-models/releases - return json.loads( - open(pathlib.Path(root).parent / "tests/data/releases.json").read() - ) - - -@pytest.mark.parametrize( - "known_model", - [ - "openff-gnn-am1bcc-0.0.1-alpha.1.pt", - "openff-gnn-am1bcc-0.1.0-rc.1.pt", - "openff-gnn-am1bcc-0.1.0-rc.2.pt", - "openff-gnn-am1bcc-0.1.0-rc.3.pt", - ], -) -def test_get_known_models(monkeypatch, known_model): - with monkeypatch.context() as m: - m.setattr( - urllib.request, - "urlretrieve", - mocked_urlretrieve, - ) - m.setattr( - openff.nagl_models._dynamic_fetch, - "get_release_metadata", - mocked_get_release_metadata, - ) - - assert get_model(known_model).endswith(known_model) - - assert "OPENFF_NAGL_MODELS" in get_model(known_model) +# def mocked_get_release_metadata(): +# # can regenerate this file with +# # $ wget https://api.github.com/repos/openforcefield/openff-nagl-models/releases +# return json.loads( +# open(pathlib.Path(root).parent / "tests/data/releases.json").read() +# ) +# +# +# @pytest.mark.parametrize( +# "known_model", +# [ +# "openff-gnn-am1bcc-0.0.1-alpha.1.pt", +# "openff-gnn-am1bcc-0.1.0-rc.1.pt", +# "openff-gnn-am1bcc-0.1.0-rc.2.pt", +# "openff-gnn-am1bcc-0.1.0-rc.3.pt", +# ], +# ) +# def test_get_known_models(monkeypatch, known_model): +# with monkeypatch.context() as m: +# m.setattr( +# urllib.request, +# "urlretrieve", +# mocked_urlretrieve, +# ) +# m.setattr( +# openff.nagl_models._dynamic_fetch, +# "get_release_metadata", +# mocked_get_release_metadata, +# ) +# +# assert get_model(known_model).endswith(known_model) +# +# assert "OPENFF_NAGL_MODELS" in get_model(known_model) @pytest.fixture @@ -82,21 +83,21 @@ def hide_cache(): shutil.move(alt_dir, cache_dir) -def test_access_internet_with_empty_cache(hide_cache): - from pytest_socket import SocketBlockedError, disable_socket - - cache_path = platformdirs.user_cache_path() / "OPENFF_NAGL_MODELS" - - disable_socket() - - # would be nice to test the FileNotFoundError, but much more difficult to get that - # particular network failure vs. checking that the network is accessed at all - with pytest.raises( - SocketBlockedError, - ): - get_model.cache_clear() - - get_model("openff-gnn-am1bcc-0.1.0-rc.3.pt") +# def test_access_internet_with_empty_cache(hide_cache): +# from pytest_socket import SocketBlockedError, disable_socket +# +# cache_path = platformdirs.user_cache_path() / "OPENFF_NAGL_MODELS" +# +# disable_socket() +# +# # would be nice to test the FileNotFoundError, but much more difficult to get that +# # particular network failure vs. checking that the network is accessed at all +# with pytest.raises( +# SocketBlockedError, +# ): +# get_model.cache_clear() +# +# get_model("openff-gnn-am1bcc-0.1.0-rc.3.pt") def test_file_exists_in_cache_without_internet(monkeypatch): @@ -109,11 +110,11 @@ def test_file_exists_in_cache_without_internet(monkeypatch): "urlretrieve", mocked_urlretrieve, ) - m.setattr( - openff.nagl_models._dynamic_fetch, - "get_release_metadata", - mocked_get_release_metadata, - ) + # m.setattr( + # openff.nagl_models._dynamic_fetch, + # "get_release_metadata", + # mocked_get_release_metadata, + # ) assert get_model("openff-gnn-am1bcc-0.1.0-rc.3.pt") @@ -130,16 +131,37 @@ def test_error_on_missing_file(monkeypatch): ), monkeypatch.context() as m, ): - m.setattr( - urllib.request, - "urlretrieve", - mocked_urlretrieve, - ) - m.setattr( - openff.nagl_models._dynamic_fetch, - "get_release_metadata", - mocked_get_release_metadata, - ) + # m.setattr( + # urllib.request, + # "urlretrieve", + # mocked_urlretrieve, + # ) + # m.setattr( + # openff.nagl_models._dynamic_fetch, + # "get_release_metadata", + # mocked_get_release_metadata, + # ) + + get_model("FOOBAR.pt") + +def test_error_on_bad_file_suffix(monkeypatch): + with ( + pytest.raises( + BadFileSuffixError, + match="NAGLToolkitWrapper does not recognize file path extension", + ), + monkeypatch.context() as m, + ): + # m.setattr( + # urllib.request, + # "urlretrieve", + # mocked_urlretrieve, + # ) + # m.setattr( + # openff.nagl_models._dynamic_fetch, + # "get_release_metadata", + # mocked_get_release_metadata, + # ) get_model("FOOBAR.txt") @@ -164,11 +186,11 @@ def test_all_models_loadable(model, monkeypatch): "urlretrieve", mocked_urlretrieve, ) - m.setattr( - openff.nagl_models._dynamic_fetch, - "get_release_metadata", - mocked_get_release_metadata, - ) + # m.setattr( + # openff.nagl_models._dynamic_fetch, + # "get_release_metadata", + # mocked_get_release_metadata, + # ) GNNModel.load(get_model(model), eval_mode=True) @@ -177,11 +199,11 @@ def test_get_model_by_doi_and_hash(hide_cache, monkeypatch): # This test uses a Zenodo sandbox DOI (10.5072 prefix) and the corresponding # SHA256 hash of the test file uploaded to that sandbox record with monkeypatch.context() as m: - m.setattr( - openff.nagl_models._dynamic_fetch, - "get_release_metadata", - mocked_get_release_metadata, - ) + # m.setattr( + # openff.nagl_models._dynamic_fetch, + # "get_release_metadata", + # mocked_get_release_metadata, + # ) get_model( "my_favorite_model.pt", @@ -191,22 +213,22 @@ def test_get_model_by_doi_and_hash(hide_cache, monkeypatch): def test_get_model_by_doi_no_hash(hide_cache, monkeypatch): with monkeypatch.context() as m: - m.setattr( - openff.nagl_models._dynamic_fetch, - "get_release_metadata", - mocked_get_release_metadata, - ) + # m.setattr( + # openff.nagl_models._dynamic_fetch, + # "get_release_metadata", + # mocked_get_release_metadata, + # ) get_model("my_favorite_model.pt", doi="10.5072/zenodo.278300") def test_get_model_hash_comparison_fails(monkeypatch): with monkeypatch.context() as m: - m.setattr( - openff.nagl_models._dynamic_fetch, - "get_release_metadata", - mocked_get_release_metadata, - ) + # m.setattr( + # openff.nagl_models._dynamic_fetch, + # "get_release_metadata", + # mocked_get_release_metadata, + # ) with pytest.raises(HashComparisonFailedException): get_model( @@ -217,11 +239,11 @@ def test_get_model_hash_comparison_fails(monkeypatch): def test_user_provided_hash_conflicts_with_known_hash(monkeypatch): with monkeypatch.context() as m: - m.setattr( - openff.nagl_models._dynamic_fetch, - "get_release_metadata", - mocked_get_release_metadata, - ) + # m.setattr( + # openff.nagl_models._dynamic_fetch, + # "get_release_metadata", + # mocked_get_release_metadata, + # ) with pytest.raises(HashComparisonFailedException): get_model("openff-gnn-am1bcc-0.1.0-rc.3.pt", file_hash="wrong_hash") @@ -233,11 +255,11 @@ def test_malformed_doi(monkeypatch, hide_cache): "urlretrieve", mocked_urlretrieve, ) - m.setattr( - openff.nagl_models._dynamic_fetch, - "get_release_metadata", - mocked_get_release_metadata, - ) + # m.setattr( + # openff.nagl_models._dynamic_fetch, + # "get_release_metadata", + # mocked_get_release_metadata, + # ) with pytest.raises(UnableToParseDOIException): get_model("my_favorite_model.pt", doi="zenodo.278300") @@ -245,10 +267,10 @@ def test_malformed_doi(monkeypatch, hide_cache): def test_no_matching_file_at_doi(monkeypatch): with monkeypatch.context() as m: - m.setattr( - openff.nagl_models._dynamic_fetch, - "get_release_metadata", - mocked_get_release_metadata, - ) + # m.setattr( + # openff.nagl_models._dynamic_fetch, + # "get_release_metadata", + # mocked_get_release_metadata, + # ) with pytest.raises(FileNotFoundError, match="sandbox.zenodo"): get_model("file_that_doesnt_exist.pt", doi="10.5072/zenodo.278300") From 1f671889f0e41ac6ec31ab41c681ceed94df74ba Mon Sep 17 00:00:00 2001 From: Jeffrey Wagner Date: Thu, 24 Jul 2025 20:14:01 -0700 Subject: [PATCH 05/13] reimplement "the nagl_models package looks in the nagl_models package for nagl models" behavior --- openff/nagl_models/_dynamic_fetch.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/openff/nagl_models/_dynamic_fetch.py b/openff/nagl_models/_dynamic_fetch.py index e4444c4..cf10b5c 100644 --- a/openff/nagl_models/_dynamic_fetch.py +++ b/openff/nagl_models/_dynamic_fetch.py @@ -6,7 +6,7 @@ import urllib.request import platformdirs from packaging.version import Version - +from openff.nagl_models import validate_nagl_model_path RELEASES_URL = "https://api.github.com/repos/openforcefield/openff-nagl-models/releases" @@ -45,9 +45,10 @@ def get_model( ) -> str: """ Return the path of a model as cached on disk, downloading if necessary. The lookup order of this implementation is: - 1. Try to retrieve the file from the local cache - 2. Try to fetch the file from a release of https://github.com/openforcefield/openff-nagl-models - 3. Try to fetch the file from the DOI, if provided + 1. Try to retrieve the file from the openff-nagl-models python package + 2. Try to retrieve the file from the local cache + 3. Try to fetch the file from a release of https://github.com/openforcefield/openff-nagl-models + 4. Try to fetch the file from the DOI, if provided This method will raise an HashComparisonFailedException as soon as a hash mismatch is encountered. So if there's a file with a matching name but a non-matching hash in the local cache, an exception will be raised @@ -85,11 +86,20 @@ def get_model( f"on {filename=}, expected '.pt' suffix") pathlib.Path(CACHE_DIR).mkdir(exist_ok=True) - cached_path = CACHE_DIR / filename - + # See if the file has a known hash if file_hash is None and filename in KNOWN_HASHES: file_hash = KNOWN_HASHES[filename] + # See if it's available in the openff-nagl-models python package + try: + file_path = validate_nagl_model_path(filename) + assert_hash_equal(file_path, file_hash) + return file_path + except FileNotFoundError: + pass + + # Then check if it's in the cache + cached_path = CACHE_DIR / filename if cached_path.exists(): if file_hash: assert_hash_equal(cached_path, file_hash) @@ -112,6 +122,7 @@ def get_model( # file["browser_download_url"], cached_path, file_hash # ) + # Otherwise try to fetch from DOI if doi: try: match = re.search(r"10\.(5072|5281)/zenodo\.([0-9]+)", doi) From 49b0248a7fc361ba92a4d854d93e4ee37ebb74ea Mon Sep 17 00:00:00 2001 From: Jeffrey Wagner Date: Wed, 6 Aug 2025 10:02:54 -0700 Subject: [PATCH 06/13] fix docstrings --- openff/nagl_models/_dynamic_fetch.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/openff/nagl_models/_dynamic_fetch.py b/openff/nagl_models/_dynamic_fetch.py index cf10b5c..6149194 100644 --- a/openff/nagl_models/_dynamic_fetch.py +++ b/openff/nagl_models/_dynamic_fetch.py @@ -47,8 +47,7 @@ def get_model( Return the path of a model as cached on disk, downloading if necessary. The lookup order of this implementation is: 1. Try to retrieve the file from the openff-nagl-models python package 2. Try to retrieve the file from the local cache - 3. Try to fetch the file from a release of https://github.com/openforcefield/openff-nagl-models - 4. Try to fetch the file from the DOI, if provided + 3. Try to fetch the file from the DOI, if provided This method will raise an HashComparisonFailedException as soon as a hash mismatch is encountered. So if there's a file with a matching name but a non-matching hash in the local cache, an exception will be raised @@ -74,7 +73,7 @@ def get_model( Returns ------- str - The path to the file if it was found. If the file wasn't found then a FileNotFoundError is rasied. + The path to the file if it was found. If the file wasn't found then a FileNotFoundError is raised. Raises ------ From a80d45f7e839becb4be72e84d669b172a512bb37 Mon Sep 17 00:00:00 2001 From: Jeffrey Wagner Date: Wed, 6 Aug 2025 11:38:44 -0700 Subject: [PATCH 07/13] disable get_models lru caching, consolidate all caching tests into one so they stop interfering, clean up unused monkeypatching now that release assets aren't checked --- openff/nagl_models/_dynamic_fetch.py | 1 - .../nagl_models/tests/test_dynamic_fetch.py | 281 ++++++------------ 2 files changed, 87 insertions(+), 195 deletions(-) diff --git a/openff/nagl_models/_dynamic_fetch.py b/openff/nagl_models/_dynamic_fetch.py index 6149194..1306680 100644 --- a/openff/nagl_models/_dynamic_fetch.py +++ b/openff/nagl_models/_dynamic_fetch.py @@ -37,7 +37,6 @@ def get_release_metadata() -> list[dict]: return json.loads(urllib.request.urlopen(RELEASES_URL).read().decode("utf-8")) -@functools.lru_cache() def get_model( filename: str, doi: None | str = None, diff --git a/openff/nagl_models/tests/test_dynamic_fetch.py b/openff/nagl_models/tests/test_dynamic_fetch.py index ed3bcc8..c26e868 100644 --- a/openff/nagl_models/tests/test_dynamic_fetch.py +++ b/openff/nagl_models/tests/test_dynamic_fetch.py @@ -1,13 +1,10 @@ -import json import os import pathlib import shutil -import urllib.request import platformdirs import pytest -import openff.nagl_models._dynamic_fetch from openff.nagl_models import __file__ as root from openff.nagl_models._dynamic_fetch import ( get_model, @@ -29,41 +26,6 @@ def mocked_urlretrieve(url, filename): return new, None -# def mocked_get_release_metadata(): -# # can regenerate this file with -# # $ wget https://api.github.com/repos/openforcefield/openff-nagl-models/releases -# return json.loads( -# open(pathlib.Path(root).parent / "tests/data/releases.json").read() -# ) -# -# -# @pytest.mark.parametrize( -# "known_model", -# [ -# "openff-gnn-am1bcc-0.0.1-alpha.1.pt", -# "openff-gnn-am1bcc-0.1.0-rc.1.pt", -# "openff-gnn-am1bcc-0.1.0-rc.2.pt", -# "openff-gnn-am1bcc-0.1.0-rc.3.pt", -# ], -# ) -# def test_get_known_models(monkeypatch, known_model): -# with monkeypatch.context() as m: -# m.setattr( -# urllib.request, -# "urlretrieve", -# mocked_urlretrieve, -# ) -# m.setattr( -# openff.nagl_models._dynamic_fetch, -# "get_release_metadata", -# mocked_get_release_metadata, -# ) -# -# assert get_model(known_model).endswith(known_model) -# -# assert "OPENFF_NAGL_MODELS" in get_model(known_model) - - @pytest.fixture def hide_cache(): cache_dir = platformdirs.user_cache_path() / "OPENFF_NAGL_MODELS" @@ -83,85 +45,94 @@ def hide_cache(): shutil.move(alt_dir, cache_dir) -# def test_access_internet_with_empty_cache(hide_cache): -# from pytest_socket import SocketBlockedError, disable_socket -# -# cache_path = platformdirs.user_cache_path() / "OPENFF_NAGL_MODELS" -# -# disable_socket() -# -# # would be nice to test the FileNotFoundError, but much more difficult to get that -# # particular network failure vs. checking that the network is accessed at all -# with pytest.raises( -# SocketBlockedError, -# ): -# get_model.cache_clear() -# -# get_model("openff-gnn-am1bcc-0.1.0-rc.3.pt") +def test_zenodo_fetching_and_caching(hide_cache): + """ + All of the tests that rely on remote fetching into the cache + and checking whether something is in the cache need to be run in + serial, otherwise they'll interfere with each other, so they're + all consolidated into this one test. + """ + # This test uses a Zenodo sandbox DOI (10.5072 prefix) and the corresponding + # SHA256 hash of the test file "my_favorite_model.pt" (which is a copy of + # openff-gnn-am1bcc-0.1.0-rc.3.pt) uploaded to that sandbox record + + from pytest_socket import SocketBlockedError, disable_socket, enable_socket + + disable_socket() -def test_file_exists_in_cache_without_internet(monkeypatch): - from pytest_socket import disable_socket + # Ensure that the cache is hidden, + with pytest.raises(FileNotFoundError): - # since tests can run in different orders, make sure the file exists already - with monkeypatch.context() as m: - m.setattr( - urllib.request, - "urlretrieve", - mocked_urlretrieve, + get_model( + "my_favorite_model.pt", ) - # m.setattr( - # openff.nagl_models._dynamic_fetch, - # "get_release_metadata", - # mocked_get_release_metadata, - # ) - assert get_model("openff-gnn-am1bcc-0.1.0-rc.3.pt") + # Ensure that trying to fetch a + # model fails due to lack of internet access + with pytest.raises( + SocketBlockedError, + ): + get_model( + "my_favorite_model.pt", + doi="10.5072/zenodo.278300", + file_hash="127eb0b9512f22546f8b455582bcd85b2521866d32b86d231fee26d4771b1d81", + ) - disable_socket() + # Ensure that the file can actually be fetched + enable_socket() + get_model( + "my_favorite_model.pt", + doi="10.5072/zenodo.278300", + ) + + # Ensure that, once fetched, the file can be gotten without accessing the internet. + disable_socket() + # Ensure that cached files can be accessed when no optional arguments are provided + get_model( + "my_favorite_model.pt", + ) + + # Ensure that cached files can be accessed when only doi is provided + get_model( + "my_favorite_model.pt", + doi="10.5072/zenodo.278300", + ) + + # Ensure that cached files can be accessed when all optional arguments are provided + get_model( + "my_favorite_model.pt", + doi="10.5072/zenodo.278300", + file_hash="127eb0b9512f22546f8b455582bcd85b2521866d32b86d231fee26d4771b1d81", + ) + + # Ensure that cached files can be accessed when only hash is provided + get_model( + "my_favorite_model.pt", + file_hash="127eb0b9512f22546f8b455582bcd85b2521866d32b86d231fee26d4771b1d81", + ) + + # Ensure that cached files raise hash comparison errors + with pytest.raises(HashComparisonFailedException): + get_model( + "my_favorite_model.pt", + doi="10.5072/zenodo.278300", + file_hash="wrong_hash", + ) - get_model("openff-gnn-am1bcc-0.1.0-rc.3.pt") -def test_error_on_missing_file(monkeypatch): - with ( - pytest.raises( +# +def test_error_on_missing_file(): + with pytest.raises( FileNotFoundError, - match="Could not find asset with name 'FOOBAR", - ), - monkeypatch.context() as m, - ): - # m.setattr( - # urllib.request, - # "urlretrieve", - # mocked_urlretrieve, - # ) - # m.setattr( - # openff.nagl_models._dynamic_fetch, - # "get_release_metadata", - # mocked_get_release_metadata, - # ) - + match="Could not find asset with name 'FOOBAR"): get_model("FOOBAR.pt") -def test_error_on_bad_file_suffix(monkeypatch): - with ( - pytest.raises( +def test_error_on_bad_file_suffix(): + with pytest.raises( BadFileSuffixError, - match="NAGLToolkitWrapper does not recognize file path extension", - ), - monkeypatch.context() as m, - ): - # m.setattr( - # urllib.request, - # "urlretrieve", - # mocked_urlretrieve, - # ) - # m.setattr( - # openff.nagl_models._dynamic_fetch, - # "get_release_metadata", - # mocked_get_release_metadata, - # ) + match="NAGLToolkitWrapper does not recognize file path extension"): get_model("FOOBAR.txt") @@ -175,102 +146,24 @@ def test_error_on_bad_file_suffix(monkeypatch): "openff-gnn-am1bcc-0.1.0-rc.3.pt", ], ) -def test_all_models_loadable(model, monkeypatch): +def test_all_models_loadable(model): pytest.importorskip("openff.nagl") from openff.nagl.nn._models import GNNModel - with monkeypatch.context() as m: - m.setattr( - urllib.request, - "urlretrieve", - mocked_urlretrieve, - ) - # m.setattr( - # openff.nagl_models._dynamic_fetch, - # "get_release_metadata", - # mocked_get_release_metadata, - # ) + GNNModel.load(get_model(model), eval_mode=True) - GNNModel.load(get_model(model), eval_mode=True) +def test_user_provided_hash_conflicts_with_known_hash(): + with pytest.raises(HashComparisonFailedException): + get_model("openff-gnn-am1bcc-0.1.0-rc.3.pt", file_hash="wrong_hash") -def test_get_model_by_doi_and_hash(hide_cache, monkeypatch): - # This test uses a Zenodo sandbox DOI (10.5072 prefix) and the corresponding - # SHA256 hash of the test file uploaded to that sandbox record - with monkeypatch.context() as m: - # m.setattr( - # openff.nagl_models._dynamic_fetch, - # "get_release_metadata", - # mocked_get_release_metadata, - # ) - get_model( - "my_favorite_model.pt", - doi="10.5072/zenodo.278300", - file_hash="127eb0b9512f22546f8b455582bcd85b2521866d32b86d231fee26d4771b1d81", - ) +def test_malformed_doi(): + with pytest.raises(UnableToParseDOIException): + get_model("nonexistent.pt", doi="zenodo.278300") -def test_get_model_by_doi_no_hash(hide_cache, monkeypatch): - with monkeypatch.context() as m: - # m.setattr( - # openff.nagl_models._dynamic_fetch, - # "get_release_metadata", - # mocked_get_release_metadata, - # ) - - get_model("my_favorite_model.pt", doi="10.5072/zenodo.278300") - - -def test_get_model_hash_comparison_fails(monkeypatch): - with monkeypatch.context() as m: - # m.setattr( - # openff.nagl_models._dynamic_fetch, - # "get_release_metadata", - # mocked_get_release_metadata, - # ) - - with pytest.raises(HashComparisonFailedException): - get_model( - "my_favorite_model.pt", - doi="10.5072/zenodo.278300", - file_hash="wrong_hash", - ) - -def test_user_provided_hash_conflicts_with_known_hash(monkeypatch): - with monkeypatch.context() as m: - # m.setattr( - # openff.nagl_models._dynamic_fetch, - # "get_release_metadata", - # mocked_get_release_metadata, - # ) - with pytest.raises(HashComparisonFailedException): - get_model("openff-gnn-am1bcc-0.1.0-rc.3.pt", file_hash="wrong_hash") - - -def test_malformed_doi(monkeypatch, hide_cache): - with monkeypatch.context() as m: - m.setattr( - urllib.request, - "urlretrieve", - mocked_urlretrieve, - ) - # m.setattr( - # openff.nagl_models._dynamic_fetch, - # "get_release_metadata", - # mocked_get_release_metadata, - # ) - - with pytest.raises(UnableToParseDOIException): - get_model("my_favorite_model.pt", doi="zenodo.278300") - - -def test_no_matching_file_at_doi(monkeypatch): - with monkeypatch.context() as m: - # m.setattr( - # openff.nagl_models._dynamic_fetch, - # "get_release_metadata", - # mocked_get_release_metadata, - # ) - with pytest.raises(FileNotFoundError, match="sandbox.zenodo"): - get_model("file_that_doesnt_exist.pt", doi="10.5072/zenodo.278300") + +def test_no_matching_file_at_doi(): + with pytest.raises(FileNotFoundError, match="sandbox.zenodo"): + get_model("file_that_doesnt_exist.pt", doi="10.5072/zenodo.278300") From 8920efdc6a0fb4f2dee61c3dc3794812b223dfd5 Mon Sep 17 00:00:00 2001 From: Jeffrey Wagner Date: Wed, 6 Aug 2025 11:50:45 -0700 Subject: [PATCH 08/13] remove commented code --- openff/nagl_models/_dynamic_fetch.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/openff/nagl_models/_dynamic_fetch.py b/openff/nagl_models/_dynamic_fetch.py index 1306680..c8a3eb7 100644 --- a/openff/nagl_models/_dynamic_fetch.py +++ b/openff/nagl_models/_dynamic_fetch.py @@ -104,22 +104,6 @@ def get_model( return cached_path.as_posix() - # release_metadata = get_release_metadata() - # - # # tags with "v" prefix can't easily be sorted, but the result of passing through Version - # # are not necessarily 1:1 with the metadata in the releases, keep both and map between - # releases: dict[Version:str] = { - # Version(release["tag_name"]): release for release in release_metadata - # } - # - # for version in reversed(sorted(releases)): - # release = releases[version] - # for file in release["assets"]: - # if file["name"] == filename: - # return _download_and_verify_file( - # file["browser_download_url"], cached_path, file_hash - # ) - # Otherwise try to fetch from DOI if doi: try: From e5f916421a15886aa0f7451d6ddf72ced5e90b05 Mon Sep 17 00:00:00 2001 From: Jeff Wagner Date: Wed, 6 Aug 2025 19:17:15 -0700 Subject: [PATCH 09/13] Apply suggestions from code review Co-authored-by: Matt Thompson --- openff/nagl_models/_dynamic_fetch.py | 8 ++++---- openff/nagl_models/tests/test_dynamic_fetch.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/openff/nagl_models/_dynamic_fetch.py b/openff/nagl_models/_dynamic_fetch.py index c8a3eb7..d30c640 100644 --- a/openff/nagl_models/_dynamic_fetch.py +++ b/openff/nagl_models/_dynamic_fetch.py @@ -44,9 +44,9 @@ def get_model( ) -> str: """ Return the path of a model as cached on disk, downloading if necessary. The lookup order of this implementation is: - 1. Try to retrieve the file from the openff-nagl-models python package + 1. Try to retrieve the file from the installed `openff-nagl-models` python package on disk 2. Try to retrieve the file from the local cache - 3. Try to fetch the file from the DOI, if provided + 3. Try to fetch the file from the Zenodo DOI, if provided This method will raise an HashComparisonFailedException as soon as a hash mismatch is encountered. So if there's a file with a matching name but a non-matching hash in the local cache, an exception will be raised @@ -80,8 +80,8 @@ def get_model( FileNotFoundError """ if not(filename.endswith(".pt")): - raise BadFileSuffixError(f"NAGLToolkitWrapper does not recognize file path extension " - f"on {filename=}, expected '.pt' suffix") + raise BadFileSuffixError(f"OpenFF NAGL models are based on PyTorch files and expect a `.pt` suffix. Found an unrecognized file path extension " + f"on {filename=}") pathlib.Path(CACHE_DIR).mkdir(exist_ok=True) # See if the file has a known hash diff --git a/openff/nagl_models/tests/test_dynamic_fetch.py b/openff/nagl_models/tests/test_dynamic_fetch.py index c26e868..a2fa71e 100644 --- a/openff/nagl_models/tests/test_dynamic_fetch.py +++ b/openff/nagl_models/tests/test_dynamic_fetch.py @@ -93,7 +93,7 @@ def test_zenodo_fetching_and_caching(hide_cache): "my_favorite_model.pt", ) - # Ensure that cached files can be accessed when only doi is provided + # Ensure that a network call is not made if the requested file is in the cache get_model( "my_favorite_model.pt", doi="10.5072/zenodo.278300", From 75b5c1d1479bb789da43b01d14c14368918145d3 Mon Sep 17 00:00:00 2001 From: Jeffrey Wagner Date: Wed, 6 Aug 2025 19:48:41 -0700 Subject: [PATCH 10/13] remove unused test function --- openff/nagl_models/tests/test_dynamic_fetch.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/openff/nagl_models/tests/test_dynamic_fetch.py b/openff/nagl_models/tests/test_dynamic_fetch.py index a2fa71e..030e8aa 100644 --- a/openff/nagl_models/tests/test_dynamic_fetch.py +++ b/openff/nagl_models/tests/test_dynamic_fetch.py @@ -13,19 +13,6 @@ BadFileSuffixError, ) - -def mocked_urlretrieve(url, filename): - """Mock downloading files from assets by copying from the models/ directory.""" - old = ( - pathlib.Path(root).parent / "models/am1bcc" / pathlib.Path(filename).name - ).as_posix() - new = (platformdirs.user_cache_path() / "OPENFF_NAGL_MODELS" / filename).as_posix() - - shutil.copy(old, new) - - return new, None - - @pytest.fixture def hide_cache(): cache_dir = platformdirs.user_cache_path() / "OPENFF_NAGL_MODELS" From fdb6c28014af99ff8c19a730a4a6b4c2e56d7df6 Mon Sep 17 00:00:00 2001 From: Jeffrey Wagner Date: Wed, 6 Aug 2025 20:07:47 -0700 Subject: [PATCH 11/13] more thoroughly check that test model isn't slipping in outside the context of the tests --- openff/nagl_models/tests/test_dynamic_fetch.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/openff/nagl_models/tests/test_dynamic_fetch.py b/openff/nagl_models/tests/test_dynamic_fetch.py index 030e8aa..6a1f66e 100644 --- a/openff/nagl_models/tests/test_dynamic_fetch.py +++ b/openff/nagl_models/tests/test_dynamic_fetch.py @@ -45,6 +45,8 @@ def test_zenodo_fetching_and_caching(hide_cache): # openff-gnn-am1bcc-0.1.0-rc.3.pt) uploaded to that sandbox record from pytest_socket import SocketBlockedError, disable_socket, enable_socket + from openff.nagl_models._dynamic_fetch import CACHE_DIR + from openff.nagl_models import get_nagl_model_dirs_paths disable_socket() @@ -55,6 +57,11 @@ def test_zenodo_fetching_and_caching(hide_cache): "my_favorite_model.pt", ) + # Ensure the test file isn't in the cache or the nagl_models package + assert not (os.path.exists(CACHE_DIR / 'my_favorite_model.pt')) + for dir_path in get_nagl_model_dirs_paths(): + assert not (os.path.exists(dir_path / 'my_favorite_model.pt')) + # Ensure that trying to fetch a # model fails due to lack of internet access with pytest.raises( @@ -73,6 +80,8 @@ def test_zenodo_fetching_and_caching(hide_cache): doi="10.5072/zenodo.278300", ) + # Ensure that the file is really in the cache + assert os.path.exists(CACHE_DIR / 'my_favorite_model.pt') # Ensure that, once fetched, the file can be gotten without accessing the internet. disable_socket() # Ensure that cached files can be accessed when no optional arguments are provided From 405889bf5008d5aea3253e163138ee5546ba3f31 Mon Sep 17 00:00:00 2001 From: Jeffrey Wagner Date: Wed, 6 Aug 2025 20:08:20 -0700 Subject: [PATCH 12/13] fix regex in pytest.raises for new error message --- openff/nagl_models/tests/test_dynamic_fetch.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/openff/nagl_models/tests/test_dynamic_fetch.py b/openff/nagl_models/tests/test_dynamic_fetch.py index 6a1f66e..84dfa48 100644 --- a/openff/nagl_models/tests/test_dynamic_fetch.py +++ b/openff/nagl_models/tests/test_dynamic_fetch.py @@ -116,9 +116,6 @@ def test_zenodo_fetching_and_caching(hide_cache): file_hash="wrong_hash", ) - - -# def test_error_on_missing_file(): with pytest.raises( FileNotFoundError, @@ -128,7 +125,7 @@ def test_error_on_missing_file(): def test_error_on_bad_file_suffix(): with pytest.raises( BadFileSuffixError, - match="NAGLToolkitWrapper does not recognize file path extension"): + match="Found an unrecognized file path extension on filename='FOOBAR.txt'"): get_model("FOOBAR.txt") From 31a520ec39d405477bb97bd94e70f22846fe5ee5 Mon Sep 17 00:00:00 2001 From: Jeffrey Wagner Date: Wed, 6 Aug 2025 20:13:35 -0700 Subject: [PATCH 13/13] update releasenotes --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70ba306..997e353 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,19 @@ The rules for this file: * accompany each entry with github issue/PR number (Issue #xyz) --> +## Current development + +### Authors + +- @lilyminium +- @Yoshanuikabundi +- @mattwthompson +- @j-wags +- @jaclark5 (assisted with debugging caching issues) + +### New features +- Added fetching by DOI, hash verification, and caching. (#44, #61, #62) + ## v0.3.0 - 2024-07-29 ### Authors