From d8395aeb778a1b29f6ed5f4996675c9f1baebc94 Mon Sep 17 00:00:00 2001 From: usernames122 <88596366+usernames122@users.noreply.github.com> Date: Sat, 4 Apr 2026 19:47:14 +0200 Subject: [PATCH 1/2] Add attribute check for sampler in close method This fixes a bug I uncovered that causes an AttributeError when a model is repeatedly re-initialized in a loop and Python garbage-collects it, for example when testing the highest GPU layer count you can use before CUDA runs out of memory. --- llama_cpp/_internals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_cpp/_internals.py b/llama_cpp/_internals.py index cde52c8c8..35abfa537 100644 --- a/llama_cpp/_internals.py +++ b/llama_cpp/_internals.py @@ -76,7 +76,7 @@ def free_model(): self._exit_stack.callback(free_model) def close(self): - if self.sampler is not None: + if hasattr(self, "sampler") and self.sampler is not None: # NOTE: Must remove custom samplers before free or llama.cpp will try to free them for i, _ in reversed(self.custom_samplers): llama_cpp.llama_sampler_chain_remove(self.sampler, i) From 15d7ea1708374d3dbd1408121d80a3a16a03bcc8 Mon Sep 17 00:00:00 2001 From: abetlen Date: Sun, 31 May 2026 04:26:26 -0700 Subject: [PATCH 2/2] fix: avoid cleanup errors for partial model init --- CHANGELOG.md | 1 + llama_cpp/_internals.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d8cb4e10..a0eb9da88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- fix: avoid cleanup errors for partially initialized `LlamaModel` objects by @usernames122 in #2173 - fix: suppress stdout and stderr in Jupyter notebooks by @Anai-Guo in #2181 - feat: enable arm64 musl builds by @acon96 in #2221 - feat: Update llama.cpp to ggml-org/llama.cpp@d749821db diff --git a/llama_cpp/_internals.py b/llama_cpp/_internals.py index bf2a8ed23..b0fe94d01 100644 --- 
a/llama_cpp/_internals.py +++ b/llama_cpp/_internals.py @@ -44,6 +44,9 @@ def __init__( self.params = params self.verbose = verbose self._exit_stack = ExitStack() + # LlamaModel does not use samplers, but close() can run after partial init. + self.sampler = None + self.custom_samplers = [] model = None @@ -65,7 +68,6 @@ def __init__( self.model = model self.vocab = vocab - self.sampler = None # LlamaModel doesn't use samplers, but some cleanup code expects this attribute def free_model(): if self.model is None: @@ -76,7 +78,7 @@ def free_model(): self._exit_stack.callback(free_model) def close(self): - if hasattr(self, "sampler") and self.sampler is not None: + if self.sampler is not None: # NOTE: Must remove custom samplers before free or llama.cpp will try to free them for i, _ in reversed(self.custom_samplers): llama_cpp.llama_sampler_chain_remove(self.sampler, i)