From d8395aeb778a1b29f6ed5f4996675c9f1baebc94 Mon Sep 17 00:00:00 2001
From: usernames122 <88596366+usernames122@users.noreply.github.com>
Date: Sat, 4 Apr 2026 19:47:14 +0200
Subject: [PATCH 1/2] Add attribute check for sampler in close method

This fixes an AttributeError raised in `close()` when `self.sampler` has not been set at the time Python garbage-collects the object. I hit this while repeatedly re-initializing a model in a loop, for example when probing for the highest GPU layer count that loads before CUDA runs out of memory.
---
 llama_cpp/_internals.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_cpp/_internals.py b/llama_cpp/_internals.py
index cde52c8c8..35abfa537 100644
--- a/llama_cpp/_internals.py
+++ b/llama_cpp/_internals.py
@@ -76,7 +76,7 @@ def free_model():
         self._exit_stack.callback(free_model)
 
     def close(self):
-        if self.sampler is not None:
+        if hasattr(self, "sampler") and self.sampler is not None:
             # NOTE: Must remove custom samplers before free or llama.cpp will try to free them
             for i, _ in reversed(self.custom_samplers):
                 llama_cpp.llama_sampler_chain_remove(self.sampler, i)

From 15d7ea1708374d3dbd1408121d80a3a16a03bcc8 Mon Sep 17 00:00:00 2001
From: abetlen <abetlen@gmail.com>
Date: Sun, 31 May 2026 04:26:26 -0700
Subject: [PATCH 2/2] fix: avoid cleanup errors for partial model init

---
 CHANGELOG.md            | 1 +
 llama_cpp/_internals.py | 6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1d8cb4e10..a0eb9da88 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- fix: avoid cleanup errors for partially initialized `LlamaModel` objects by @usernames122 in #2173
 - fix: suppress stdout and stderr in Jupyter notebooks by @Anai-Guo in #2181
 - feat: enable arm64 musl builds by @acon96 in #2221
 - feat: Update llama.cpp to ggml-org/llama.cpp@d749821db
diff --git a/llama_cpp/_internals.py b/llama_cpp/_internals.py
index bf2a8ed23..b0fe94d01 100644
--- a/llama_cpp/_internals.py
+++ b/llama_cpp/_internals.py
@@ -44,6 +44,9 @@ def __init__(
         self.params = params
         self.verbose = verbose
         self._exit_stack = ExitStack()
+        # LlamaModel does not use samplers, but close() can run after partial init.
+        self.sampler = None
+        self.custom_samplers = []
 
         model = None
 
@@ -65,7 +68,6 @@ def __init__(
 
         self.model = model
         self.vocab = vocab
-        self.sampler = None  # LlamaModel doesn't use samplers, but some cleanup code expects this attribute
 
         def free_model():
             if self.model is None:
@@ -76,7 +78,7 @@ def free_model():
         self._exit_stack.callback(free_model)
 
     def close(self):
-        if hasattr(self, "sampler") and self.sampler is not None:
+        if self.sampler is not None:
             # NOTE: Must remove custom samplers before free or llama.cpp will try to free them
             for i, _ in reversed(self.custom_samplers):
                 llama_cpp.llama_sampler_chain_remove(self.sampler, i)