diff --git a/llms/huggingface/huggingfacellm_option.go b/llms/huggingface/huggingfacellm_option.go
index e083780d1..fd8204866 100644
--- a/llms/huggingface/huggingfacellm_option.go
+++ b/llms/huggingface/huggingfacellm_option.go
@@ -13,7 +13,7 @@ const (
 	xdgCacheHomeEnvVar    = "XDG_CACHE_HOME" // XDG cache directory
 	defaultTokenPath      = "token"          // Default token filename
 	defaultModel          = "gpt2"
-	defaultURL            = "https://api-inference.huggingface.co"
+	defaultURL            = "https://router.huggingface.co/hf-inference"
 	routerURL             = "https://router.huggingface.co"
 )
 
diff --git a/llms/huggingface/testdata/TestHuggingFaceLLMStandardInference.httprr b/llms/huggingface/testdata/TestHuggingFaceLLMStandardInference.httprr
index a86901af6..0052de9ea 100644
--- a/llms/huggingface/testdata/TestHuggingFaceLLMStandardInference.httprr
+++ b/llms/huggingface/testdata/TestHuggingFaceLLMStandardInference.httprr
@@ -1,7 +1,7 @@
 httprr trace v1
-325 873
-POST https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta HTTP/1.1
-Host: api-inference.huggingface.co
+324 873
+POST https://router.huggingface.co/hf-inference/models/HuggingFaceH4/zephyr-7b-beta HTTP/1.1
+Host: router.huggingface.co
 User-Agent: langchaingo-httprr
 Content-Length: 80
 Authorization: Bearer test-api-key