From 142721554ab6026719a6bd7dceb761958ce9de34 Mon Sep 17 00:00:00 2001 From: bozhouDev <259759010+bozhouDev@users.noreply.github.com> Date: Fri, 29 May 2026 12:07:34 +0800 Subject: [PATCH] llms/huggingface: update default inference endpoint to router.huggingface.co HuggingFace deprecated https://api-inference.huggingface.co; requests to it now return 404 (the endpoint was retired on 2025-11-01). The package still used it as the default URL, so HuggingFace LLM and embedding calls fail by default unless the caller overrides the URL. Point defaultURL at the hf-inference provider on the new router (https://router.huggingface.co/hf-inference). The client builds request URLs as "%s/models/%s", so the base must omit a trailing /models; the resulting URL is https://router.huggingface.co/hf-inference/models/{model}, which is HuggingFace's documented replacement for the legacy task endpoint. Update the TestHuggingFaceLLMStandardInference replay fixture to target the new endpoint accordingly. Fixes #1428 Signed-off-by: bozhouDev <259759010+bozhouDev@users.noreply.github.com> Co-authored-by: Cursor --- llms/huggingface/huggingfacellm_option.go | 2 +- .../testdata/TestHuggingFaceLLMStandardInference.httprr | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llms/huggingface/huggingfacellm_option.go b/llms/huggingface/huggingfacellm_option.go index e083780d1..fd8204866 100644 --- a/llms/huggingface/huggingfacellm_option.go +++ b/llms/huggingface/huggingfacellm_option.go @@ -13,7 +13,7 @@ const ( xdgCacheHomeEnvVar = "XDG_CACHE_HOME" // XDG cache directory defaultTokenPath = "token" // Default token filename defaultModel = "gpt2" - defaultURL = "https://api-inference.huggingface.co" + defaultURL = "https://router.huggingface.co/hf-inference" routerURL = "https://router.huggingface.co" ) diff --git a/llms/huggingface/testdata/TestHuggingFaceLLMStandardInference.httprr b/llms/huggingface/testdata/TestHuggingFaceLLMStandardInference.httprr index a86901af6..0052de9ea 
100644 --- a/llms/huggingface/testdata/TestHuggingFaceLLMStandardInference.httprr +++ b/llms/huggingface/testdata/TestHuggingFaceLLMStandardInference.httprr @@ -1,7 +1,7 @@ httprr trace v1 -325 873 -POST https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta HTTP/1.1 -Host: api-inference.huggingface.co +324 873 +POST https://router.huggingface.co/hf-inference/models/HuggingFaceH4/zephyr-7b-beta HTTP/1.1 +Host: router.huggingface.co User-Agent: langchaingo-httprr Content-Length: 80 Authorization: Bearer test-api-key