lightspeed-core · radofuchs · Jun 5, 2026 · Jun 5, 2026 · coderabbitai · Jun 5, 2026
diff --git a/.github/workflows/e2e_tests.yaml b/.github/workflows/e2e_tests.yaml
@@ -114,6 +114,20 @@ jobs:
           echo "=== lightspeed-stack.yaml ==="
           grep -A 3 "llama_stack:" lightspeed-stack.yaml
 
+      - name: Cache HuggingFace embedding model
+        uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8  # v4.1.1
+        with:
+          path: /tmp/hf-cache  # same directory the pre-download step uses as HF_HOME
+          key: hf-sentence-transformers-all-mpnet-base-v2  # static key: only the model name feeds it
+
+      - name: Pre-download HuggingFace embedding model
+        env:
+          HF_HOME: /tmp/hf-cache  # download target; matches the cache step's path
+        run: |
+          pip install -q sentence-transformers==3.3.1
+          python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-mpnet-base-v2')"
+          echo "HF_CACHE_PATH=${HF_HOME}" >> "$GITHUB_ENV"
+
       - name: Docker Login for quay access
         if: matrix.mode == 'server'
         env:

diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml
@@ -19,6 +19,7 @@ services:
       - ./run.yaml:/app-root/run.yaml:Z
       - ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro
       - ./tests/e2e/rag:/opt/app-root/src/.llama/storage/rag:Z
+      - ${HF_CACHE_PATH:-./tmp/.hf-cache}:/opt/app-root/src/.cache/huggingface
       - ./tests/e2e/secrets/mcp-token:/tmp/mcp-token:ro,z
       - ./tests/e2e/secrets/invalid-mcp-token:/tmp/invalid-mcp-token:ro,z
     environment:
@@ -57,6 +58,8 @@ services:
       - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-}
       # FAISS test and inline RAG config
       - FAISS_VECTOR_STORE_ID=${FAISS_VECTOR_STORE_ID:-}
+      # Skip HuggingFace Hub update checks (HTTP 429 in CI from parallel jobs); export HF_HUB_OFFLINE=0 to allow downloads.
+      - HF_HUB_OFFLINE=${HF_HUB_OFFLINE:-1}
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8080/liveness"]
       interval: 10s   # how often to run the check

diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -21,6 +21,7 @@ services:
       - llama-storage:/opt/app-root/src/.llama/storage
       - ./tests/e2e/rag:/opt/app-root/src/.llama/storage/rag:z
       - mock-tls-certs:/certs:ro
+      - ${HF_CACHE_PATH:-./tmp/.hf-cache}:/opt/app-root/src/.cache/huggingface
     environment:
       - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
       - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
@@ -57,6 +58,8 @@ services:
       - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-}
       # FAISS test
       - FAISS_VECTOR_STORE_ID=${FAISS_VECTOR_STORE_ID:-}
+      # Skip HuggingFace Hub update checks (HTTP 429 in CI from parallel jobs); export HF_HUB_OFFLINE=0 to allow downloads.
+      - HF_HUB_OFFLINE=${HF_HUB_OFFLINE:-1}
       # OKP/Solr RAG
       - RH_SERVER_OKP=${RH_SERVER_OKP:-}
       - SOLR_URL=${SOLR_URL:-}