Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion .github/workflows/build-docker-image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,26 @@ name: Build Docker Image

on:
workflow_dispatch:

# Trigger on tag push
push:
tags:
- 'v*'
- 'v*' # Matches tags starting with 'v', like 'v1.0', 'v2.1.3', etc.


jobs:
# Build and push Docker image
docker:
runs-on: ubuntu-22.04

steps:
# Checkout repository
- name: Checkout
uses: actions/checkout@v6
with:
submodules: recursive

# Set up QEMU and Docker Buildx
- name: Set up QEMU
uses: docker/setup-qemu-action@v3

Expand Down Expand Up @@ -42,7 +49,10 @@ jobs:
with:
context: .
file: docker/simple/Dockerfile

# Currently only building for linux/amd64, due to ARM issues with some dependencies
platforms: linux/amd64

push: true
tags: |
ghcr.io/thebigeye/llama-cpp-python:latest
Expand Down
51 changes: 41 additions & 10 deletions .github/workflows/build-release-publish.yaml
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
# Links for {package_name}

name: Build and Publish Wheels

# NOTE: Setting up GitHub Actions for building and publishing wheels was a headache.
# - This workflow builds wheels for CPU and CUDA (12.1 to 12.4) on multiple Python versions.
# - It creates GitHub Releases for each build and uploads the wheels there.
# - It generates a PEP 503-compliant simple index for installing via pip from GitHub Pages.
# - The workflow is triggered on new tags (v*), manual dispatch, and weekly schedule.
# This could break on any change to the repo structure or the GitHub Actions environment. (I need to make this more robust eventually.)

on:
workflow_dispatch:
push:
tags: ["v*"]
schedule:
- cron: '0 0 * * 0'

# permissions needed for releases and GitHub Pages
permissions:
contents: write
pages: write
id-token: write

# Limit concurrency: don't run multiple builds at once
concurrency:
group: "pages"
cancel-in-progress: false


jobs:
get_version:
name: Get Version
Expand All @@ -29,6 +37,8 @@ jobs:
- name: Extract version
id: set-version
shell: pwsh

# extract __version__ from llama_cpp/__init__.py
run: |
$version = Select-String -Path "llama_cpp/__init__.py" -Pattern '__version__ = "([^"]+)"' | % { $_.Matches.Groups[1].Value }
Write-Output "version=$version" >> $env:GITHUB_OUTPUT
Expand Down Expand Up @@ -155,7 +165,7 @@ jobs:
- name: Install dependencies (Windows)
if: runner.os == 'Windows'
env:
RUST_LOG: trace
RUST_LOG: trace
run: |
python -m pip install --upgrade pip
python -m pip install uv
Expand All @@ -178,6 +188,12 @@ jobs:
if-no-files-found: warn
overwrite: true

# Collect and Release Jobs
# Here we collect the built wheels and create GitHub Releases for each configuration
# if a release with the same tag exists, we delete it first to avoid conflicts
# then we create a new release and upload the wheels, overwriting existing assets if any
# This ensures that the latest wheels are always available under the same release tags

collect_cpu:
name: Collect and Release CPU Wheels
needs: [get_version, build_wheels_cpu]
Expand Down Expand Up @@ -209,7 +225,7 @@ jobs:
tag="v${{ needs.get_version.outputs.version }}"
git tag -f "$tag"
git push -f origin "$tag"
gh release create "$tag" --title "CPU wheels v${{ needs.get_version.outputs.version }}" --notes "Automated build"
gh release create "$tag" --title "v${{ needs.get_version.outputs.version }}" --notes "Automated build"

- name: Upload wheels
run: gh release upload "v${{ needs.get_version.outputs.version }}" dist/*.whl --clobber
Expand Down Expand Up @@ -243,19 +259,23 @@ jobs:
tag="v${{ needs.get_version.outputs.version }}-${{ matrix.short }}"
git tag -f "$tag"
git push -f origin "$tag"
gh release create "$tag" --title "CUDA ${{ matrix.short }} wheels v${{ needs.get_version.outputs.version }}" --notes "Automated build"
gh release create "$tag" --title "v${{ needs.get_version.outputs.version }}-${{ matrix.short }}" --notes "Automated build"

- name: Upload wheels
run: gh release upload "v${{ needs.get_version.outputs.version }}-${{ matrix.short }}" dist/*.whl --clobber

# Generate PEP 503-compliant simple index for pip installation from GitHub Pages
# See: https://peps.python.org/pep-0503/
generate_pypi_index:
name: Generate PyPI Index
needs: [get_version, collect_cpu, collect_cuda]
runs-on: ubuntu-22.04
env:
GITHUB_TOKEN: ${{ github.token }}

steps:
- name: Create base directory
# TODO: make this less hardcoded
run: mkdir -p dist/whl/{cpu,cu121,cu122,cu123,cu124}

- name: Generate PEP 503 indices using GitHub API
Expand All @@ -264,6 +284,10 @@ jobs:
REPO: ${{ github.repository }}
REPO_OWNER: ${{ github.repository_owner }}
REPO_NAME: ${{ github.event.repository.name }}

# Due to eventual-consistency delays in the GitHub Releases API, we implement retries here
# so that the action waits for the releases to become available and avoids missing assets and wheels.
# Yes, I could use an external script, but this is easier to manage in one file for now.
run: |
python - << 'PYEOF'
import os
Expand All @@ -278,6 +302,8 @@ jobs:

package_name = "llama-cpp-python"

# Define configurations and their corresponding release tags
# for example: 'cu121' -> 'v{version}-cu121'
configs = {
'cpu': {'tag': f'v{version}', 'label': 'CPU'},
'cu121': {'tag': f'v{version}-cu121', 'label': 'CUDA 12.1'},
Expand All @@ -286,17 +312,20 @@ jobs:
'cu124': {'tag': f'v{version}-cu124', 'label': 'CUDA 12.4'},
}

# GitHub API headers
headers = {
'Authorization': f'token {token}',
'Accept': 'application/vnd.github.v3+json'
}

available_configs = []

# Process each configuration
# For each, fetch the release info and generate the simple index files
for config, info in configs.items():
tag = info['tag']
url = f'https://api.github.com/repos/{repo}/releases/tags/{tag}'

# Retry up to 5 times (releases can take seconds to propagate)
for attempt in range(5):
response = requests.get(url, headers=headers)
Expand Down Expand Up @@ -347,7 +376,7 @@ jobs:
' <body>',
' <h1>🦙 llama-cpp-python Wheels</h1>',
' <p>Pre-compiled wheels with CPU and CUDA support (hosted on GitHub Releases).</p>',
' <p>Note: pip may warn about no hash verification safe as downloads are direct from GitHub.</p>',
' <p>Note: pip may warn about no hash verification - safe as downloads are direct from GitHub.</p>',
]

for config, label in available_configs:
Expand All @@ -374,15 +403,17 @@ jobs:
print("✓ Generated root index")
PYEOF

# Create README for the GitHub Pages site
# TODO: make it more beautiful later
- name: Create README
run: |
cat > dist/README.md << 'EOF'
# 🦙 llama-cpp-python Wheels

Pre-compiled wheels with CPU and CUDA support.

See main page for installation commands.

**Repository**: https://github.com/${{ github.repository }}
EOF

Expand Down
36 changes: 8 additions & 28 deletions .github/workflows/build-wheel-cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,40 +14,26 @@ jobs:
- uses: actions/checkout@v6
with:
submodules: recursive

- uses: actions/setup-python@v6
with:
python-version: "3.11"

- name: Install cibuildwheel
run: |
python -m pip install -U pip
python -m pip install -U cibuildwheel wheel

- name: Build wheel (CPU only, skip repair)
- name: Build wheel (CPU only)
env:
CIBW_BUILD: "cp310-manylinux_x86_64"
# Build wheels for Python 3.9 to 3.14 on manylinux_x86_64
CIBW_BUILD: "cp39-manylinux_x86_64 cp310-manylinux_x86_64 cp311-manylinux_x86_64 cp312-manylinux_x86_64 cp313-manylinux_x86_64 cp314-manylinux_x86_64"
CIBW_SKIP: "*-musllinux_*"
CIBW_BUILD_VERBOSITY: 1
CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
# Use newer manylinux_2_28 image for better compatibility with recent Python versions (3.12+) and modern glibc
CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
CIBW_REPAIR_WHEEL_COMMAND_LINUX: ""
CIBW_ENVIRONMENT_LINUX: >-
CMAKE_ARGS="-DGGML_CUDA=OFF -DGGML_METAL=OFF -DGGML_VULKAN=OFF -DGGML_BLAS=OFF -DGGML_NATIVE=OFF"
run: |
python -m cibuildwheel --output-dir wheelhouse_temp

- name: Rename wheel to manylinux
run: |
mkdir -p wheelhouse
cd wheelhouse_temp
for wheel in *.whl; do
new_name=$(echo "$wheel" | sed 's/linux_x86_64/manylinux_2_17_x86_64.manylinux2014_x86_64/')
mv "$wheel" "../wheelhouse/$new_name"
echo "Created: $new_name"
done
cd ..
ls -lh wheelhouse/

python -m cibuildwheel --output-dir wheelhouse
- uses: actions/upload-artifact@v6
with:
name: wheelhouse-linux
Expand All @@ -61,29 +47,25 @@ jobs:
- uses: actions/checkout@v6
with:
submodules: recursive

- uses: actions/setup-python@v6
with:
python-version: "3.11"

- name: Install cibuildwheel
run: |
python -m pip install -U pip
python -m pip install -U cibuildwheel wheel

- name: Build wheel (CPU only)
env:
CIBW_BUILD: "cp310-win_amd64"
# Build wheels for Python 3.9 to 3.14 on win_amd64
CIBW_BUILD: "cp39-win_amd64 cp310-win_amd64 cp311-win_amd64 cp312-win_amd64 cp313-win_amd64 cp314-win_amd64"
CIBW_BUILD_VERBOSITY: 1
CIBW_ENVIRONMENT_WINDOWS: >-
CMAKE_ARGS="-DGGML_CUDA=OFF -DGGML_METAL=OFF -DGGML_VULKAN=OFF -DGGML_BLAS=OFF -DGGML_NATIVE=OFF"
run: |
python -m cibuildwheel --output-dir wheelhouse

- name: List wheels
run: |
Get-ChildItem -Path wheelhouse -Recurse

- uses: actions/upload-artifact@v6
with:
name: wheelhouse-windows
Expand All @@ -100,10 +82,8 @@ jobs:
with:
path: wheelhouse
merge-multiple: true

- name: List all wheels
run: ls -lh wheelhouse/

- name: Upload to Release
uses: softprops/action-gh-release@v2
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-wheels-cuda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
with:
vs-version: '[16.11,16.12)'

- uses: actions/checkout@v6
- uses: actions/checkout@v4
with:
submodules: "recursive"

Expand Down
11 changes: 6 additions & 5 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
push:
branches:
- main
- dev

env:
REPO_ID: Qwen/Qwen2-0.5B-Instruct-GGUF
Expand Down Expand Up @@ -39,12 +40,12 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v6
with:
submodules: "recursive"

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v6
with:
Expand All @@ -70,12 +71,12 @@ jobs:
runs-on: windows-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v6
with:
submodules: "recursive"

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v6
with:
Expand All @@ -94,7 +95,7 @@ jobs:
python -m pip install uv
python -m uv pip install -e .[all] --verbose
shell: cmd

- name: Test with pytest
run: |
python -m pytest
12 changes: 11 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ if (LLAMA_BUILD)
# When building, don't use the install RPATH already
# (but later on when installing)
set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)

# Add the automatically determined parts of the RPATH
# which point to directories outside the build tree to the install RPATH
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
Expand Down Expand Up @@ -153,6 +153,15 @@ if (LLAMA_BUILD)
add_compile_definitions(GGML_USE_METAL)
endif()

# Set version for mtmd (required by upstream CMakeLists.txt)
# NOTE: This is a workaround for mtmd build requirements.
# Version is set to 0.0.0 for local builds. If upstream adds version
# compatibility checks, this may need to match llama.cpp version.
if (NOT DEFINED LLAMA_BUILD_NUMBER)
set(LLAMA_BUILD_NUMBER 0)
endif()
set(LLAMA_INSTALL_VERSION 0.0.${LLAMA_BUILD_NUMBER})

# Building llava
add_subdirectory(vendor/llama.cpp/tools/mtmd)

Expand Down Expand Up @@ -185,3 +194,4 @@ if (LLAMA_BUILD)
# target_include_directories(llama-minicpmv-cli PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
endif()
endif()

Loading