Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 117 additions & 0 deletions .github/workflows/run-compaction.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Runs the compaction benchmark either on demand or for pull requests
# against main that touch production Java sources or Maven build files.
name: Run Compaction Bench

on:
  # Automatic trigger: PRs targeting main, limited to changes that can affect
  # the benchmarked code (main Java sources and any pom.xml).
  pull_request:
    types:
      - opened
      - synchronize
      - ready_for_review
    branches: [main]
    paths:
      - '**/src/main/java/**'
      - pom.xml
      - '**/pom.xml'

  # Manual trigger: both inputs are optional and fall back to their defaults.
  workflow_dispatch:
    inputs:
      dataset:
        description: 'Dataset name passed to CompactorBenchmark (-p datasetNames)'
        required: false
        default: ada002-100k
      branches:
        description: 'Space-separated list of branches to benchmark'
        required: false
        default: main

jobs:
# Job to generate the matrix configuration
  # Emits a JSON matrix {jdk, isa, branch} consumed by the benchmark job:
  #   - pull_request:       "main" plus the PR head branch
  #   - workflow_dispatch:  the whitespace-separated `branches` input
  #   - anything else (or an empty input): just "main"
  # NOTE(review): for pull requests opened from forks the head branch name does
  # not exist in this repository, so a later checkout by branch name will
  # fail - confirm whether fork PRs need to be supported.
  generate-matrix:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Generate matrix
        id: set-matrix
        # Untrusted values (branch names, user input) are handed to the script
        # through the environment instead of being expanded into the script
        # text, so they can never be interpreted as shell code.
        env:
          EVENT_NAME: ${{ github.event_name }}
          HEAD_REF: ${{ github.head_ref }}
          BRANCHES_INPUT: ${{ github.event.inputs.branches }}
        run: |
          set -euo pipefail

          # Default when no branch list can be derived from the event.
          BRANCHES='["main"]'

          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            # jq takes care of JSON-escaping the branch name.
            BRANCHES=$(jq -cn --arg head "$HEAD_REF" '["main", $head]')
          elif [[ "$EVENT_NAME" == "workflow_dispatch" && -n "$BRANCHES_INPUT" ]]; then
            # Split on whitespace without glob expansion.
            read -r -a BRANCH_LIST <<< "$BRANCHES_INPUT"
            # A whitespace-only input yields no entries; keep the default then,
            # because an empty matrix dimension makes the workflow fail.
            if (( ${#BRANCH_LIST[@]} > 0 )); then
              BRANCHES=$(printf '%s\n' "${BRANCH_LIST[@]}" | jq -cRn '[inputs]')
            fi
          fi

          MATRIX=$(jq -cn --argjson branches "$BRANCHES" \
            '{jdk: [24], isa: ["isa-avx512f"], branch: $branches}')
          echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"

# Builds every branch of the generated matrix and runs CompactorBenchmark on it.
  test-compaction:
    needs: generate-matrix
    strategy:
      # Let the remaining branches finish when one branch fails, so a broken
      # branch does not discard the other benchmark results.
      fail-fast: false
      matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix) }}
    # The matrix `isa` value doubles as the runner label.
    runs-on: ${{ matrix.isa }}
    steps:
      - name: Set up GCC
        # Refresh the package index first; a stale index makes installs fail.
        run: |
          sudo apt-get update
          sudo apt-get install -y gcc

      # Single checkout of the branch under test. It runs before setup-java so
      # that the Maven cache key is computed from this branch's pom.xml files.
      - name: Checkout branch
        uses: actions/checkout@v4
        with:
          ref: ${{ matrix.branch }}
          fetch-depth: 0

      - name: Set up JDK ${{ matrix.jdk }}
        uses: actions/setup-java@v4
        with:
          java-version: ${{ matrix.jdk }}
          distribution: temurin
          cache: maven

      - name: Build branch
        run: mvn -B -Punix-amd64-profile package --file pom.xml

      - name: Run CompactorBenchmark
        id: run-benchmark
        # User input and branch names reach the script via the environment
        # rather than being expanded into the script text (script injection).
        env:
          DATASET_INPUT: ${{ github.event.inputs.dataset }}
          BRANCH_NAME: ${{ matrix.branch }}
        run: |
          set -euo pipefail

          # Give the JVM half of the machine's memory: assume 16 GB when the
          # total cannot be determined, and never go below 1 GB.
          TOTAL_MEM_GB=$(free -g | awk '/^Mem:/ {print $2}')
          if [[ -z "$TOTAL_MEM_GB" ]] || [[ "$TOTAL_MEM_GB" -le 0 ]]; then
            TOTAL_MEM_GB=16
          fi
          HALF_MEM_GB=$((TOTAL_MEM_GB / 2))
          if [[ "$HALF_MEM_GB" -lt 1 ]]; then
            HALF_MEM_GB=1
          fi

          # The dataset input is empty on pull_request events.
          DATASET="${DATASET_INPUT:-ada002-100k}"

          # Artifact names only allow a restricted character set; replace
          # everything else (e.g. "/" in feature/foo) with "_".
          SAFE_BRANCH="${BRANCH_NAME//[^A-Za-z0-9_-]/_}"
          echo "safe_branch=$SAFE_BRANCH" >> "$GITHUB_OUTPUT"

          # Pick the first benchmark jar that is not a javadoc/sources jar.
          JMH_JAR=""
          for candidate in benchmarks-jmh/target/benchmarks-jmh-*.jar; do
            case "$candidate" in
              *-javadoc.jar | *-sources.jar) continue ;;
            esac
            if [[ -f "$candidate" ]]; then
              JMH_JAR="$candidate"
              break
            fi
          done
          if [[ -z "$JMH_JAR" ]]; then
            echo "::error::No benchmarks-jmh jar found under benchmarks-jmh/target"
            exit 1
          fi
          echo "Using JMH jar: $JMH_JAR"

          java --enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector \
            -Djvector.experimental.enable_native_vectorization=true \
            -Xmx${HALF_MEM_GB}g \
            -cp "$JMH_JAR" \
            io.github.jbellis.jvector.bench.CompactorBenchmark \
            -p workloadMode=PARTITION_AND_COMPACT \
            -p datasetNames="$DATASET" \
            -p numPartitions=4 \
            -p splitDistribution=FIBONACCI \
            -p indexPrecision=FUSEDPQ \
            -jvmArgsPrepend "-Xmx${HALF_MEM_GB}g" \
            -wi 0 -i 1 -f 1

      - name: Upload compaction results
        uses: actions/upload-artifact@v4
        with:
          name: compaction-results-${{ matrix.isa }}-jdk${{ matrix.jdk }}-${{ steps.run-benchmark.outputs.safe_branch }}
          path: target/benchmark-results/compactor-*/compactor-results.jsonl
          if-no-files-found: warn
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ local/
dataset_
**/local_datasets/**

### Testing results: any JSON / JSON Lines file with "results" in its name
*results*.json
*results*.jsonl

### Bench caches
pq_cache/
index_cache/
Expand Down
39 changes: 38 additions & 1 deletion benchmarks-jmh/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.release>22</maven.compiler.release>
<jmh.version>1.37</jmh.version>
<awssdk.version>2.21.10</awssdk.version>
<!-- Default benchmark arguments (empty) -->
<args></args>
</properties>

<dependencies>
Expand Down Expand Up @@ -53,6 +56,11 @@
<artifactId>log4j-slf4j2-impl</artifactId>
<version>2.24.3</version>
</dependency>
<!-- AWS SDK for Java v2 EC2 module; its version is taken from the
     awssdk.version property.
     NOTE(review): the code using it is not visible here - presumably the
     benchmark tooling; confirm it is needed at the default compile scope. -->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>ec2</artifactId>
<version>${awssdk.version}</version>
</dependency>

</dependencies>

Expand Down Expand Up @@ -94,6 +102,35 @@
</execution>
</executions>
</plugin>

<!-- Launches benchmark tools as external `java` processes. Two executions are
     defined, selected by id:
       compactor -> io.github.jbellis.jvector.bench.CompactorBenchmark
       analyze   -> io.github.jbellis.jvector.bench.benchtools.EventLogAnalyzer
     Extra program arguments are appended through the `args` property.
     NOTE(review): no <version> is declared here - presumably managed by a
     parent POM; confirm.
     NOTE(review): neither execution declares a <phase>, so they are presumably
     meant to be invoked explicitly (e.g. mvn exec:exec@compactor -Dargs="...");
     confirm. -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<executions>
<execution>
<id>compactor</id>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<!-- NOTE(review): explicit false, presumably to override an inherited skip=true; confirm. -->
<skip>false</skip>
<executable>java</executable>
<!-- The %classpath token is substituted by the exec plugin with the project
     classpath. The JVM flags enable native access and the incubating Vector
     API module, and switch on jvector's native vectorization. -->
<commandlineArgs>--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector -Djvector.experimental.enable_native_vectorization=true -cp %classpath io.github.jbellis.jvector.bench.CompactorBenchmark ${args}</commandlineArgs>
</configuration>
</execution>
<execution>
<id>analyze</id>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<!-- NOTE(review): explicit false, presumably to override an inherited skip=true; confirm. -->
<skip>false</skip>
<executable>java</executable>
<!-- Runs the analyzer on the project classpath with no extra JVM flags. -->
<commandlineArgs>-cp %classpath io.github.jbellis.jvector.bench.benchtools.EventLogAnalyzer ${args}</commandlineArgs>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
</project>
Loading
Loading